예제 #1
0
def _requiredEnv(name):
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is not set at all.
    """
    value = os.environ.get(name)
    if value is None:
        raise RuntimeError("Required environment variable %s is not set" % name)
    return value


def predictMain(modelName, sc, maxRetrainRounds=None):
    """Train, save and test the disk-failure LSTM for one disk model on Spark.

    Configuration is taken from the process environment and command line:
      * DATA_FILE_PATH     - root directory of the raw monthly CSV files
      * MODEL_CSV_FILEPATH - directory holding the per-model CSV (<modelName>.csv)
      * GATOR_SQUAD_HOME   - root under which the weights file is written
      * sys.argv[1], sys.argv[2] - year and month selecting the raw CSV files

    Args:
        modelName: Disk model name; used to pick the input CSV and to name the
            saved weights file.
        sc: Spark context handed to `to_simple_rdd` and `SparkModel`.
        maxRetrainRounds: Optional upper bound on the number of extra training
            rounds run while the evaluation score is <= 0.5. ``None`` (the
            default) keeps retraining until the score exceeds 0.5, which may
            never terminate if the model does not improve.

    Returns:
        None. Prints progress and the confusion matrix; writes the weights file.

    Raises:
        RuntimeError: if one of the required environment variables is unset.
        IndexError: if year/month are missing from the command line.
    """
    # Resolve all external configuration before any expensive work, so that a
    # missing variable fails immediately instead of after a full training run.
    dataRoot = _requiredEnv('DATA_FILE_PATH')
    modelCsvRoot = _requiredEnv('MODEL_CSV_FILEPATH')
    outputRoot = _requiredEnv('GATOR_SQUAD_HOME')
    year = sys.argv[1]
    month = sys.argv[2]

    timeSteps = 30          # Number of past values used per training sample
    trainSize = 0.2         # Fraction passed to lstm.train_test_split
    epochsPerRound = 10     # Epochs per call to spark_model.train
    batchSize = 200
    minScore = 0.5          # Retrain while the evaluation score is <= this

    print("Going to Initialize the LSTM model")
    SMARTparameters = getSMARTParameters()
    print("The following are the SMART parameters:", SMARTparameters)
    # Initializing the DiskPrediction model (LSTM model)
    lstm = ls.cloudLSTM(timeSteps=timeSteps, parms=SMARTparameters)
    print("Initialized the Model")
    # Underlying LSTM model, needed to construct the SparkModel below
    lstmModel = lstm.get_LSTM_Model()

    # os.path.join tolerates roots with or without a trailing separator.
    # For E.g "/home/user/Desktop/Cloud/Test/2014/2014-11*.csv"
    inputFilePath = os.path.join(dataRoot, str(year),
                                 str(year) + "-" + str(month) + "*.csv")
    print("InputPath", inputFilePath)
    # NOTE(review): presumably this produces the per-model CSV read below -
    # confirm against rd.generate_DataFrame.
    rd.generate_DataFrame(inputFilePath, SMARTparameters)

    # For E.g "/hadoop/elephas/Output/ST4000DM000.csv"
    inputCSVFilePath = os.path.join(modelCsvRoot, str(modelName) + ".csv")
    modelFeatures = pd.read_csv(filepath_or_buffer=inputCSVFilePath, usecols=SMARTparameters)
    modelLabel = pd.read_csv(filepath_or_buffer=inputCSVFilePath, usecols=['failure'])

    # Replacing Not-A-Number values in the input dataframes with 0
    modelFeatures = modelFeatures.fillna(0)
    modelLabel = modelLabel.fillna(0)

    # Obtaining 3D training and testing vectors
    (feature_train, label_train), (feature_test, label_test) = lstm.train_test_split(
        modelFeatures, modelLabel, trainSize, timeSteps)

    # Nothing to train on: bail out before touching Spark
    if len(feature_train) == 0:
        print("DiskModel has no failure eleements. Training of the model cannot proceed!!")
        return

    # Initializing the Adam optimizer for Elephas
    adam = elephas_optimizers.Adam()
    print("Adam Optimizer initialized")
    # Converting the training arrays to a Spark RDD
    rddataset = to_simple_rdd(sc, feature_train, label_train)
    print("Training data converted into Resilient Distributed Dataset")
    # Initializing the SparkModel with optimizer, master-worker mode and number of workers
    spark_model = SparkModel(sc, lstmModel, optimizer=adam, frequency='epoch',
                             mode='asynchronous', num_workers=2)
    print("Spark Model Initialized")

    # Initial training run of the model, followed by an evaluation on the test split
    spark_model.train(rddataset, nb_epoch=epochsPerRound, batch_size=batchSize,
                      verbose=1, validation_split=0)
    # NOTE(review): the comparison below assumes evaluate returns a scalar - confirm.
    score = spark_model.evaluate(feature_test, label_test, show_accuracy=True)

    # Keep training until the score clears the threshold (optionally bounded)
    retrainRounds = 0
    while score <= minScore:
        if maxRetrainRounds is not None and retrainRounds >= maxRetrainRounds:
            print("Stopping retraining after %d rounds; score is still %s"
                  % (retrainRounds, score))
            break
        spark_model.train(rddataset, nb_epoch=epochsPerRound, batch_size=batchSize,
                          verbose=1, validation_split=0)
        print("LSTM model training done !!")
        score = spark_model.evaluate(feature_test, label_test, show_accuracy=True)
        retrainRounds += 1

    print("Saving weights!!")
    outFilePath = os.path.join(outputRoot, "Weights", str(year), str(month),
                               str(modelName) + "_my_model_weights.h5")
    spark_model.save_weights(outFilePath)

    print("LSTM model testing commencing !!")
    predicted1 = spark_model.predict_classes(feature_test)
    # Confusion matrix of actual vs. predicted labels, with row/column totals
    df_confusion = pd.crosstab(label_test.flatten(), predicted1.flatten(),
                               rownames=['Actual'], colnames=['Predicted'], margins=True)
    print(df_confusion)
예제 #2
0
class SparseGate(ModelFrame):
    """Gating network that blends the outputs of several expert models.

    The gate is a small CNN producing 5 values per sample; value ``k`` is used
    as the weight of the ``k``-th expert model's output (see
    ``create_gate_model``).

    ``self.gateModel`` starts as ``None`` and is not assigned by
    ``create_gate_model``; the caller must set it before calling
    ``train_gate`` or ``load_gate_weights``.
    """

    def __init__(self, x_train, y_train, x_test, y_test, inputs,
                 spark_context):
        """Store the data splits (via ModelFrame) and the shared input tensor.

        Args:
            x_train, y_train, x_test, y_test: Data splits, forwarded to
                ``ModelFrame.__init__``.
            inputs: Input tensor used as the input of both the gating network
                and the combined gate model.
            spark_context: Forwarded to ``ModelFrame.__init__``.
        """
        ModelFrame.__init__(self, x_train, y_train, x_test, y_test,
                            spark_context)
        self.gateModel = None
        self.inputs = inputs

    def gating_network(self):
        """Build the gating CNN on top of ``self.inputs``.

        Two convolutional stages (32 then 64 filters), each made of two
        conv -> ELU -> batch-norm groups followed by 2x2 max pooling and
        dropout, then a flatten and a 5-unit ELU dense layer.

        Layer names are matched by ``load_gate_weights`` and therefore must
        stay exactly as they are.

        Returns:
            A Keras ``Model`` mapping ``self.inputs`` to the 5 gate outputs.
        """
        # `weight_decay` is a module-level name defined outside this class.
        net = Conv2D(32, (3, 3),
                     padding='same',
                     kernel_regularizer=regularizers.l2(weight_decay),
                     input_shape=self.x_train.shape[1:],
                     name='gate1')(self.inputs)
        net = Activation('elu', name='gate2')(net)
        net = BatchNormalization(name='gate3')(net)
        net = Conv2D(32, (3, 3),
                     padding='same',
                     kernel_regularizer=regularizers.l2(weight_decay),
                     name='gate4')(net)
        net = Activation('elu', name='gate5')(net)
        net = BatchNormalization(name='gate6')(net)
        net = MaxPooling2D(pool_size=(2, 2), name='gate7')(net)
        net = Dropout(0.2, name='gate26')(net)

        net = Conv2D(32 * 2, (3, 3),
                     name='gate8',
                     padding='same',
                     kernel_regularizer=regularizers.l2(weight_decay))(net)
        net = Activation('elu', name='gate9')(net)
        net = BatchNormalization(name='gate25')(net)
        net = Conv2D(32 * 2, (3, 3),
                     name='gate10',
                     padding='same',
                     kernel_regularizer=regularizers.l2(weight_decay))(net)
        net = Activation('elu', name='gate11')(net)
        net = BatchNormalization(name='gate12')(net)
        net = MaxPooling2D(pool_size=(2, 2), name='gate13')(net)
        net = Dropout(0.3, name='gate14')(net)

        net = Flatten(name='gate23')(net)
        gate_out = Dense(5, name='gate24', activation='elu')(net)

        return Model(inputs=self.inputs, outputs=gate_out)

    def create_gate_model(self, expert_models):
        """Build and compile the combined gate + experts model.

        The output is the softmax of the sum of the expert outputs, each
        scaled per sample by the matching gate output column.

        Args:
            expert_models: Sequence of Keras models whose last layer output is
                blended. The gate has 5 outputs, so at most 5 experts can be
                weighted.

        Returns:
            The compiled Keras ``Model``. It is NOT stored on ``self``.
        """
        gate_network = self.gating_network()

        def mix_experts(tensors):
            # tensors[0] is the gate output, tensors[1:] the expert outputs.
            # Transposing lets the per-sample gate column broadcast over the
            # expert's class axis; the sum is transposed back afterwards.
            gate = tensors[0]
            weighted = sum(
                K.tf.transpose(expert) * gate[:, k]
                for k, expert in enumerate(tensors[1:]))
            return K.tf.transpose(weighted)

        merged = Lambda(mix_experts)(
            [gate_network.layers[-1].output] +
            [m.layers[-1].output for m in expert_models])
        probs = Activation('softmax', name='gatex')(merged)
        model = Model(inputs=self.inputs, outputs=probs)
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(),
                      metrics=['accuracy'])
        return model

    def train_gate(self, datagen, weights_file):
        """Train ``self.gateModel`` for one epoch on Spark and save its weights.

        Args:
            datagen: Unused; kept for interface compatibility.
            weights_file: Path prefix; weights are saved to
                ``weights_file + '.hdf5'``.

        Side effects:
            Replaces ``self.gateModel`` with the trained master network,
            writes the weights file and appends the score to
            ``../lib/output.txt``.
        """
        model = self.gateModel
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(),
                      metrics=['accuracy'])
        # summary() prints by itself and returns None, so it is not wrapped
        # in print().
        model.summary()

        # Keep the Spark wrapper in a local so self.gateModel never holds a
        # SparkModel if training fails part-way.
        spark_model = SparkModel(model,
                                 frequency='epoch',
                                 mode='asynchronous')
        # NOTE(review): the score is computed BEFORE fit, so it reflects the
        # untrained weights - confirm this is intended.
        score = spark_model.master_network.evaluate(self.x_test,
                                                    self.y_test,
                                                    verbose=2,
                                                    batch_size=50)
        # self.rdd is not defined in this class; presumably set by ModelFrame.
        spark_model.fit(self.rdd, epochs=1, batch_size=50, verbose=1)
        self.gateModel = spark_model.master_network
        self.gateModel.save_weights(weights_file + '.hdf5')

        # score[1] is the metric configured in compile() (accuracy).
        print("------------------------------")
        print("Score is:" + str(score[1]))
        print("-------------------------------")

        # Mode 'a' creates the file when missing and appends otherwise; the
        # context manager closes the handle even if the write fails.
        # NOTE(review): no newline is written, so successive scores run together.
        out_path = '../lib/output.txt'
        with open(out_path, 'a') as text_file:
            text_file.write("Score: %s" % score[1])

    def load_gate_weights(self,
                          model_old,
                          weights_file='../lib/weights/moe_full.hdf5'):
        """Copy weights from ``model_old`` into same-named gate layers.

        Args:
            model_old: Keras model with the architecture stored in
                ``weights_file``; its weights are loaded from that file first.
            weights_file: Path of the weights file to load into ``model_old``.
        """
        model_old.load_weights(weights_file)
        # Index the source layers by name once instead of rescanning them for
        # every gate layer.
        old_by_name = {layer.name: layer for layer in model_old.layers}
        for l in self.gateModel.layers:
            source = old_by_name.get(l.name)
            if source is not None:
                l.set_weights(source.get_weights())
                print("loaded gate layer " + str(l.name))