Example #1
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(X_train, y_train)

print("Lowest validation loss: %f using %s" %
      (grid_result.best_score_, grid_result.best_params_))

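The estimator and param_grid above are assumed to be defined earlier; a minimal sketch of what that setup might look like (build_model and the grid values are hypothetical, not from the original):

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

model = KerasRegressor(build_fn=build_model, verbose=0)  # build_model: assumed model factory
param_grid = {'batch_size': [16, 32], 'epochs': [50, 100]}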

## TODO: Specify a CNN architecture
# Your model should accept 96x96 pixel grayscale images as input.
# It should have a fully-connected output layer with 30 values (2 for each facial keypoint).

from keras.models import Sequential
from keras.layers import (Conv2D, MaxPooling2D, GlobalAveragePooling2D,
                          Dense, Dropout, BatchNormalization)

input_shape = (96, 96, 1)  # 96x96 grayscale, per the TODO above
model = Sequential()
model.add(
    Conv2D(filters=64,
           kernel_size=2,
           strides=1,
           activation='relu',
           input_shape=input_shape))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=64, kernel_size=2, strides=1, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=64, kernel_size=2, strides=1, activation='relu'))

model.add(GlobalAveragePooling2D())

model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(BatchNormalization())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(BatchNormalization())

# Fully-connected output layer: 30 values, 2 per facial keypoint (see TODO)
model.add(Dense(30))
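A compile step would follow before training; a minimal sketch, assuming MSE loss for coordinate regression (the optimizer choice is an assumption, not from the original):

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
model.summary()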
r_grid_search = r_grid_search.fit(X_train, y_train)
r_best_parameters = r_grid_search.best_params_
r_best_accuracy = r_grid_search.best_score_
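The r_grid_search object is assumed to be constructed earlier; a plausible sketch (the estimator r_model and grid r_param_grid are hypothetical names):

from sklearn.model_selection import RandomizedSearchCV
r_grid_search = RandomizedSearchCV(estimator=r_model,
                                   param_distributions=r_param_grid,
                                   n_iter=10, cv=5)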
'''
    Creating the ANN
'''

# Initialising the ANN as a sequence of layers (the functional/Graph API is an alternative)
regressor = Sequential()

# Adding the input layer and the first hidden layer
# Sizing the hidden layer is more art than science
# (a common tip: use the average of the input and output dimensions)
regressor.add(
    Dense(units=4,
          kernel_initializer='uniform',
          activation='relu',
          input_dim=2))

# Adding the hidden layers
regressor.add(Dense(units=4, kernel_initializer='uniform', activation='relu'))
regressor.add(Dropout(rate=0.25))
regressor.add(Dense(units=4, kernel_initializer='uniform', activation='relu'))
regressor.add(Dropout(rate=0.25))

# Adding the output layer
# Linear activation: one continuous value for the regression target
regressor.add(Dense(units=1, kernel_initializer='uniform',
                    activation='linear'))

# Compiling the ANN (MSE/adam are standard regression choices)
regressor.compile(optimizer='adam', loss='mean_squared_error')

grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X, y)

print("Best: %f using %s" %
      (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

from keras.constraints import maxnorm

model = Sequential()
model.add(
    Dense(19,
          input_dim=34,
          kernel_initializer='uniform',
          activation='linear',
          kernel_constraint=maxnorm(4)))
model.add(Dropout(0.2))
model.add(Dense(1, activation="linear"))
model.compile(loss='mean_squared_error', optimizer='adam')

model.fit(X, y, epochs=150, batch_size=10)

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

from keras.wrappers.scikit_learn import KerasRegressor

# build_fn expects a callable that returns a compiled model,
# not an already-built model instance
estimator = KerasRegressor(build_fn=lambda: create_model(10),
                           epochs=100,
                           batch_size=5)
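The KFold/cross_val_score imports above suggest a k-fold evaluation was intended; a minimal sketch, assuming X and y as before (fold count and seed are assumptions):

kfold = KFold(n_splits=10, shuffle=True, random_state=7)
results = cross_val_score(estimator, X, y, cv=kfold)
print("Baseline: %.2f (%.2f)" % (results.mean(), results.std()))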
Example #4
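This example calls several helpers defined elsewhere (get_data, preprocess_labels, LSTM_model, calculate_performace, plot_roc_curve, DropConnect, huber). One of them can be sketched from how it is used below; the 0.5 cutoff is an assumption:

def transfer_label_from_prob(proba):
    # turn positive-class probabilities into hard 0/1 labels
    return [1 if p >= 0.5 else 0 for p in proba]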
def LPI(dataset='RPI'):
    data_dim = 620
    timesteps = 1
    batch_size = 64
    epochs = 10
    X, labels = get_data(dataset)
    y, encoder = preprocess_labels(labels)

    num_cross_val = 5
    all_performance_lpa = []
    all_performance_rf = []
    all_performance_xgb = []
    all_performance_rse = []
    all_performance_blend1 = []
    all_performance = []

    all_labels = []
    all_prob = {}
    num_classifier = 3
    all_prob[0] = []
    all_prob[1] = []
    for fold in range(num_cross_val):
        train = []
        test = []
        train = np.array(
            [x for i, x in enumerate(X) if i % num_cross_val != fold])
        test = np.array(
            [x for i, x in enumerate(X) if i % num_cross_val == fold])
        train_label = np.array(
            [x for i, x in enumerate(y) if i % num_cross_val != fold])
        test_label = np.array(
            [x for i, x in enumerate(y) if i % num_cross_val == fold])
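        # reshape to (samples, timesteps=1, features) as the LSTM expects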
        train1 = np.reshape(train, (train.shape[0], 1, train.shape[1]))
        test1 = np.reshape(test, (test.shape[0], 1, test.shape[1]))

        real_labels = []
        for val in test_label:
            if val[0] == 1:
                real_labels.append(0)
            else:
                real_labels.append(1)

        train_label_new = []
        for val in train_label:
            if val[0] == 1:
                train_label_new.append(0)
            else:
                train_label_new.append(1)

        blend_train = np.zeros((
            train1.shape[0],
            num_classifier))  # Number of training data x Number of classifiers
        blend_test = np.zeros(
            (test1.shape[0],
             num_classifier))  # Number of testing data x Number of classifiers

        # real_labels computed above is still valid here

        all_labels = all_labels + real_labels
        '''
        prefilter_train1 = xgb.DMatrix( prefilter_train, label=train_label_new)
        evallist  = [(prefilter_train1, 'train')]
        num_round = 10
        clf = xgb.train( plst, prefilter_train1, num_round, evallist )
        prefilter_test1 = xgb.DMatrix( prefilter_test)
        ae_y_pred_prob = clf.predict(prefilter_test1)
        '''
        tmp_aver = [0] * len(real_labels)

        print("Train...")

        svc = OneVsRestClassifier(SVC(kernel="linear",
                                      random_state=123,
                                      probability=True),
                                  n_jobs=-1)  #, C=1
        #svc=SVC(kernel='poly',degree=2,gamma=1,coef0=0)
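        # RFE: rank features with the linear SVC and keep the top 200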
        rfe = RFE(estimator=svc, n_features_to_select=200, step=1)
        rfe.fit(train, train_label_new)
        train2 = rfe.transform(train)
        test2 = rfe.transform(test)
        train11 = np.reshape(train2, (train2.shape[0], 1, train2.shape[1]))
        test11 = np.reshape(test2, (test2.shape[0], 1, test2.shape[1]))

        class_index = 0
        model = KerasRegressor(build_fn=LSTM_model, epochs=15, verbose=0)
        model.fit(train11, train_label, epochs=15, verbose=2)
        pred_prob = model.predict(test11)[:, 1]
        all_prob[class_index] = all_prob[class_index] + [
            val for val in pred_prob
        ]
        proba = transfer_label_from_prob(pred_prob)
        acc, precision, sensitivity, specificity, MCC = calculate_performace(
            len(real_labels), proba, real_labels)
        fpr_1, tpr_1, auc_thresholds = roc_curve(real_labels, pred_prob)
        auc_score_1 = auc(fpr_1, tpr_1)
        precision1, recall, threshods = precision_recall_curve(
            real_labels, pred_prob)
        aupr_score = auc(recall, precision1)
        print "LPA_DL :", acc, precision, sensitivity, specificity, MCC, auc_score_1, aupr_score
        all_performance_lpa.append([
            acc, precision, sensitivity, specificity, MCC, auc_score_1,
            aupr_score
        ])
        print('---' * 50)

        model = Sequential()
        model.add(
            LSTM(64,
                 return_sequences=False,
                 input_shape=(timesteps, data_dim),
                 name='lstm1')
        )  # kernel_regularizer=regularizers.l2(0.0001) could be added here;
        # with return_sequences=False this returns the final 64-dim state
        model.add(Dropout(0.25, name='dropout'))
        #model.add(Dense(2, name='full_connect'))
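        # DropConnect (third-party layer, assumed imported elsewhere) randomly
        # drops weights of the wrapped Dense layer during training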
        model.add(
            DropConnect(Dense(2,
                              activation='relu',
                              kernel_regularizer=regularizers.l2(0.0001),
                              bias_regularizer=regularizers.l2(0.0001)),
                        prob=0.25,
                        name='full_connect'))
        model.add(Activation('sigmoid'))
        model.summary()

        print('Compiling the Model...')
        model.compile(loss=huber, optimizer='adam',
                      metrics=['accuracy'])  # huber loss assumed defined/imported elsewhere

        es = EarlyStopping(monitor='val_loss',
                           mode='min',
                           verbose=1,
                           patience=5)
        model.fit(train1,
                  train_label,
                  batch_size=batch_size,
                  epochs=epochs,
                  callbacks=[es],
                  shuffle=True,
                  validation_split=0.1,  # needed so EarlyStopping can monitor val_loss
                  verbose=2)
        class_index = class_index + 1
        proba = model.predict_proba(test1)[:, 1]
        tmp_aver = [val1 / 3 + val2 for val1, val2 in zip(proba, tmp_aver)]  # add each classifier's share of the average
        all_prob[class_index] = all_prob[class_index] + [val for val in proba]
        y_pred_xgb = transfer_label_from_prob(proba)

        # real_labels is unchanged from above

        #pdb.set_trace()
        acc, precision, sensitivity, specificity, MCC = calculate_performace(
            len(real_labels), y_pred_xgb, real_labels)
        fpr_1, tpr_1, auc_thresholds = roc_curve(real_labels, proba)
        auc_score_1 = auc(fpr_1, tpr_1)
        precision1, recall, pr_threshods = precision_recall_curve(
            real_labels, proba)
        aupr_score = auc(recall, precision1)
        print(acc, precision, sensitivity, specificity, MCC, auc_score_1,
              aupr_score)
        all_performance.append([
            acc, precision, sensitivity, specificity, MCC, auc_score_1,
            aupr_score
        ])
        print('---' * 50)

    print('mean performance of LPA_DL-FS')
    print(np.mean(np.array(all_performance_lpa), axis=0))
    print('---' * 50)
    print('mean performance of LPA_DL')
    print(np.mean(np.array(all_performance), axis=0))
    print('---' * 50)

    plt.figure()
    plot_roc_curve(all_labels, all_prob[0], 'LPI_WFS')
    plot_roc_curve(all_labels, all_prob[1], 'LPI_NFS')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([-0.05, 1.05])
    plt.ylim([0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC')
    plt.legend(loc="lower right")
    plt.show()
Example #5
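This fragment follows the canonical Keras IMDB CNN example; the names it references are assumed to be set earlier, typically:

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import (Embedding, Dropout, Conv1D,
                          GlobalMaxPooling1D, Dense, Activation)

max_features = 5000  # vocabulary size
maxlen = 400         # cut reviews after this many words
embedding_dims = 50
filters = 250
kernel_size = 3
hidden_dims = 250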
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = Sequential()

# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))
model.add(Dropout(0.2))

# we add a Convolution1D, which will learn filters
# word group filters of size filter_length:
model.add(Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1))
# we use max pooling:
model.add(GlobalMaxPooling1D())

# We add a vanilla hidden layer:
model.add(Dense(hidden_dims))
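The fragment stops here; the canonical example continues roughly as follows (reproduced as an assumption, not from this source):

model.add(Dropout(0.2))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])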
def run_lstm(airline):
    # parameters of LSTM
    TRAIN_SPLIT = 364*24 
    BATCH_SIZE = 24 
    BUFFER_SIZE = 5000
    EVALUATION_INTERVAL = 364 // BATCH_SIZE  # steps_per_epoch expects an integer
    EPOCHS = 200
    accuracy_threshold = 0.25
    
    n_iter_search = 16 # Number of parameter settings that are sampled.
    
    # Parameters to evaluate in order to find best parameters for each model
    optimizers = ['rmsprop', 'adam', 'adadelta'] 
    init = ['glorot_uniform', 'normal', 'uniform']
    EPOCHS = np.array([100, 200, 500])
    param_grid = dict(optimizer=optimizers, nb_epoch=EPOCHS, init=init)
    
    
    # past history valid options: 24-future_target,2(24)-future_target,3(24)-future_target,...
    past_history = 8
    # future target valid options: 12,11,10,...
    future_target = 24 - past_history
    # note that 'past history' + 'future target' must be a multiple of 24, i.e. 24, 48, 72, ...
    
    # this is not relevant in our problem, it is always 1 (to make a prediction for each hour)
    STEP = 1 
    
    # Map airline to airline name
    airline_name = airlines_cv[airlines_cv['Marketing_Airline_Network']==airline]['Airline'].reset_index()
    airline_name = airline_name['Airline'][0]
    
    # select an airline from the dataset (for example: AA)
    data = full_data[full_data['Marketing_Airline_Network'] == airline]
    
    # total delay for all airlines
    df = data[['Date','Hour','Weekday','ArrDelay3AM','DepDelay3AM']].copy()
    
    # Create variables for seasons and holidays
    df['season'] = pd.to_datetime(df['Date']).dt.quarter
    dates = df['Date'].values
    holiday = np.empty(dates.shape[0])
    for i in range(0,dates.shape[0]):
        if dt.datetime.strptime(dates[i],'%Y-%m-%d') in holidays.US():
            holiday[i] = 1
        else:
            holiday[i] = 0
    df['holiday'] = holiday
    
    df = df[df['Date'] != '2018-03-11']
    #df = df[df['Date'] != '2019-03-10']
    
    
    # don't change the seed so that we can compare the results with each other
    tf.random.set_seed(13)
    #tf.set_random_seed(13) # use this instead depending on version of TensorFlow
    
    #creating time steps
    def create_time_steps(length):
      return list(range(-length, 0))
    
    #def for plotting
    def show_plot(plot_data, delta, title):
      labels = ['History', 'True Future', 'Model Prediction']
      marker = ['.-', 'rx', 'go']
      time_steps = create_time_steps(plot_data[0].shape[0])
      if delta:
        future = delta
      else:
        future = 0
    
      plt.title(title)
      for i, x in enumerate(plot_data):
        if i:
          plt.plot(future, plot_data[i], marker[i], markersize=10,
                   label=labels[i])
        else:
          plt.plot(time_steps, plot_data[i].flatten(), marker[i], label=labels[i])
      plt.legend()
      plt.xlim([time_steps[0], (future+5)*2])
      plt.ylabel('Total Delay (min)')
      plt.xlabel('Time-Step')
      return plt
    
    #def for baseline
    def baseline(history):
      return np.mean(history)
    
    
    ######## multivariate
    features_considered = ['ArrDelay3AM','DepDelay3AM','Weekday','season','holiday']
    features = df[features_considered]
    features.index = df[['Date','Hour']]
    features.head()
    
    dataset = features.values
    data_mean = dataset[:TRAIN_SPLIT].mean(axis=0)
    data_std = dataset[:TRAIN_SPLIT].std(axis=0)
    
    # normalize only the two delay columns; the remaining features are categorical
    for i in range(0,2):
        dataset[:,i] = (dataset[:,i]-data_mean[i])/data_std[i]
    
    def multivariate_data(dataset, target, start_index, end_index, history_size,
                          target_size, step, single_step=False):
      data = []
      labels = []
    
      start_index = start_index + history_size
      if end_index is None:
        end_index = len(dataset)
    
      for i in range(start_index, end_index, 24):
        indices = range(i-history_size, i, step)
        data.append(dataset[indices])
    
        if single_step:
          labels.append(target[i+target_size-1]) #added -1
        else:
          labels.append(target[i:i+target_size])
    
      return np.array(data), np.array(labels)
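    # With past_history=8, future_target=16, STEP=1, the stride-24 loop yields
    # one sample per day: an 8-hour input window plus the next 16 hourly targets.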
    
    #def for plotting the error
    def plot_train_history(history, title):
      loss = history.history['loss']
      val_loss = history.history['val_loss']
    
      epochs = range(len(loss))
    
      plt.figure()
    
      plt.plot(epochs, loss, 'b', label='Training loss')
      plt.plot(epochs, val_loss, 'r', label='Validation loss')
      plt.xlabel('Epoch')
      plt.ylabel('Mean Absolute Error')
      plt.title(title)
      plt.legend()
    
      plt.show()
    
    #multivariate_data(dataset, target, start_index, end_index, history_size,target_size, step, single_step=False)
    #preparing the dataset
    x_train_multi, y_train_multi = multivariate_data(dataset, dataset[:, 0], 0,
                                                     TRAIN_SPLIT, past_history,
                                                     future_target, STEP)
    x_val_multi, y_val_multi = multivariate_data(dataset, dataset[:, 0],
                                                 TRAIN_SPLIT, None, past_history,
                                                 future_target, STEP)
    
    print ('Single window of past history : {}'.format(x_train_multi[0].shape))
    print ('Target delay to predict : {}'.format(y_train_multi[0].shape))
    
    
    #definition for multi step plot - this shows the predictions for an individual day
    def multi_step_plot(history, true_future, prediction):
      #plt.figure(figsize=(12, 6))
      plt.figure(figsize=(8, 6))
      num_in = create_time_steps(len(history))
      num_out = len(true_future)
    
      plt.plot(num_in, np.array(history[:, 0]*data_std[0]+data_mean[0]), label='History')
      plt.plot(np.arange(num_out)/STEP, np.array(true_future)*data_std[0]+data_mean[0],color='black',
               label='True Future')
      if prediction.any():
        plt.plot(np.arange(num_out)/STEP, np.array(prediction)*data_std[0]+data_mean[0], color='red', ls='dashed',
                 label='Predicted Future')
      plt.legend(loc='upper left')
      plt.xlabel('Time of Day')
      plt.xticks(range(-past_history+2,future_target,5),range(2,24,5))
      plt.ylabel('Cumulative Delay (Minute)')
      plt.show()
    
    #train
    train_data_multi = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
    train_data_multi = train_data_multi.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
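    # cache in memory, shuffle with a 5000-element buffer, batch, and repeat
    # indefinitely so Keras can draw steps_per_epoch batches each epoch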
    
    #validation
    val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
    val_data_multi = val_data_multi.batch(BATCH_SIZE).repeat()
    
    # Create model for gridsearch analysis of different parameters
    def create_model(BUFFER_SIZE=BUFFER_SIZE, optimizer='rmsprop', init='glorot_uniform'):

        # Building the LSTM model; 'init' is applied to the Dense layers so the
        # grid search over initializers actually has an effect
        multi_step_model = tf.keras.models.Sequential()
        multi_step_model.add(tf.keras.layers.LSTM(32,
                                                  return_sequences=True,
                                                  input_shape=x_train_multi.shape[-2:]))
        multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
        multi_step_model.add(tf.keras.layers.Dense(25, kernel_initializer=init))
        multi_step_model.add(tf.keras.layers.Dense(future_target, kernel_initializer=init))

        multi_step_model.compile(optimizer=optimizer, loss='mean_squared_error',metrics=["mse","mae"])
        return multi_step_model
    
    # Gridsearch analysis to evaluate each of the parameters
    multi_step_model = KerasRegressor(build_fn=create_model)
    random_search = RandomizedSearchCV(estimator=multi_step_model, 
                                       param_distributions=param_grid,
                                       n_iter=n_iter_search)
    random_search.fit(x_train_multi, y_train_multi)
    print("Best: %f using %s" % (random_search.best_score_, random_search.best_params_))
    # Create dataframe with best parameters
    parameters = pd.DataFrame(random_search.best_params_, index=[0])
    parameters['Airline']=airline
    parameters = parameters[['Airline','optimizer','nb_epoch','init']]
    optimizer = parameters['optimizer'][0]
    EPOCHS = parameters['nb_epoch'][0]
    init = parameters['init'][0]
    
    
    # Table of best parameters and performance
    def render_mpl_table(data, col_width=3.0, row_height=0.625, font_size=14,
                         header_color='#40466e', row_colors=['#f1f1f2', 'w'], edge_color='w',
                         bbox=[0, 0, 1, 1], header_columns=0,
                         ax=None, **kwargs):
        if ax is None:
            size = (np.array(data.shape[::-1]) + np.array([0, 1])) * np.array([col_width, row_height])
            fig, ax = plt.subplots(figsize=size)
            plt.title('%s Best Parameters & Performance' % airline_name,fontdict=dict(fontsize=16,fontweight='bold'),loc='center')
            ax.axis('off')
    
        mpl_table = ax.table(cellText=data.values, bbox=bbox, colLabels=data.columns, loc='center',cellLoc='center',**kwargs)
    
        mpl_table.auto_set_font_size(False)
        mpl_table.set_fontsize(font_size)
    
        for k, cell in mpl_table._cells.items():
            cell.set_edgecolor(edge_color)
            if k[0] == 0 or k[1] < header_columns:
                cell.set_text_props(weight='bold', color='w')
                cell.set_facecolor(header_color)
            else:
                cell.set_facecolor(row_colors[k[0]%len(row_colors) ])
        plt.savefig('%s_parameters_performance.png' % airline,bbox_inches='tight')
        return ax
    
    
    
    #Building the LSTM model using best model parameters
    multi_step_model = tf.keras.models.Sequential()
    multi_step_model.add(tf.keras.layers.LSTM(32,
                                              return_sequences=True,
                                              input_shape=x_train_multi.shape[-2:]))
    multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
    multi_step_model.add(tf.keras.layers.Dense(25))
    multi_step_model.add(tf.keras.layers.Dense(future_target))
    
    multi_step_model.compile(optimizer=optimizer, loss='mean_squared_error',metrics=["mse","mae"])
    
    
    
    
    for x, y in val_data_multi.take(1):
      print (multi_step_model.predict(x).shape)
    
    multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
                                              steps_per_epoch=EVALUATION_INTERVAL,
                                              validation_data=val_data_multi,
                                              validation_steps=18)
    
    # plot training and validation loss
    plot_train_history(multi_step_history, 'Multi-Step Training and validation loss')
    
    # show sample results
    # RMSE on the normalized validation data
    rmse = np.sqrt(multi_step_model.evaluate(x_val_multi,y_val_multi))
    print('RMSE: %s' % rmse)
    # rescale by the training std to get back to minutes
    nrmse = rmse*data_std[0]
    print('NRMSE: %s' % nrmse)
    
    parameters['RMSE'] = rmse[0].round(3)
    parameters['NRMSE'] = nrmse[0].round(3)
    
    
    val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
    val_data_multi = val_data_multi.batch(1)
    
    #plotting the sample predictions
    for x, y in val_data_multi.take(1):
      multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])
    
    pred_data=pd.DataFrame([])
    est_date = dt.date(2019, 1, 1)
    # Consolidate true and predictions into a single dataframe
    for x, y in val_data_multi.take(365):
        # index 15 is the last step of the 16-hour prediction window
        true_val = y[:,15]*data_std[0]+data_mean[0]
        prediction = np.array(multi_step_model.predict(x)[:,15]*data_std[0]+data_mean[0])
        pred_data = pred_data.append(pd.DataFrame({'Date':est_date,'True Value':true_val,'Predicted Value':prediction}, index=[0]),ignore_index=True)
        est_date = est_date + timedelta(days=1) 
    pred_data['Predicted Value'] = round(pred_data['Predicted Value'])
    
    print(pred_data)
    
    # Begin labeling data with old labels to test accuracy
    labels = pd.read_csv('testing_cumulative_data_%s_labeled.csv' % airline)
    labels['Date'] = pd.to_datetime(labels['Date'])
    
    cluster = pred_data.copy()
    cluster['Predicted Cluster'] = ''
    cluster['True Cluster'] = ''
    
    cluster['True Cluster']=cluster['Date'].map(dict(zip(labels['Date'],labels['Cluster_Num'])))
    
    # Find cutoffs in order to label data based on predictions and compare with actual clusters
    cutoffs = pd.DataFrame([])
    for i in cluster['True Cluster'].unique():
        data = cluster[cluster['True Cluster']==i]
        val = data['True Value'].min()
        cutoffs = cutoffs.append(pd.DataFrame({'Cluster':i,'Cutoffs':val}, index=[0]),ignore_index=True)
    cutoffs = cutoffs.sort_values(by=['Cluster']).reset_index()
    cutoffs = cutoffs[['Cluster','Cutoffs']]
    
    for i in cutoffs['Cluster'].unique():
        cluster.loc[cluster['Predicted Value']>cutoffs['Cutoffs'][i],'Predicted Cluster']=i
    cluster.loc[cluster['Predicted Value']<=cutoffs['Cutoffs'][1],'Predicted Cluster']=0
    
    meltdown_cutoff = max(cutoffs['Cutoffs'])
    # Plot scatter of predictions
    fig,ax = plt.subplots(figsize=(12,8))
    meltdown = plt.scatter(pred_data[pred_data['True Value']>=meltdown_cutoff]['True Value'],pred_data[pred_data['True Value']>=meltdown_cutoff]['Predicted Value'],color='blue')
    normal = plt.scatter(pred_data[pred_data['True Value']<meltdown_cutoff]['True Value'],pred_data[pred_data['True Value']<meltdown_cutoff]['Predicted Value'],color='gray')
    plt.axhline(y=meltdown_cutoff, color='r', linestyle='-')
    plt.legend((meltdown,normal),
               ('Meltdown', 'Normal'),
               scatterpoints=1,
               loc='upper left',
               ncol=1,
               fontsize=12)
    plt.title('LSTM Predictions',fontsize=16)
    plt.xlabel('True Values',fontsize=14)
    plt.ylabel('Predicted Values',fontsize=14)
    plt.ylim(bottom=0)
    plt.xlim(left=0)
    plt.axis('square')
    ax.plot([0, 1], [0, 1], transform=ax.transAxes)
    plt.savefig('%s_scatter_plot.png' % airline)
    
    #Define the function used to create a Confusion Matrix plot
    def plot_confusion_matrix(cm, classes,
                              normalize=False,
                              title='Confusion matrix',
                              cmap=plt.cm.Blues):
        """
        This function prints and plots the confusion matrix.
        Normalization can be applied by setting `normalize=True`.
        """
        if normalize:
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            print("Normalized confusion matrix")
        else:
            print('Confusion matrix, without normalization')
    
        print(cm)
    
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title, fontdict = dict(fontsize=24))
        plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, fontsize = 18, rotation=45)
        plt.yticks(tick_marks, classes, fontsize = 18)
    
        fmt = '.2f' if normalize else 'd'
        thresh = cm.max() / 2.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, format(cm[i, j], fmt),
                     fontsize = 20,
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        plt.grid(False)
        plt.tight_layout()
        plt.ylabel('True label', fontsize = 18)
        plt.xlabel('Predicted label', fontsize = 18)
    
    y_test = cluster['True Cluster']
    y_pred = cluster['Predicted Cluster']
    
    # Model Accuracy, how often is the classifier correct?
    print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
    
    parameters['Cluster Acc.']=metrics.accuracy_score(y_test,y_pred).round(3)
    
    
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    classificationReport = classification_report(y_test, y_pred)
    cr_lines = classificationReport.split('\n')
    cr_aveTotal = cr_lines[len(cr_lines) - 2].split()
    ave_recall = float(cr_aveTotal[len(cr_aveTotal) - 3])

    parameters['Cluster Recall'] = ave_recall

    def plot_classification_report(cr, title='Classification Report ', with_avg_total=False, cmap=plt.cm.Blues):
        lines = cr.split('\n')
        classes = []
        plotMat = []
        for line in lines[2:(len(lines) - 3)]:
            t = line.split()
            if len(t) == 0:
                break
            classes.append(t[0])
            v = [float(x) for x in t[1: len(t) - 1]]
            print(v)
            plotMat.append(v)
        if with_avg_total:
            aveTotal = lines[len(lines) - 2].split()
            classes.append('avg/total')
            vAveTotal = [float(x) for x in aveTotal[2:len(aveTotal) - 1]]
            plotMat.append(vAveTotal)

        plt.figure()
        plt.imshow(plotMat, interpolation='nearest', cmap=cmap)
        plt.title(title, fontsize=16)
        plt.colorbar()
        x_tick_marks = np.arange(3)
        y_tick_marks = np.arange(len(classes))
        plt.xticks(x_tick_marks, ['Precision', 'Recall', 'F1-Score'])
        plt.yticks(y_tick_marks, classes)
        plt.grid(False)
        plt.tight_layout()
        plt.ylabel('Classes', fontsize=14)
        plt.xlabel('Measures', fontsize=14)
        plt.savefig('%s_classif_report.png' % airline, bbox_inches='tight')
    
    plot_classification_report(classificationReport, with_avg_total=True)
    
    
    
    #Compute confusion matrix
    cnf_matrix = confusion_matrix(y_test, y_pred)
    np.set_printoptions(precision=2)
    
    #Create labels that correspond with our respective cluster labels
    if max(cluster['True Cluster'])==2:
        labs=['Good','Normal','Meltdown']
    if max(cluster['True Cluster'])==3:
        labs=['Good','Normal','Bad','Meltdown']
    if max(cluster['True Cluster'])==4:
        labs=['Great', 'Good','Normal','Bad','Meltdown']
    if max(cluster['True Cluster'])==5:
        labs=['Great', 'Good','Normal','Bad','Very Bad','Meltdown']
    
    #Plot non-normalized confusion matrix to show counts of predicted vs. actual clusters
    plt.figure()
    plt.grid(False)
    plot_confusion_matrix(cnf_matrix, classes=labs,
                          title='Confusion matrix, without normalization')
    plt.savefig('%s_confusion_matrix_count.png' % airline)
    
    #Plot normalized confusion matrix to show percentage of classifications in predicted vs. actual clusters
    plt.figure(figsize=(11,7))
    plot_confusion_matrix(cnf_matrix, classes=labs, normalize=True,
                          title='%s LSTM Model \nNormalized Confusion Matrix' % airline_name)
    plt.grid(False)
    plt.savefig('%s_confusion_matrix.png' % airline)
 
    plt.figure()       
    render_mpl_table(parameters, header_columns=0, col_width=2.0)