def _load_dataset(metadata_dir, build_metadata):
    """Return the dataset unpickled from ``<metadata_dir>/data.pkl``.

    Args:
        metadata_dir: Directory expected to contain ``data.pkl``.
        build_metadata: Zero-argument callable invoked first when the pickle
            does not exist yet.

    Returns:
        Whatever ``pd.read_pickle`` yields for that file.
    """
    data_path = os.path.join(metadata_dir, 'data.pkl')
    if not os.path.exists(data_path):
        build_metadata()
    # NOTE: unpickling executes arbitrary code; only point this at files
    # written by this pipeline.
    return pd.read_pickle(data_path)


def _predict_labels(model, features):
    """Return the predicted class index for every sample in *features*.

    Uses ``model.predict_classes`` when the model still provides it, so
    existing environments behave exactly as before.
    """
    if hasattr(model, 'predict_classes'):
        return model.predict_classes(features)
    # Keras dropped ``predict_classes`` in TensorFlow 2.6; the documented
    # replacement for multi-class models is argmax over ``predict()``.
    # NOTE(review): assumes cnn.model emits one score per class (consistent
    # with the one-hot targets used in main) -- confirm against CNN.
    import numpy as np
    return np.argmax(model.predict(features), axis=-1)


def main(*, augmented=True):
    """Run the CNN training pipeline end to end.

    Loads the dataset (creating its ``data.pkl`` first when missing), splits
    it into train and test parts, trains and evaluates a ``CNN``, logs the
    confusion matrix of the test predictions and saves the model.

    Args:
        augmented: When true (the default, which matches the previously
            hard-coded behaviour) read the dataset from
            ``METADATA_DIR_AUGMENTED_PROCESSED`` and build it with
            ``augment.main`` if needed; otherwise read it from
            ``METADATA_DIR_PROCESSED`` and build it with ``generate.run``.
    """
    logger.info("Start Training Pipeline")

    # Read data, generating the metadata pickle if it is not there yet.
    if augmented:
        dataset = _load_dataset(METADATA_DIR_AUGMENTED_PROCESSED,
                                augment.main)
    else:
        dataset = _load_dataset(METADATA_DIR_PROCESSED, generate.run)

    logger.info(f"Number of samples: {len(dataset)}")
    most_shape = get_most_shape(dataset)
    train_data, test_data = train_test_split(dataset,
                                             augmented=augmented,
                                             split_ratio=0.65)

    X_train, y_train = features_target_split(train_data)
    X_test, y_test = features_target_split(test_data)

    # Reshape for CNN input
    X_train, X_test = reshape_feature_CNN(X_train), reshape_feature_CNN(X_test)

    # Keep the un-encoded test targets: the confusion matrix below compares
    # them with the predicted class indices.
    y_test_values = y_test.copy()

    # One-Hot encoding for classes
    y_train, y_test = one_hot_encode(y_train), one_hot_encode(y_test)

    # Instance of CNN model
    cnn = CNN(most_shape)
    logger.info(str(cnn))

    cnn.train(X_train, y_train, X_test, y_test)
    cnn.evaluate(X_train, y_train, X_test, y_test)

    predictions = _predict_labels(cnn.model, X_test)
    # labels 0..9 -- presumably one per entry of CLASSES; TODO confirm.
    conf_matrix = confusion_matrix(y_test_values,
                                   predictions,
                                   labels=range(10))
    logger.info('Confusion Matrix for classes {}:\n{}'.format(
        CLASSES, conf_matrix))
    cnn.save_model()
        # NOTE(review): the enclosing ``def`` and the branch this block
        # belongs to are not visible in this excerpt.
        # Call run() when PROCESSED_METADATA_DIR/data.pkl does not exist
        # (presumably run() creates it -- confirm), then load the pickle.
        if not os.path.exists(os.path.join(PROCESSED_METADATA_DIR,
                                           'data.pkl')):
            run()
        dataset = pd.read_pickle(
            os.path.join(PROCESSED_METADATA_DIR, 'data.pkl'))

    print(f"Number of samples: {len(dataset)}")
    most_shape = get_most_shape(dataset)
    # Split with augmented=False and a 0.65 split ratio.
    train_data, test_data = train_test_split(dataset,
                                             augmented=False,
                                             split_ratio=0.65)
    # Separate features from targets for each part.
    X_train, y_train = features_target_split(train_data)
    X_test, y_test = features_target_split(test_data)
    # Reshape for CNN input
    X_train, X_test = reshape_feature_CNN(X_train), reshape_feature_CNN(X_test)
    # Preserve y_test values (the un-encoded copy feeds the confusion matrix)
    y_test_values = y_test.copy()
    # One-Hot encoding for classes
    y_train, y_test = one_hot_encode(y_train), one_hot_encode(y_test)
    # Instance of CNN model
    cnn = CNN(most_shape)
    print(str(cnn))
    # Train and evaluate, passing both the train and the test data.
    cnn.train(X_train, y_train, X_test, y_test)
    cnn.evaluate(X_train, y_train, X_test, y_test)
    # NOTE(review): predict_classes was removed from Keras models in
    # TensorFlow 2.6 -- confirm the pinned version still provides it.
    predictions = cnn.model.predict_classes(X_test)
    # Confusion matrix of the un-encoded test targets vs. predictions,
    # restricted to labels 0..9.
    conf_matrix = confusion_matrix(y_test_values,
                                   predictions,
                                   labels=range(10))
    print('Confusion Matrix for classes {}:\n{}'.format(CLASSES, conf_matrix))
    cnn.save_model()
Beispiel #3
0
# Normalize the training and validation frames together.
train_df, val_df = AvgNormalization(df_train, df_val)

# Build the window generator over both frames with 'Pressure' as the only
# label column, then print it.
w1 = WindowGenerator(
    input_width=24,
    label_width=1,
    shift=1,
    train_df=train_df,
    val_df=val_df,
    label_columns=['Pressure'],
)
print(w1)

# Training budget shared by every model, and the table that collects each
# model's evaluate() result on the validation set.
MAX_EPOCHS = 20
val_performance = {}

# Fit each model in turn (CNN first, then RNN), store its validation result
# under its label and plot the fit history under the matching name.
for label, model, loss_plot_name in (
        ('CNN', CNN, 'CNN_Model_Loss'),
        ('RNN', RNN, 'RNN_Model_Loss'),
):
    history = compile_and_fit(model, w1, MAX_EPOCHS)
    val_performance[label] = model.evaluate(w1.val)
    plot_loss(history, loss_plot_name)

# Performance summary: one line per model, showing the second entry of the
# value returned by evaluate().
for name, value in val_performance.items():
    print(f'{name:12s}: {value[1]:0.4f}')