Example #1
def main():
    """ Load images.
    Extract HOG feature descriptors.

    """
    # Load stored data
    X_train = np.load('../data/augment/ImageAugment_input.npy')
    print('=== TRAIN DATA ===')
    print(X_train.shape)

    X_test = np.load('../data/test/ImageTest_input.npy')
    print('=== TEST DATA ===')
    print(X_test.shape)

    print("Extracting HOG features...")
    # Each HOG descriptor is a flat vector of 32400 values per image.
    RF_train = np.zeros([len(X_train), 32400])
    for i in range(len(X_train)):
        RF_train[i] = hog_feature(X_train[i])
    print("FEATURE DESCRIPTORS")
    print(RF_train.shape)

    RF_test = np.zeros([len(X_test), 32400])
    for i in range(len(X_test)):
        RF_test[i] = hog_feature(X_test[i])
    print(RF_test.shape)

    # Save data
    make_folder('../data/processed')
    np.save('../data/processed/ImageTestHOG_input.npy', RF_test)
    np.save('../data/processed/ImageTrainHOG_input.npy', RF_train)
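
Note: hog_feature and make_folder are project-local helpers that these excerpts never define. Below is a minimal sketch of what they might look like, assuming scikit-image's hog is used underneath; the HOG parameters shown are placeholder assumptions and would have to match the project's real settings to yield the 32400-element descriptor used above.

import os

from skimage.feature import hog

def make_folder(path):
    """Create the folder (and parents) if it does not already exist."""
    os.makedirs(path, exist_ok=True)

def hog_feature(image):
    """Return a flat HOG descriptor for one RGB image.

    The parameter values below are illustrative assumptions; the real
    project settings must produce a 32400-element vector.
    """
    return hog(image,
               orientations=9,
               pixels_per_cell=(8, 8),
               cells_per_block=(2, 2),
               channel_axis=-1)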
Example #2
def main():
    """ Load the data.
    Train SVM model using linear kernel.
    Print accuracy on test data.

    """
    # Load stored data
    X_train = np.load('../data/processed/ImageTrainHOG_input.npy')
    y_train = np.load('../data/augment/DiseaseAugment_input.npy')
    print("=== TRAIN DATA ===")
    print(X_train.shape)
    print(y_train.shape)

    X_test = np.load('../data/processed/ImageTestHOG_input.npy')
    y_test = np.load('../data/test/DiseaseTest_input.npy')
    print("=== TEST DATA ===")
    print(X_test.shape)
    print(y_test.shape)

    # Classifier
    svm_model = LinearSVC(C=0.01)
    svm_model.fit(X_train, y_train)
    print(svm_model.score(X_test, y_test))

    make_folder('../results/models')
    filename = '../results/models/SVM_model.sav'
    joblib.dump(svm_model, filename)
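
Reloading the saved classifier later, for example in the prediction utilities further down, could look like this:

import joblib

svm_model = joblib.load('../results/models/SVM_model.sav')
print(svm_model.predict(X_test[:5]))  # predicted labels for five samples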
Example #3
def main():
    """ Load data.
    Train random forest model.
    Print accuracy on test data.

    """
    # Load stored data
    X_train = np.load('../data/processed/ImageTrainHOG_input.npy')
    y_train = np.load('../data/augment/DiseaseAugment_input.npy')
    print("=== TRAIN DATA ===")
    print(X_train.shape)
    print(y_train.shape)

    X_test = np.load('../data/processed/ImageTestHOG_input.npy')
    y_test = np.load('../data/test/DiseaseTest_input.npy')
    print("=== TEST DATA ===")
    print(X_test.shape)
    print(y_test.shape)

    # Classifier
    Random_classifier = RandomForestClassifier(n_estimators=500,
                                               max_depth=35,
                                               n_jobs=-1,
                                               warm_start=True,
                                               oob_score=True,
                                               max_features='sqrt')

    Random_classifier.fit(X_train, y_train)
    print(Random_classifier.score(X_test, y_test))

    make_folder('../results/models')
    filename = '../results/models/Random_model.sav'
    joblib.dump(Random_classifier, filename)
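
Since the classifier is built with oob_score=True, scikit-learn also exposes an out-of-bag accuracy estimate after fitting, which could be printed alongside the test score:

# Out-of-bag accuracy, estimated on the samples left out of each bootstrap
print(Random_classifier.oob_score_)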
Example #4
def con_matrix(model, x, y):
    """ Plot confusion matrix for the given model.
    Save the png in the output folder.

    Args:
        model (str): model for which visualization needs to be plot.

        x (numpy array): test images.

        y (numpy array): test labels.

    """
    corr = []
    if model == "random_forest":
        loaded_model = joblib.load(os.path.join(ROOT_DIR, 'Random_model.sav'))
        classifier_prediction = loaded_model.predict(x)
        corr = confusion_matrix(y, classifier_prediction)

    elif model == "svm":
        loaded_model = joblib.load(os.path.join(ROOT_DIR, 'SVM_model.sav'))
        classifier_prediction = loaded_model.predict(x)
        corr = confusion_matrix(y, classifier_prediction)

    elif model == "majority_voting":
        classifier_prediction = np.load(os.path.join(ROOT_DIR, 'Ensemble.npy'))
        corr = confusion_matrix(y, classifier_prediction)

    elif model == "stacked_prediction":
        labeler = LabelEncoder()
        labeler.fit(y)
        loaded_model = load_model(os.path.join(ROOT_DIR, 'custom_ensemble.h5'))
        y_prediction = loaded_model.predict(
            np.load('data/test/X_test_ensemble.npy'))
        prediction = np.argmax(y_prediction, axis=-1)
        prediction = labeler.inverse_transform(prediction)
        corr = confusion_matrix(y, prediction)

    make_confusion_matrix(
        corr,
        categories=['blackrot', 'ecsa', 'healthy', 'leafblight', 'pmildew'],
        count=True,
        percent=True,
        color_bar=False,
        xy_ticks=True,
        xy_plot_labels=True,
        sum_stats=True,
        fig_size=(8, 6),
        c_map='OrRd',
        title='Confusion matrix')
    # Work around the matplotlib 3.1.1 bug that crops the top and bottom
    # rows of heatmaps by half a cell.
    b, t = plt.ylim()
    b += 0.5  # add half a cell at the bottom
    t -= 0.5  # remove half a cell at the top
    plt.ylim(b, t)

    make_folder('results/visualization')
    plt.savefig('results/visualization/confusion_matrix_{}.png'.format(model),
                bbox_inches='tight')
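
A typical call, assuming the HOG test features and labels saved by the earlier examples (the exact paths here are assumptions):

X_test = np.load('data/processed/ImageTestHOG_input.npy')
y_test = np.load('data/test/DiseaseTest_input.npy')
con_matrix("svm", X_test, y_test)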
Example #5
def tree():
    """ Plot the trees of the random forest model.
    Save one dot file per tree in the output folder.
    Convert a dot file to png by using the command:
    'dot -Tpng tree_0.dot -o tree_0.png'

    """
    model = joblib.load(os.path.join(ROOT_DIR, 'Random_model.sav'))
    make_folder('results/visualization')
    # Export each tree to its own dot file; a single fixed filename would
    # be overwritten on every iteration of the loop.
    for i, tree_in_forest in enumerate(model.estimators_):
        export_graphviz(tree_in_forest,
                        out_file='results/visualization/tree_{}.dot'.format(i),
                        filled=True,
                        rounded=True,
                        precision=2)
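
To convert the exported dot files to PNGs from Python instead of by hand, a small loop works, assuming the Graphviz dot binary is on the PATH:

import glob
import subprocess

for dot_file in glob.glob('results/visualization/tree_*.dot'):
    png_file = dot_file.replace('.dot', '.png')
    subprocess.run(['dot', '-Tpng', dot_file, '-o', png_file], check=True)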
Example #6
def plot(image, label, index, model, x_hog):
    """ Display image, true label and predicted label.
    Save the result in the output folder

    Args:
        image (numpy array): image to predict.

        label (numpy array): true labels of corresponding images.

        index (int): index of the test image entered by the user.

        model (str): model to use (entered by the user).

        x_hog (numpy array): feature descriptors of the image.

    """
    plt.figure(figsize=(8, 6))
    plt.imshow(image[index])
    plt.axis('off')
    plt.title('True label: {}'.format(label[index]),
              fontdict={
                  'fontweight': 'bold',
                  'fontsize': 'x-large'
              })
    predictions, percent = model_predict(model, image, x_hog, label)
    # Green title for a correct prediction, red for an incorrect one.
    color = "green" if predictions[index] == label[index] else "red"
    if model == "majority_voting":
        # Majority voting carries no per-class probability to report.
        plt.suptitle('Predicted label: {}'.format(predictions[index]),
                     color=color)
    else:
        plt.suptitle('Predicted label: {} ({:.2f} %)'.format(
            predictions[index],
            np.max(percent[index]) * 100),
                     color=color)

    make_folder('results/visualization')
    plt.savefig('results/visualization/app.png', bbox_inches='tight')
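
model_predict is another project helper that the excerpt does not show. A purely hypothetical sketch of its random-forest branch, returning the (predictions, percent) pair that the plot reads np.max from:

def model_predict(model, image, x_hog, label):
    """Hypothetical sketch; only the random-forest branch is shown."""
    if model == "random_forest":
        loaded = joblib.load(os.path.join(ROOT_DIR, 'Random_model.sav'))
        predictions = loaded.predict(x_hog)
        percent = loaded.predict_proba(x_hog)  # per-class probabilities
        return predictions, percent
    # Branches for "svm", "majority_voting", etc. are omitted here.
    raise NotImplementedError(model)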
Example #7
def roc(x, y):
    """ Plot ROC-AUC plot for random forest model.
    Save the image in output folder.

    Args:
        x (numpy array): test images.

        y (numpy array): test labels.

    """
    model = joblib.load(os.path.join(ROOT_DIR, 'Random_model.sav'))

    visualizer = yellowbrick.classifier.ROCAUC(model,
                                               classes=[
                                                   'healthy', 'leaf_blight',
                                                   'ecsa', 'black rot',
                                                   'powdery mildew'
                                               ])
    visualizer.score(x, y)
    ax = visualizer.show()
    make_folder('results/visualization')
    ax.figure.savefig('results/visualization/auc_roc.png')
Example #8
def plot(model):
    """ Plot the accuracy and loss curve for the neural networks.
    Save file in the output folder.

    Args:
        model (str): model for which the visualization needs to be plotted.

    """
    history_custom = Hist()
    if model == "cnn_custom":
        history_custom = pickle.load(
            open(os.path.join(ROOT_DIR, 'custom_training_history.pkl'), 'rb'))

    elif model == "vgg":
        history_custom = pickle.load(
            open(os.path.join(ROOT_DIR, 'vgg16_training_history.pkl'), 'rb'))

    # Plot training & validation accuracy values
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[15, 8])
    ax1.plot(history_custom.history['acc'])
    ax1.plot(history_custom.history['val_acc'])
    ax1.set_title('Model accuracy')
    ax1.set_ylabel('Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.legend(['Train', 'Validation'], loc='lower right')

    # Plot training & validation loss values
    ax2.plot(history_custom.history['loss'])
    ax2.plot(history_custom.history['val_loss'])
    ax2.set_title('Model loss')
    ax2.set_ylabel('Loss')
    ax2.set_xlabel('Epoch')
    ax2.legend(['Train', 'Validation'], loc='upper right')

    make_folder('results/visualization')
    plt.savefig('results/visualization/acc_loss_{}.png'.format(model))
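
Hist appears throughout these examples as a small pickle-friendly stand-in for the Keras History object. Its definition is not shown; a minimal sketch consistent with how it is used would be:

class Hist:
    """Bare container mimicking keras.callbacks.History for pickling."""

    def __init__(self):
        self.history = {}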
Example #9
def main():
    """ Load images and json files from all folders and concatenate to form a single array.
    Shuffle the array.
    Split into test and train data sets.

    """
    print('INFO: Extracting json data...')
    # Accumulate data from json files
    json_df_images = get_json_data(os.path.join(ROOT_DIR, 'images/'))
    json_df_positive = get_json_data(os.path.join(ROOT_DIR, 'positive/'))
    json_df_healthy = get_json_data(os.path.join(ROOT_DIR, 'healthy/'))
    json_df_team4 = get_json_data(os.path.join(ROOT_DIR, 'team4/'))
    json_df_team4_br = get_json_data(os.path.join(ROOT_DIR, 'team4_br/'))
    json_df_leaf_blight = get_json_data(os.path.join(ROOT_DIR, 'leaf_blight/'))

    # Accumulate data set from all folders
    print('INFO: Extracting images and corresponding labels...')
    array1, disease1 = get_images(os.path.join(ROOT_DIR, 'Grape/Black_rot/'),
                                  name='black rot')
    array2, disease2 = get_images(os.path.join(ROOT_DIR, 'Grape/Esca/'),
                                  name='ecsa')
    array3, disease3 = get_images(os.path.join(ROOT_DIR, 'Grape/Leaf_blight/'),
                                  name='leaf_blight')
    array4, disease4 = get_images(os.path.join(ROOT_DIR, 'Grape/healthy/'),
                                  name='healthy')
    array5, disease5 = get_images(os.path.join(ROOT_DIR, 'images/'),
                                  js=json_df_images)
    array6, disease6 = get_images(os.path.join(ROOT_DIR, 'positive/'),
                                  js=json_df_positive)
    array7, disease7 = get_images(os.path.join(ROOT_DIR, 'healthy/'),
                                  js=json_df_healthy)
    array8, disease8 = get_images(os.path.join(ROOT_DIR, 'team4/'),
                                  js=json_df_team4)
    array9, disease9 = get_images(os.path.join(ROOT_DIR, 'team4_br/'),
                                  js=json_df_team4_br)
    array10, disease10 = get_images(os.path.join(ROOT_DIR, 'leaf_blight/'),
                                    js=json_df_leaf_blight)

    # Concatenate data
    disease_arr = np.concatenate(
        (disease1, disease2, disease3, disease4, disease5, disease6, disease7,
         disease8, disease9, disease10),
        axis=0)
    print('=== TOTAL DATA ===')
    print(disease_arr.shape)
    img_arr = np.concatenate((array1, array2, array3, array4, array5, array6,
                              array7, array8, array9, array10),
                             axis=0)
    print(img_arr.shape)

    # Shuffle data
    img_arr, disease_arr = shuffle(img_arr, disease_arr, random_state=42)
    print(np.unique(disease_arr))

    # split train set and test set
    X_train, X_test, y_train, y_test = train_test_split(img_arr,
                                                        disease_arr,
                                                        test_size=0.2,
                                                        random_state=42)
    print('=== TRAIN TEST SPLIT ===')
    print(X_test.shape)
    print(X_train.shape)

    # Save data
    make_folder('../data/test')
    make_folder('../data/intermediate')
    np.save('../data/test/ImageTest_input.npy', X_test)
    np.save('../data/test/DiseaseTest_input.npy', y_test)
    np.save('../data/intermediate/ImageTrain_input.npy', X_train)
    np.save('../data/intermediate/DiseaseTrain_input.npy', y_train)
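
get_images and get_json_data are project loaders that the excerpt omits. A rough, hypothetical sketch of get_images, assuming 180x180 RGB inputs (the size the CNN examples below expect) and one fixed label per folder:

import glob
import os

import numpy as np
from PIL import Image

def get_images(folder, name=None, js=None):
    """Hypothetical sketch: load a folder of images with their labels."""
    images, labels = [], []
    for path in glob.glob(os.path.join(folder, '**', '*.jpg'), recursive=True):
        img = Image.open(path).convert('RGB').resize((180, 180))
        images.append(np.asarray(img))
        # With a fixed name the whole folder shares one label; otherwise
        # the label would come from the json records in js (omitted here).
        labels.append(name)
    return np.array(images), np.array(labels)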
Example #10
def main():
    """ Load data.
    Normalize and encode.
    Train custom CNN model.
    Print accuracy on test data.

    """
    # Load stored data
    X_train = np.load('../data/augment/ImageAugment_input.npy')
    y_train = np.load('../data/augment/DiseaseAugment_input.npy')
    print("=== TRAIN DATA ===")
    print(X_train.shape)
    print(y_train.shape)

    X_test = np.load('../data/test/ImageTest_input.npy')
    y_test = np.load('../data/test/DiseaseTest_input.npy')
    print("=== TEST DATA ===")
    print(X_test.shape)
    print(y_test.shape)

    # One-hot encode the labels
    y_train = encoder(y_train, NUM_CLASSES)
    y_test = encoder(y_test, NUM_CLASSES)

    # Input normalization
    X_train = (X_train / 255.0).astype(np.float32)
    X_test = (X_test / 255.0).astype(np.float32)

    # Custom CNN model
    model_custom = Sequential([
        layers.Conv2D(32, kernel_size=(3, 3), padding='same',
                      input_shape=(180, 180, 3)),
        layers.Activation('relu'),
        layers.Conv2D(32, kernel_size=(3, 3), padding='same'),
        layers.Activation('relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(32, kernel_size=(3, 3), padding='same'),
        layers.Activation('relu'),
        layers.Conv2D(32, kernel_size=(3, 3), padding='same'),
        layers.Activation('relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(32, kernel_size=(3, 3), padding='same'),
        layers.Activation('relu'),
        layers.Conv2D(32, kernel_size=(3, 3), padding='same'),
        layers.Activation('relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(32, kernel_size=(3, 3), padding='same'),
        layers.Activation('relu'),
        layers.Conv2D(32, kernel_size=(3, 3), padding='same'),
        layers.Activation('relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(32, kernel_size=(3, 3), padding='same'),
        layers.Activation('relu'),
        layers.Conv2D(32, kernel_size=(3, 3), padding='same'),
        layers.Activation('relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(128),
        layers.Activation('relu'),
        layers.Dense(NUM_CLASSES, activation='softmax')
    ])

    model_custom.compile(optimizer=Adam(),
                         loss="categorical_crossentropy",
                         metrics=['accuracy'])

    # Learning rate decay
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                     factor=0.2,
                                                     patience=5,
                                                     min_lr=0.00001)
    history_custom = model_custom.fit(X_train,
                                      y_train,
                                      batch_size=8,
                                      epochs=1,
                                      verbose=1,
                                      validation_split=.1,
                                      callbacks=[reduce_lr])
    scores = model_custom.evaluate(X_test, y_test, verbose=0)
    print("========================")
    print("TEST SET: %s: %.2f%%" %
          (model_custom.metrics_names[1], scores[1] * 100))
    print("========================")

    print(model_custom.summary())

    # save model
    make_folder('../results/models/')
    model_custom.save('../results/models/custom.h5')

    history = dict()
    history['acc'] = history_custom.history['acc']
    history['val_acc'] = history_custom.history['val_acc']
    history['loss'] = history_custom.history['loss']
    history['val_loss'] = history_custom.history['val_loss']

    hist = Hist()
    setattr(hist, 'history', history)
    pickle.dump(hist,
                open('../results/models/custom_training_history.pkl', 'wb'))
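
encoder is also project-local. One-hot encoding string labels typically pairs a LabelEncoder with to_categorical, so a plausible sketch (an assumption, and one that relies on train and test sets containing the same classes) is:

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

def encoder(labels, num_classes):
    """Hypothetical sketch: map string labels to one-hot vectors."""
    encoded = LabelEncoder().fit_transform(labels)
    return to_categorical(encoded, num_classes=num_classes)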
Example #11
def main():
    """ Load data.
    Normalize and encode.
    Train CNN-VGG16 model.
    Print accuracy on test data.

    """
    # Load stored data
    X_train = np.load('../data/augment/ImageAugment_input.npy')
    y_train = np.load('../data/augment/DiseaseAugment_input.npy')
    print("=== TRAIN DATA ===")
    print(X_train.shape)
    print(y_train.shape)

    X_test = np.load('../data/test/ImageTest_input.npy')
    y_test = np.load('../data/test/DiseaseTest_input.npy')
    print("=== TEST DATA ===")
    print(X_test.shape)
    print(y_test.shape)

    # One-hot encode the labels
    y_train = encoder(y_train, NUM_CLASSES)
    y_test = encoder(y_test, NUM_CLASSES)

    # Input normalization
    X_train = (X_train / 255.0).astype(np.float32)
    X_test = (X_test / 255.0).astype(np.float32)

    # VGG16 CNN model
    IMG_SHAPE = (180, 180, 3)
    VGG16_MODEL = tf.keras.applications.VGG16(input_shape=IMG_SHAPE,
                                              include_top=False,
                                              weights='imagenet')

    VGG16_MODEL.trainable = False
    global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
    prediction_layer = Dense(NUM_CLASSES, activation='softmax')

    model_vgg16 = Sequential([
        VGG16_MODEL,
        Conv2D(512, kernel_size=(3, 3), padding='same'),
        Activation('relu'),
        Conv2D(1024, kernel_size=(3, 3), padding='same'),
        global_average_layer,
        prediction_layer
    ])

    model_vgg16.compile(optimizer=Adam(),
                        loss="categorical_crossentropy",
                        metrics=['accuracy'])

    # Learning rate decay
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                     factor=0.2,
                                                     patience=5,
                                                     min_lr=0.00001)
    history_custom = model_vgg16.fit(X_train,
                                     y_train,
                                     batch_size=8,
                                     epochs=20,
                                     verbose=1,
                                     validation_split=.1,
                                     callbacks=[reduce_lr])
    scores = model_vgg16.evaluate(X_test, y_test, verbose=0)
    print("========================")
    print("TEST SET: %s: %.2f%%" %
          (model_vgg16.metrics_names[1], scores[1] * 100))
    print("========================")

    print(model_vgg16.summary())
    print("=== BASE MODEL SUMMARY ===")
    print(VGG16_MODEL.summary())

    # save model
    make_folder('../results/models')
    model_vgg16.save('../results/models/vgg16.h5')
    history = dict()
    history['acc'] = history_custom.history['acc']
    history['val_acc'] = history_custom.history['val_acc']
    history['loss'] = history_custom.history['loss']
    history['val_loss'] = history_custom.history['val_loss']

    hist = Hist()
    setattr(hist, 'history', history)
    pickle.dump(hist, open('../results/models/vgg16_training_history.pkl',
                           'wb'))
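
The saved network can later be restored with Keras's load_model, for example:

from tensorflow.keras.models import load_model

model_vgg16 = load_model('../results/models/vgg16.h5')
predictions = model_vgg16.predict(X_test)  # class probabilities per image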