def main(): """ Load images. Extract HOG feature descriptors. """ # Load stored data X_train = np.load('../data/augment/ImageAugment_input.npy') print('=== TRAIN DATA ===') print(X_train.shape) X_test = np.load('../data/test/ImageTest_input.npy') print('=== TEST DATA ===') print(X_test.shape) print("Extracting HOG features...") RF_train = np.zeros([len(X_train), 32400]) for i in range(len(X_train)): RF_train[i] = hog_feature(X_train[i]) print("FEATURE DESCRIPTORS") print(RF_train.shape) RF_test = np.zeros([len(X_test), 32400]) for i in range(len(X_test)): RF_test[i] = hog_feature(X_test[i]) print(RF_test.shape) # Save data make_folder('../data/processed') np.save('../data/processed/ImageTestHOG_input.npy', RF_test) np.save('../data/processed/ImageTrainHOG_input.npy', RF_train)
def main(): """ Load the data. Train SVM model using linear kernel. Print accuracy on test data. """ # Load stored data X_train = np.load('../data/processed/ImageTrainHOG_input.npy') y_train = np.load('../data/augment/DiseaseAugment_input.npy') print("=== TRAIN DATA ===") print(X_train.shape) print(y_train.shape) X_test = np.load('../data/processed/ImageTestHOG_input.npy') y_test = np.load('../data/test/DiseaseTest_input.npy') print("=== TEST DATA ===") print(X_test.shape) print(y_test.shape) # Classifier svm_model = LinearSVC(C=0.01) svm_model.fit(X_train, y_train) print(svm_model.score(X_test, y_test)) make_folder('../results/models') filename = '../results/models/SVM_model.sav' joblib.dump(svm_model, filename)
def main(): """ Load data. Train random forest model. Print accuracy on test data. """ # Load stored data X_train = np.load('../data/processed/ImageTrainHOG_input.npy') y_train = np.load('../data/augment/DiseaseAugment_input.npy') print("=== TRAIN DATA ===") print(X_train.shape) print(y_train.shape) X_test = np.load('../data/processed/ImageTestHOG_input.npy') y_test = np.load('../data/test/DiseaseTest_input.npy') print("=== TEST DATA ===") print(X_test.shape) print(y_test.shape) # Classifier Random_classifier = RandomForestClassifier(n_estimators=500, max_depth=35, n_jobs=-1, warm_start=True, oob_score=True, max_features='sqrt') Random_classifier.fit(X_train, y_train) print(Random_classifier.score(X_test, y_test)) make_folder('../results/models') filename = '../results/models/Random_model.sav' joblib.dump(Random_classifier, filename)
def con_matrix(model, x, y):
    """ Plot the confusion matrix for the given model.
    Save the png in the output folder.

    Args:
        model (str): model for which the visualization is to be plotted.
        x (numpy array): test inputs (HOG descriptors for the classic models).
        y (numpy array): test labels.
    """
    corr = []
    if model == "random_forest":
        loaded_model = joblib.load(os.path.join(ROOT_DIR, 'Random_model.sav'))
        classifier_prediction = loaded_model.predict(x)
        corr = confusion_matrix(y, classifier_prediction)
    elif model == "svm":
        loaded_model = joblib.load(os.path.join(ROOT_DIR, 'SVM_model.sav'))
        classifier_prediction = loaded_model.predict(x)
        corr = confusion_matrix(y, classifier_prediction)
    elif model == "majority_voting":
        classifier_prediction = np.load(os.path.join(ROOT_DIR, 'Ensemble.npy'))
        corr = confusion_matrix(y, classifier_prediction)
    elif model == "stacked_prediction":
        labeler = LabelEncoder()
        labeler.fit(y)
        loaded_model = load_model(os.path.join(ROOT_DIR, 'custom_ensemble.h5'))
        y_prediction = loaded_model.predict(
            np.load('data/test/X_test_ensemble.npy'))
        prediction = np.argmax(y_prediction, axis=-1)
        prediction = labeler.inverse_transform(prediction)
        corr = confusion_matrix(y, prediction)

    make_confusion_matrix(corr,
                          categories=[
                              'blackrot', 'ecsa', 'healthy', 'leafblight',
                              'pmildew'
                          ],
                          count=True,
                          percent=True,
                          color_bar=False,
                          xy_ticks=True,
                          xy_plot_labels=True,
                          sum_stats=True,
                          fig_size=(8, 6),
                          c_map='OrRd',
                          title='Confusion matrix')

    # Workaround for matplotlib 3.1.1 cropping the top and bottom rows
    # of heatmaps
    b, t = plt.ylim()  # discover the values for bottom and top
    b += 0.5  # add 0.5 to the bottom
    t -= 0.5  # subtract 0.5 from the top
    plt.ylim(b, t)  # update the ylim(bottom, top) values

    make_folder('results/visualization')
    plt.savefig('results/visualization/confusion_matrix_{}.png'.format(model),
                bbox_inches='tight')
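# A minimal usage sketch for con_matrix (array paths assumed, relative
# to the repository root as in the function above):
import numpy as np

x_hog = np.load('data/processed/ImageTestHOG_input.npy')
y_true = np.load('data/test/DiseaseTest_input.npy')
con_matrix('svm', x_hog, y_true)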
def tree():
    """ Plot the trees of the random forest model.
    Save the dot files in the output folder.
    Convert a dot file to png with, e.g.:
    'dot -Tpng tree_0.dot -o tree_0.png'
    """
    model = joblib.load(os.path.join(ROOT_DIR, 'Random_model.sav'))
    make_folder('results/visualization')
    # Index the output files: writing every estimator to the same
    # 'tree.dot' would leave only the last tree on disk.
    for i, tree_in_forest in enumerate(model.estimators_):
        export_graphviz(tree_in_forest,
                        out_file='results/visualization/tree_{}.dot'.format(i),
                        filled=True,
                        rounded=True,
                        precision=2)
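# The exported .dot files can be rasterized with Graphviz's `dot` CLI,
# e.g. from Python (assuming the per-tree filenames written above and a
# local Graphviz install):
import subprocess

subprocess.run(['dot', '-Tpng', 'results/visualization/tree_0.dot',
                '-o', 'results/visualization/tree_0.png'], check=True)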
def plot(image, label, index, model, x_hog):
    """ Display the image, true label and predicted label.
    Save the result in the output folder.

    Args:
        image (numpy array): images to predict from.
        label (numpy array): true labels of the corresponding images.
        index (int): index of the test image entered by the user.
        model (str): model to use (entered by the user).
        x_hog (numpy array): HOG feature descriptors of the images.
    """
    plt.figure(figsize=(8, 6))
    plt.imshow(image[index])
    plt.axis('off')
    plt.title('True label: {}'.format(label[index]),
              fontdict={'fontweight': 'bold', 'fontsize': 'x-large'})

    predictions, percent = model_predict(model, image, x_hog, label)
    # Green title for a correct prediction, red for an incorrect one
    color = "green" if predictions[index] == label[index] else "red"
    if model == "majority_voting":
        # Majority voting carries no confidence score
        plt.suptitle('Predicted label: {}'.format(predictions[index]),
                     color=color)
    else:
        plt.suptitle('Predicted label: {} ({:.2f} %)'.format(
            predictions[index], np.max(percent[index]) * 100),
                     color=color)

    make_folder('results/visualization')
    plt.savefig('results/visualization/app.png', bbox_inches='tight')
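# A minimal usage sketch for plot() (array paths assumed; the index 0
# picks which test image to visualize):
import numpy as np

images = np.load('data/test/ImageTest_input.npy')
labels = np.load('data/test/DiseaseTest_input.npy')
hog_desc = np.load('data/processed/ImageTestHOG_input.npy')
plot(images, labels, 0, 'random_forest', hog_desc)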
def roc(x, y):
    """ Plot the ROC-AUC curves for the random forest model.
    Save the image in the output folder.

    Args:
        x (numpy array): HOG feature descriptors of the test images
            (the model was trained on descriptors, not raw images).
        y (numpy array): test labels.
    """
    model = joblib.load(os.path.join(ROOT_DIR, 'Random_model.sav'))
    visualizer = yellowbrick.classifier.ROCAUC(model,
                                               classes=[
                                                   'healthy', 'leaf_blight',
                                                   'ecsa', 'black rot',
                                                   'powdery mildew'
                                               ])
    visualizer.score(x, y)
    ax = visualizer.show()

    make_folder('results/visualization')
    ax.figure.savefig('results/visualization/auc_roc.png')
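# Usage sketch: Random_model.sav was trained on HOG descriptors, so `x`
# must be the descriptor matrix rather than raw images (paths assumed):
import numpy as np

x_hog = np.load('data/processed/ImageTestHOG_input.npy')
y_true = np.load('data/test/DiseaseTest_input.npy')
roc(x_hog, y_true)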
def plot(model):
    """ Plot the accuracy and loss curves for the neural networks.
    Save the file in the output folder.

    Args:
        model (str): model for which the visualization is to be plotted.
    """
    history_custom = Hist()
    if model == "cnn_custom":
        history_custom = pickle.load(
            open(os.path.join(ROOT_DIR, 'custom_training_history.pkl'), 'rb'))
    elif model == "vgg":
        history_custom = pickle.load(
            open(os.path.join(ROOT_DIR, 'vgg16_training_history.pkl'), 'rb'))

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[15, 8])

    # Plot training & validation accuracy values
    ax1.plot(history_custom.history['acc'])
    ax1.plot(history_custom.history['val_acc'])
    ax1.set_title('Model accuracy')
    ax1.set_ylabel('Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.legend(['Train', 'Validation'], loc='lower right')

    # Plot training & validation loss values
    ax2.plot(history_custom.history['loss'])
    ax2.plot(history_custom.history['val_loss'])
    ax2.set_title('Model loss')
    ax2.set_ylabel('Loss')
    ax2.set_xlabel('Epoch')
    ax2.legend(['Train', 'Validation'], loc='upper right')

    make_folder('results/visualization')
    plt.savefig('results/visualization/acc_loss_{}.png'.format(model))
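# `Hist` is a small project helper for pickling Keras training history
# without pickling the model itself. A minimal sketch, consistent with
# how the training scripts below populate it via setattr:
class Hist:
    """Bare container whose `history` attribute holds the metrics dict."""

    def __init__(self):
        self.history = {}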
def main(): """ Load images and json files from all folders and concatenate to form a single array. Shuffle the array. Split into test and train data sets. """ print('INFO: Extracting json data...') # Accumulate data from json files json_df_images = get_json_data(os.path.join(ROOT_DIR, 'images/')) json_df_positive = get_json_data(os.path.join(ROOT_DIR, 'positive/')) json_df_healthy = get_json_data(os.path.join(ROOT_DIR, 'healthy/')) json_df_team4 = get_json_data(os.path.join(ROOT_DIR, 'team4/')) json_df_team4_br = get_json_data(os.path.join(ROOT_DIR, 'team4_br/')) json_df_leaf_blight = get_json_data(os.path.join(ROOT_DIR, 'leaf_blight/')) # Accumulate data set from all folders print('INFO: Extracting images and corresponding labels...') array1, disease1 = get_images(os.path.join(ROOT_DIR, 'Grape/Black_rot/'), name='black rot') array2, disease2 = get_images(os.path.join(ROOT_DIR, 'Grape/Esca/'), name='ecsa') array3, disease3 = get_images(os.path.join(ROOT_DIR, 'Grape/Leaf_blight/'), name='leaf_blight') array4, disease4 = get_images(os.path.join(ROOT_DIR, 'Grape/healthy/'), name='healthy') array5, disease5 = get_images(os.path.join(ROOT_DIR, 'images/'), js=json_df_images) array6, disease6 = get_images(os.path.join(ROOT_DIR, 'positive/'), js=json_df_positive) array7, disease7 = get_images(os.path.join(ROOT_DIR, 'healthy/'), js=json_df_healthy) array8, disease8 = get_images(os.path.join(ROOT_DIR, 'team4/'), js=json_df_team4) array9, disease9 = get_images(os.path.join(ROOT_DIR, 'team4_br/'), js=json_df_team4_br) array10, disease10 = get_images(os.path.join(ROOT_DIR, 'leaf_blight/'), js=json_df_leaf_blight) # Concatenate data disease_arr = np.concatenate( (disease1, disease2, disease3, disease4, disease5, disease6, disease7, disease8, disease9, disease10), axis=0) print('=== TOTAL DATA ===') print(disease_arr.shape) img_arr = np.concatenate((array1, array2, array3, array4, array5, array6, array7, array8, array9, array10), axis=0) print(img_arr.shape) # Shuffle data img_arr, disease_arr = shuffle(img_arr, disease_arr, random_state=42) print(np.unique(disease_arr)) # split train set and test set X_train, X_test, y_train, y_test = train_test_split(img_arr, disease_arr, test_size=0.2, random_state=42) print('=== TRAIN TEST SPLIT ===') print(X_test.shape) print(X_train.shape) # Save data make_folder('../data/test') make_folder('../data/intermediate') np.save('../data/test/ImageTest_input.npy', X_test) np.save('../data/test/DiseaseTest_input.npy', y_test) np.save('../data/intermediate/ImageTrain_input.npy', X_train) np.save('../data/intermediate/DiseaseTrain_input.npy', y_train)
def main(): """ Load data. Normalize and encode. Train custom CNN model. Print accuracy on test data. """ # Load stored data X_train = np.load('../data/augment/ImageAugment_input.npy') y_train = np.load('../data/augment/DiseaseAugment_input.npy') print("=== TRAIN DATA ===") print(X_train.shape) print(y_train.shape) X_test = np.load('../data/test/ImageTest_input.npy') y_test = np.load('../data/test/DiseaseTest_input.npy') print("=== TEST DATA ===") print(X_test.shape) print(y_test.shape) # hot encoding of labels y_train = encoder(y_train, NUM_CLASSES) y_test = encoder(y_test, NUM_CLASSES) # Input normalization X_train = (X_train / 255.0).astype(np.float32) X_test = (X_test / 255.0).astype(np.float32) # Custom CNN model model_custom = Sequential( (layers.Conv2D(32, kernel_size=(3, 3), padding='same', input_shape=(180, 180, 3)), layers.Activation('relu'), layers.Conv2D(32, kernel_size=(3, 3), padding='same'), layers.Activation('relu'), layers.MaxPooling2D(pool_size=(2, 2)), layers.Conv2D(32, kernel_size=(3, 3), padding='same'), layers.Activation('relu'), layers.Conv2D(32, kernel_size=(3, 3), padding='same'), layers.Activation('relu'), layers.MaxPooling2D(pool_size=(2, 2)), layers.Conv2D(32, kernel_size=(3, 3), padding='same'), layers.Activation('relu'), layers.Conv2D(32, kernel_size=(3, 3), padding='same'), layers.Activation('relu'), layers.MaxPooling2D(pool_size=(2, 2)), layers.Conv2D(32, kernel_size=(3, 3), padding='same'), layers.Activation('relu'), layers.Conv2D(32, kernel_size=(3, 3), padding='same'), layers.Activation('relu'), layers.MaxPooling2D(pool_size=(2, 2)), layers.Conv2D(32, kernel_size=(3, 3), padding='same'), layers.Activation('relu'), layers.Conv2D(32, kernel_size=(3, 3), padding='same'), layers.Activation('relu'), layers.MaxPooling2D(pool_size=(2, 2)), layers.Flatten(), layers.Dropout(0.5), layers.Dense(128), layers.Activation('relu'), layers.Dense(NUM_CLASSES, activation='softmax'))) model_custom.compile(optimizer=Adam(), loss="categorical_crossentropy", metrics=['accuracy']) # Learning rate decay reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001) history_custom = model_custom.fit(X_train, y_train, batch_size=8, epochs=1, verbose=1, validation_split=.1, callbacks=[reduce_lr]) scores = model_custom.evaluate(X_test, y_test, verbose=0) print("========================") print("TEST SET: %s: %.2f%%" % (model_custom.metrics_names[1], scores[1] * 100)) print("========================") print(model_custom.summary()) # save model make_folder('../results/models/') model_custom.save('../results/models/custom.h5') history = dict() history['acc'] = history_custom.history['acc'] history['val_acc'] = history_custom.history['val_acc'] history['loss'] = history_custom.history['loss'] history['val_loss'] = history_custom.history['val_loss'] hist = Hist() setattr(hist, 'history', history) pickle.dump(hist, open('../results/models/custom_training_history.pkl', 'wb'))
def main(): """ Load data. Normalize and encode. Train CNN-VGG16 model. Print accuracy on test data. """ # Load stored data X_train = np.load('../data/augment/ImageAugment_input.npy') y_train = np.load('../data/augment/DiseaseAugment_input.npy') print("=== TRAIN DATA ===") print(X_train.shape) print(y_train.shape) X_test = np.load('../data/test/ImageTest_input.npy') y_test = np.load('../data/test/DiseaseTest_input.npy') print("=== TEST DATA ===") print(X_test.shape) print(y_test.shape) # hot encoding of labels y_train = encoder(y_train, NUM_CLASSES) y_test = encoder(y_test, NUM_CLASSES) # Input normalization X_train = (X_train / 255.0).astype(np.float32) X_test = (X_test / 255.0).astype(np.float32) # VGG16 CNN model IMG_SHAPE = (180, 180, 3) VGG16_MODEL = tf.keras.applications.VGG16(input_shape=IMG_SHAPE, include_top=False, weights='imagenet') VGG16_MODEL.trainable = False global_average_layer = tf.keras.layers.GlobalAveragePooling2D() prediction_layer = Dense(NUM_CLASSES, activation='softmax') model_vgg16 = Sequential([ VGG16_MODEL, Conv2D(512, kernel_size=(3, 3), padding='same'), Activation('relu'), Conv2D(1024, kernel_size=(3, 3), padding='same'), global_average_layer, prediction_layer ]) model_vgg16.compile(optimizer=Adam(), loss="categorical_crossentropy", metrics=['accuracy']) # Learning rate decay reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001) history_custom = model_vgg16.fit(X_train, y_train, batch_size=8, epochs=20, verbose=1, validation_split=.1, callbacks=[reduce_lr]) scores = model_vgg16.evaluate(X_test, y_test, verbose=0) print("========================") print("TEST SET: %s: %.2f%%" % (model_vgg16.metrics_names[1], scores[1] * 100)) print("========================") print(model_vgg16.summary()) print("=== BASE MODEL SUMMARY ===") print(VGG16_MODEL.summary()) # save model make_folder('../results/models') model_vgg16.save('../results/models/vgg16.h5') history = dict() history['acc'] = history_custom.history['acc'] history['val_acc'] = history_custom.history['val_acc'] history['loss'] = history_custom.history['loss'] history['val_loss'] = history_custom.history['val_loss'] hist = Hist() setattr(hist, 'history', history) pickle.dump(hist, open('../results/models/vgg16_training_history.pkl', 'wb'))