import gc
import os
import collections

import numpy as np
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                             precision_recall_fscore_support)

# NOTE: these package-internal imports are assumed to follow the RMDL layout
# referenced throughout this module (BuildModel, Plot, Global, GloVe download,
# text feature extraction).
from RMDL import BuildModel
from RMDL import Plot
from RMDL import Global as G
from RMDL import text_feature_extraction as txt
from RMDL.Download import Download_Glove as GloVe


def Text_Classification(x_train, y_train, x_test, y_test,
                        batch_size=128,
                        EMBEDDING_DIM=50,
                        MAX_SEQUENCE_LENGTH=500,
                        MAX_NB_WORDS=75000,
                        GloVe_dir="",
                        GloVe_file="glove.6B.50d.txt",
                        sparse_categorical=True,
                        random_deep=[3, 3, 3],
                        epochs=[500, 500, 500],
                        plot=False,
                        min_hidden_layer_dnn=1, max_hidden_layer_dnn=8,
                        min_nodes_dnn=128, max_nodes_dnn=1024,
                        min_hidden_layer_rnn=1, max_hidden_layer_rnn=5,
                        min_nodes_rnn=32, max_nodes_rnn=128,
                        min_hidden_layer_cnn=3, max_hidden_layer_cnn=10,
                        min_nodes_cnn=128, max_nodes_cnn=512,
                        random_state=42,
                        random_optimizor=True,
                        dropout=0.5,
                        no_of_classes=0):
    """
    Text_Classification(x_train, y_train, x_test, y_test, batch_size=128,
                        EMBEDDING_DIM=50, MAX_SEQUENCE_LENGTH=500,
                        MAX_NB_WORDS=75000, GloVe_dir="",
                        GloVe_file="glove.6B.50d.txt", sparse_categorical=True,
                        random_deep=[3, 3, 3], epochs=[500, 500, 500],
                        plot=False,
                        min_hidden_layer_dnn=1, max_hidden_layer_dnn=8,
                        min_nodes_dnn=128, max_nodes_dnn=1024,
                        min_hidden_layer_rnn=1, max_hidden_layer_rnn=5,
                        min_nodes_rnn=32, max_nodes_rnn=128,
                        min_hidden_layer_cnn=3, max_hidden_layer_cnn=10,
                        min_nodes_cnn=128, max_nodes_cnn=512,
                        random_state=42, random_optimizor=True,
                        dropout=0.5, no_of_classes=0)

    Parameters
    ----------
    x_train : list of str
        Input X for training.
    y_train : array of int
        Input Y for training.
    x_test : list of str
        Input X for testing.
    y_test : array of int
        Input Y for testing.
    batch_size : int, optional
        Number of samples per gradient update. Defaults to 128.
    EMBEDDING_DIM : int, optional
        Dimensionality of the word embeddings. Defaults to 50 and must match
        the GloVe file used.
    MAX_SEQUENCE_LENGTH : int, optional
        Maximum length (in tokens) of each input sequence. Defaults to 500.
    MAX_NB_WORDS : int, optional
        Maximum number of unique words in the datasets. Defaults to 75000.
    GloVe_dir : str, optional
        Directory of GloVe (or another pre-trained embedding). Defaults to "",
        in which case glove.6B.zip is downloaded.
    GloVe_file : str, optional
        Which GloVe (or pre-trained word embedding) file to use. Defaults to
        glove.6B.50d.txt. NOTE: if you use another version of GloVe,
        EMBEDDING_DIM must match its dimensionality.
    sparse_categorical : bool, optional
        Should be True when the target is of shape (n, 1), i.e. integer
        labels. Defaults to True.
    random_deep : array of int of length 3, optional
        Number of ensembled models used in RMDL: random_deep[0] is the number
        of DNNs, random_deep[1] the number of RNNs, and random_deep[2] the
        number of CNNs. Defaults to [3, 3, 3].
    epochs : array of int of length 3, optional
        Number of epochs for each ensembled model: epochs[0] for the DNNs,
        epochs[1] for the RNNs, and epochs[2] for the CNNs.
        Defaults to [500, 500, 500].
    plot : bool, optional
        If True, plots the confusion matrix and the accuracy and loss curves.
    min_hidden_layer_dnn : int, optional
        Lower bound on the number of hidden layers of the DNNs used in RMDL.
        Defaults to 1.
    max_hidden_layer_dnn : int, optional
        Upper bound on the number of hidden layers of the DNNs used in RMDL.
        Defaults to 8.
    min_nodes_dnn : int, optional
        Lower bound on the number of nodes in each DNN layer. Defaults to 128.
    max_nodes_dnn : int, optional
        Upper bound on the number of nodes in each DNN layer. Defaults to 1024.
    min_hidden_layer_rnn : int, optional
        Lower bound on the number of hidden layers of the RNNs used in RMDL.
        Defaults to 1.
    max_hidden_layer_rnn : int, optional
        Upper bound on the number of hidden layers of the RNNs used in RMDL.
        Defaults to 5.
    min_nodes_rnn : int, optional
        Lower bound on the number of units (LSTM or GRU) in each RNN layer.
        Defaults to 32.
    max_nodes_rnn : int, optional
        Upper bound on the number of units (LSTM or GRU) in each RNN layer.
        Defaults to 128.
    min_hidden_layer_cnn : int, optional
        Lower bound on the number of hidden layers of the CNNs used in RMDL.
        Defaults to 3.
    max_hidden_layer_cnn : int, optional
        Upper bound on the number of hidden layers of the CNNs used in RMDL.
        Defaults to 10.
    min_nodes_cnn : int, optional
        Lower bound on the number of filters (2D convolution layer) in each
        CNN layer. Defaults to 128.
    max_nodes_cnn : int, optional
        Upper bound on the number of filters (2D convolution layer) in each
        CNN layer. Defaults to 512.
    random_state : int, optional
        Seed used by the random number generator. Defaults to 42.
    random_optimizor : bool, optional
        If False, all models use the adam optimizer; if True, each model uses
        a randomly chosen optimizer. Defaults to True.
    dropout : float, optional
        Between 0 and 1. Fraction of the units to drop for the linear
        transformation of the inputs. Defaults to 0.5.
    no_of_classes : int, optional
        Number of classes. Defaults to 0, in which case it is inferred
        from y_train.
    """
    np.random.seed(random_state)

    glove_directory = GloVe_dir
    GloVe_needed = random_deep[1] != 0 or random_deep[2] != 0

    # example_input = [0, 1, 3]
    # example_output:
    # [[1 0 0 0]
    #  [0 1 0 0]
    #  [0 0 0 1]]
    def one_hot_encoder(value, label_data_):
        label_data_[value] = 1
        return label_data_

    def _one_hot_values(labels_data):
        encoded = [0] * len(labels_data)
        for index_no, value in enumerate(labels_data):
            max_value = [0] * (np.max(labels_data) + 1)
            encoded[index_no] = one_hot_encoder(value, max_value)
        return np.array(encoded)

    # Check whether the labels are one-hot encoded; otherwise the dense layer
    # will raise a shape error.
    if not isinstance(y_train[0], list) and not isinstance(y_train[0], np.ndarray) \
            and not sparse_categorical:
        print("converted_into_one_hot")
        y_train = _one_hot_values(y_train)
        y_test = _one_hot_values(y_test)

    if GloVe_needed:
        if glove_directory == "":
            GloVe_directory = GloVe.download_and_extract()
            GloVe_DIR = os.path.join(GloVe_directory, GloVe_file)
        else:
            GloVe_DIR = os.path.join(glove_directory, GloVe_file)
        if not os.path.isfile(GloVe_DIR):
            print("Could not find " + GloVe_DIR +
                  "; set the GloVe directory in Global.py")
            exit()

    G.setup()

    if random_deep[0] != 0:
        x_train_tfidf, x_test_tfidf = txt.loadData(x_train, x_test,
                                                   MAX_NB_WORDS=MAX_NB_WORDS)
    if random_deep[1] != 0 or random_deep[2] != 0:
        print(GloVe_DIR)
        x_train_embedded, x_test_embedded, word_index, embeddings_index = \
            txt.loadData_Tokenizer(x_train, x_test, GloVe_DIR, MAX_NB_WORDS,
                                   MAX_SEQUENCE_LENGTH, EMBEDDING_DIM)

    del x_train
    del x_test
    gc.collect()

    y_pr = []
    History = []
    score = []

    if no_of_classes == 0:
        # Infer the number of classes; np.max(data) + 1 does not work for
        # one-hot encoded labels.
        if sparse_categorical:
            number_of_classes = np.max(y_train) + 1
        else:
            number_of_classes = len(y_train[0])
    else:
        number_of_classes = no_of_classes
    print(number_of_classes)

    i = 0
    while i < random_deep[0]:
        try:
            print("DNN " + str(i))
            filepath = os.path.join("weights", "weights_DNN_" + str(i) + ".hdf5")
            checkpoint = ModelCheckpoint(filepath,
                                         monitor='val_acc',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max')
            callbacks_list = [checkpoint]
            model_DNN, model_tmp = BuildModel.Build_Model_DNN_Text(
                x_train_tfidf.shape[1], number_of_classes, sparse_categorical,
                min_hidden_layer_dnn, max_hidden_layer_dnn,
                min_nodes_dnn, max_nodes_dnn, random_optimizor, dropout)
            model_history = model_DNN.fit(x_train_tfidf, y_train,
                                          validation_data=(x_test_tfidf, y_test),
                                          epochs=epochs[0],
                                          batch_size=batch_size,
                                          callbacks=callbacks_list,
                                          verbose=2)
            History.append(model_history)
            model_tmp.load_weights(filepath)
            if sparse_categorical:
                model_tmp.compile(loss='sparse_categorical_crossentropy',
                                  optimizer='adam',
                                  metrics=['accuracy'])
                y_pr_ = model_tmp.predict_classes(x_test_tfidf,
                                                  batch_size=batch_size)
                y_pr.append(np.array(y_pr_))
                score.append(accuracy_score(y_test, y_pr_))
            else:
                model_tmp.compile(loss='categorical_crossentropy',
                                  optimizer='adam',
                                  metrics=['accuracy'])
                y_pr_ = model_tmp.predict(x_test_tfidf, batch_size=batch_size)
                y_pr_ = np.argmax(y_pr_, axis=1)
                y_pr.append(np.array(y_pr_))
                y_test_temp = np.argmax(y_test, axis=1)
                score.append(accuracy_score(y_test_temp, y_pr_))
            i += 1
            del model_tmp
            del model_DNN
        except Exception as e:
            print("Check the Error \n {} ".format(e))
            print("Error in model", i, "; trying to re-generate another model")
            if max_hidden_layer_dnn > 3:
                max_hidden_layer_dnn -= 1
            if max_nodes_dnn > 256:
                max_nodes_dnn -= 8

    try:
        del x_train_tfidf
        del x_test_tfidf
        gc.collect()
    except NameError:
        pass

    i = 0
    while i < random_deep[1]:
        try:
            print("RNN " + str(i))
            filepath = os.path.join("weights", "weights_RNN_" + str(i) + ".hdf5")
            checkpoint = ModelCheckpoint(filepath,
                                         monitor='val_acc',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max')
            callbacks_list = [checkpoint]
            model_RNN, model_tmp = BuildModel.Build_Model_RNN_Text(
                word_index, embeddings_index, number_of_classes,
                MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, sparse_categorical,
                min_hidden_layer_rnn, max_hidden_layer_rnn,
                min_nodes_rnn, max_nodes_rnn, random_optimizor, dropout)
            model_history = model_RNN.fit(x_train_embedded, y_train,
                                          validation_data=(x_test_embedded, y_test),
                                          epochs=epochs[1],
                                          batch_size=batch_size,
                                          callbacks=callbacks_list,
                                          verbose=2)
            History.append(model_history)
            model_tmp.load_weights(filepath)
            if sparse_categorical:
                model_tmp.compile(loss='sparse_categorical_crossentropy',
                                  optimizer='rmsprop',
                                  metrics=['accuracy'])
                y_pr_ = model_tmp.predict_classes(x_test_embedded,
                                                  batch_size=batch_size)
                y_pr.append(np.array(y_pr_))
                score.append(accuracy_score(y_test, y_pr_))
            else:
                model_tmp.compile(loss='categorical_crossentropy',
                                  optimizer='rmsprop',
                                  metrics=['accuracy'])
                y_pr_ = model_tmp.predict(x_test_embedded,
                                          batch_size=batch_size)
                y_pr_ = np.argmax(y_pr_, axis=1)
                y_pr.append(np.array(y_pr_))
                y_test_temp = np.argmax(y_test, axis=1)
                score.append(accuracy_score(y_test_temp, y_pr_))
            i += 1
            del model_tmp
            del model_RNN
            gc.collect()
        except Exception:
            print("Error in model", i, "; trying to re-generate another model")
            if max_hidden_layer_rnn > 3:
                max_hidden_layer_rnn -= 1
            if max_nodes_rnn > 64:
                max_nodes_rnn -= 2

    gc.collect()

    i = 0
    while i < random_deep[2]:
        try:
            print("CNN " + str(i))
            model_CNN, model_tmp = BuildModel.Build_Model_CNN_Text(
                word_index, embeddings_index, number_of_classes,
                MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, sparse_categorical,
                min_hidden_layer_cnn, max_hidden_layer_cnn,
                min_nodes_cnn, max_nodes_cnn, random_optimizor, dropout)
            filepath = os.path.join("weights", "weights_CNN_" + str(i) + ".hdf5")
            checkpoint = ModelCheckpoint(filepath,
                                         monitor='val_acc',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max')
            callbacks_list = [checkpoint]
            model_history = model_CNN.fit(x_train_embedded, y_train,
                                          validation_data=(x_test_embedded, y_test),
                                          epochs=epochs[2],
                                          batch_size=batch_size,
                                          callbacks=callbacks_list,
                                          verbose=2)
            History.append(model_history)
            model_tmp.load_weights(filepath)
            if sparse_categorical:
                model_tmp.compile(loss='sparse_categorical_crossentropy',
                                  optimizer='rmsprop',
                                  metrics=['accuracy'])
            else:
                model_tmp.compile(loss='categorical_crossentropy',
                                  optimizer='rmsprop',
                                  metrics=['accuracy'])
            y_pr_ = model_tmp.predict(x_test_embedded, batch_size=batch_size)
            y_pr_ = np.argmax(y_pr_, axis=1)
            y_pr.append(np.array(y_pr_))
            if sparse_categorical:
                score.append(accuracy_score(y_test, y_pr_))
            else:
                y_test_temp = np.argmax(y_test, axis=1)
                score.append(accuracy_score(y_test_temp, y_pr_))
            i += 1
            del model_tmp
            del model_CNN
            gc.collect()
        except Exception:
            print("Error in model", i, "; trying to re-generate another model")
            if max_hidden_layer_cnn > 5:
                max_hidden_layer_cnn -= 1
            if max_nodes_cnn > 128:
                max_nodes_cnn -= 2
                min_nodes_cnn -= 1

    gc.collect()

    # Majority vote over the per-model predictions: each row of y_proba holds
    # one test sample's predicted labels across all ensembled models.
    y_proba = np.array(y_pr).transpose()
    final_y = []
    for i in range(0, y_proba.shape[0]):
        a = np.array(y_proba[i, :])
        a = collections.Counter(a).most_common()[0][0]
        final_y.append(a)

    if sparse_categorical:
        F_score = accuracy_score(y_test, final_y)
        F1 = precision_recall_fscore_support(y_test, final_y, average='micro')
        F2 = precision_recall_fscore_support(y_test, final_y, average='macro')
        F3 = precision_recall_fscore_support(y_test, final_y, average='weighted')
        cnf_matrix = confusion_matrix(y_test, final_y)  # Compute confusion matrix
        if plot:
            classes = list(range(0, np.max(y_test) + 1))
            # Plot non-normalized confusion matrix
            Plot.plot_confusion_matrix(cnf_matrix, classes=classes,
                                       title='Confusion matrix, without normalization')
            # Plot normalized confusion matrix
            Plot.plot_confusion_matrix(cnf_matrix, classes=classes,
                                       normalize=True,
                                       title='Normalized confusion matrix')
    else:
        y_test_temp = np.argmax(y_test, axis=1)
        F_score = accuracy_score(y_test_temp, final_y)
        F1 = precision_recall_fscore_support(y_test_temp, final_y, average='micro')
        F2 = precision_recall_fscore_support(y_test_temp, final_y, average='macro')
        F3 = precision_recall_fscore_support(y_test_temp, final_y, average='weighted')

    if plot:
        Plot.RMDL_epoch(History)

    print(y_proba.shape)
    print("Accuracy of", len(score), "models:", score)
    print("Accuracy:", F_score)
    print("F1_Micro:", F1)
    print("F1_Macro:", F2)
    print("F1_weighted:", F3)
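
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original RMDL module): calling
# Text_Classification on the 20 Newsgroups corpus from scikit-learn. The
# dataset choice and the reduced epoch counts are assumptions for a quick
# run, not RMDL defaults.
#
#   from sklearn.datasets import fetch_20newsgroups
#
#   newsgroups_train = fetch_20newsgroups(subset='train')
#   newsgroups_test = fetch_20newsgroups(subset='test')
#   Text_Classification(newsgroups_train.data, newsgroups_train.target,
#                       newsgroups_test.data, newsgroups_test.target,
#                       batch_size=128,
#                       sparse_categorical=True,  # integer labels of shape (n,)
#                       random_deep=[3, 3, 3],    # 3 DNNs, 3 RNNs, 3 CNNs
#                       epochs=[20, 50, 50])      # reduced from [500, 500, 500]
# ---------------------------------------------------------------------------
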
def Image_Classification(x_train, y_train, x_test, y_test, shape,
                         batch_size=128,
                         sparse_categorical=True,
                         random_deep=[3, 3, 3],
                         epochs=[500, 500, 500],
                         plot=False,
                         min_hidden_layer_dnn=1, max_hidden_layer_dnn=8,
                         min_nodes_dnn=128, max_nodes_dnn=1024,
                         min_hidden_layer_rnn=1, max_hidden_layer_rnn=5,
                         min_nodes_rnn=32, max_nodes_rnn=128,
                         min_hidden_layer_cnn=3, max_hidden_layer_cnn=10,
                         min_nodes_cnn=128, max_nodes_cnn=512,
                         random_state=42,
                         random_optimizor=True,
                         dropout=0.05):
    """
    Image_Classification(x_train, y_train, x_test, y_test, shape,
                         batch_size=128, sparse_categorical=True,
                         random_deep=[3, 3, 3], epochs=[500, 500, 500],
                         plot=False,
                         min_hidden_layer_dnn=1, max_hidden_layer_dnn=8,
                         min_nodes_dnn=128, max_nodes_dnn=1024,
                         min_hidden_layer_rnn=1, max_hidden_layer_rnn=5,
                         min_nodes_rnn=32, max_nodes_rnn=128,
                         min_hidden_layer_cnn=3, max_hidden_layer_cnn=10,
                         min_nodes_cnn=128, max_nodes_cnn=512,
                         random_state=42, random_optimizor=True,
                         dropout=0.05)

    Parameters
    ----------
    x_train : array
        Input X for training.
    y_train : array of int
        Input Y for training.
    x_test : array
        Input X for testing.
    y_test : array of int
        Input Y for testing.
    shape : tuple
        Shape of a single input image, e.g. (28, 28, 1).
    batch_size : int, optional
        Number of samples per gradient update. Defaults to 128.
    sparse_categorical : bool, optional
        Should be True when the target is of shape (n, 1), i.e. integer
        labels. Defaults to True.
    random_deep : array of int of length 3, optional
        Number of ensembled models used in RMDL: random_deep[0] is the number
        of DNNs, random_deep[1] the number of RNNs, and random_deep[2] the
        number of CNNs. Defaults to [3, 3, 3].
    epochs : array of int of length 3, optional
        Number of epochs for each ensembled model: epochs[0] for the DNNs,
        epochs[1] for the RNNs, and epochs[2] for the CNNs.
        Defaults to [500, 500, 500].
    plot : bool, optional
        If True, plots the confusion matrix and the accuracy and loss curves.
    min_hidden_layer_dnn : int, optional
        Lower bound on the number of hidden layers of the DNNs used in RMDL.
        Defaults to 1.
    max_hidden_layer_dnn : int, optional
        Upper bound on the number of hidden layers of the DNNs used in RMDL.
        Defaults to 8.
    min_nodes_dnn : int, optional
        Lower bound on the number of nodes in each DNN layer. Defaults to 128.
    max_nodes_dnn : int, optional
        Upper bound on the number of nodes in each DNN layer. Defaults to 1024.
    min_hidden_layer_rnn : int, optional
        Lower bound on the number of hidden layers of the RNNs used in RMDL.
        Defaults to 1.
    max_hidden_layer_rnn : int, optional
        Upper bound on the number of hidden layers of the RNNs used in RMDL.
        Defaults to 5.
    min_nodes_rnn : int, optional
        Lower bound on the number of units (LSTM or GRU) in each RNN layer.
        Defaults to 32.
    max_nodes_rnn : int, optional
        Upper bound on the number of units (LSTM or GRU) in each RNN layer.
        Defaults to 128.
    min_hidden_layer_cnn : int, optional
        Lower bound on the number of hidden layers of the CNNs used in RMDL.
        Defaults to 3.
    max_hidden_layer_cnn : int, optional
        Upper bound on the number of hidden layers of the CNNs used in RMDL.
        Defaults to 10.
    min_nodes_cnn : int, optional
        Lower bound on the number of filters (2D convolution layer) in each
        CNN layer. Defaults to 128.
    max_nodes_cnn : int, optional
        Upper bound on the number of filters (2D convolution layer) in each
        CNN layer. Defaults to 512.
    random_state : int, optional
        Seed used by the random number generator. Defaults to 42.
    random_optimizor : bool, optional
        If False, all models use the adam optimizer; if True, each model uses
        a randomly chosen optimizer. Defaults to True.
    dropout : float, optional
        Between 0 and 1. Fraction of the units to drop for the linear
        transformation of the inputs. Defaults to 0.05.
    """
    if len(x_train) != len(y_train):
        raise ValueError('x_train and y_train must have the same length: '
                         'x_train has ' + str(len(x_train)) +
                         ' samples, y_train has ' + str(len(y_train)))
    if len(x_test) != len(y_test):
        raise ValueError('x_test and y_test must have the same length: '
                         'x_test has ' + str(len(x_test)) +
                         ' samples, y_test has ' + str(len(y_test)))

    np.random.seed(random_state)
    G.setup()

    y_proba = []
    score = []
    history_ = []

    if sparse_categorical:
        number_of_classes = np.max(y_train) + 1
    else:
        # For one-hot labels, the number of classes is the number of columns.
        number_of_classes = np.shape(y_train)[1]

    i = 0
    while i < random_deep[0]:
        try:
            print("DNN ", i, "\n")
            model_DNN, model_tmp = BuildModel.Build_Model_DNN_Image(
                shape, number_of_classes, sparse_categorical,
                min_hidden_layer_dnn, max_hidden_layer_dnn,
                min_nodes_dnn, max_nodes_dnn, random_optimizor, dropout)
            filepath = os.path.join("weights", "weights_DNN_" + str(i) + ".hdf5")
            checkpoint = ModelCheckpoint(filepath,
                                         monitor='val_accuracy',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max')
            callbacks_list = [checkpoint]
            history = model_DNN.fit(x_train, y_train,
                                    validation_data=(x_test, y_test),
                                    epochs=epochs[0],
                                    batch_size=batch_size,
                                    callbacks=callbacks_list,
                                    verbose=2)
            history_.append(history)
            model_tmp.load_weights(filepath)
            if sparse_categorical:
                model_tmp.compile(loss='sparse_categorical_crossentropy',
                                  optimizer='adam',
                                  metrics=['accuracy'])
            else:
                model_tmp.compile(loss='categorical_crossentropy',
                                  optimizer='adam',
                                  metrics=['accuracy'])
            y_pr = model_tmp.predict_classes(x_test, batch_size=batch_size)
            y_proba.append(np.array(y_pr))
            score.append(accuracy_score(y_test, y_pr))
            i = i + 1
            del model_tmp
            del model_DNN
            gc.collect()
        except Exception:
            print("Error in model", i, "; trying to re-generate another model")
            if max_hidden_layer_dnn > 3:
                max_hidden_layer_dnn -= 1
            if max_nodes_dnn > 256:
                max_nodes_dnn -= 8

    i = 0
    while i < random_deep[1]:
        try:
            print("RNN ", i, "\n")
            model_RNN, model_tmp = BuildModel.Build_Model_RNN_Image(
                shape, number_of_classes, sparse_categorical,
                min_nodes_rnn, max_nodes_rnn, random_optimizor, dropout)
            filepath = os.path.join("weights", "weights_RNN_" + str(i) + ".hdf5")
            checkpoint = ModelCheckpoint(filepath,
                                         monitor='val_accuracy',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max')
            callbacks_list = [checkpoint]
            history = model_RNN.fit(x_train, y_train,
                                    validation_data=(x_test, y_test),
                                    epochs=epochs[1],
                                    batch_size=batch_size,
                                    verbose=2,
                                    callbacks=callbacks_list)
            model_tmp.load_weights(filepath)
            model_tmp.compile(loss='sparse_categorical_crossentropy',
                              optimizer='rmsprop',
                              metrics=['accuracy'])
            history_.append(history)
            y_pr = model_tmp.predict(x_test, batch_size=batch_size)
            y_pr = np.argmax(y_pr, axis=1)
            y_proba.append(np.array(y_pr))
            score.append(accuracy_score(y_test, y_pr))
            i = i + 1
            del model_tmp
            del model_RNN
            gc.collect()
        except Exception:
            print("Error in model", i, "; trying to re-generate another model")
            if max_hidden_layer_rnn > 3:
                max_hidden_layer_rnn -= 1
            if max_nodes_rnn > 64:
                max_nodes_rnn -= 2

    # reshape to be [samples][pixels][width][height]
    i = 0
    while i < random_deep[2]:
        try:
            print("CNN ", i, "\n")
            model_CNN, model_tmp = BuildModel.Build_Model_CNN_Image(
                shape, number_of_classes, sparse_categorical,
                min_hidden_layer_cnn, max_hidden_layer_cnn,
                min_nodes_cnn, max_nodes_cnn, random_optimizor, dropout)
            filepath = os.path.join("weights", "weights_CNN_" + str(i) + ".hdf5")
            checkpoint = ModelCheckpoint(filepath,
                                         monitor='val_accuracy',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max')
            callbacks_list = [checkpoint]
            history = model_CNN.fit(x_train, y_train,
                                    validation_data=(x_test, y_test),
                                    epochs=epochs[2],
                                    batch_size=batch_size,
                                    callbacks=callbacks_list,
                                    verbose=2)
            history_.append(history)
            model_tmp.load_weights(filepath)
            model_tmp.compile(loss='sparse_categorical_crossentropy',
                              optimizer='adam',
                              metrics=['accuracy'])
            y_pr = model_tmp.predict_classes(x_test, batch_size=batch_size)
            y_proba.append(np.array(y_pr))
            score.append(accuracy_score(y_test, y_pr))
            i = i + 1
            del model_tmp
            del model_CNN
            gc.collect()
        except Exception:
            print("Error in model", i, "; trying to re-generate another model")
            if max_hidden_layer_cnn > 5:
                max_hidden_layer_cnn -= 1
            if max_nodes_cnn > 128:
                max_nodes_cnn -= 2
                min_nodes_cnn -= 1

    # Majority vote over the per-model predictions.
    y_proba = np.array(y_proba).transpose()
    print(y_proba.shape)
    final_y = []
    for i in range(0, y_proba.shape[0]):
        a = np.array(y_proba[i, :])
        a = collections.Counter(a).most_common()[0][0]
        final_y.append(a)

    F_score = accuracy_score(y_test, final_y)
    F1 = f1_score(y_test, final_y, average='micro')
    F2 = f1_score(y_test, final_y, average='macro')
    F3 = f1_score(y_test, final_y, average='weighted')
    cnf_matrix = confusion_matrix(y_test, final_y)  # Compute confusion matrix
    np.set_printoptions(precision=2)

    if plot:
        classes = list(range(0, np.max(y_test) + 1))
        # Plot non-normalized confusion matrix
        Plot.plot_confusion_matrix(cnf_matrix, classes=classes,
                                   title='Confusion matrix, without normalization')
        # Plot normalized confusion matrix
        Plot.plot_confusion_matrix(cnf_matrix, classes=classes,
                                   normalize=True,
                                   title='Normalized confusion matrix')
        Plot.RMDL_epoch(history_)

    print(y_proba.shape)
    print("Accuracy of", len(score), "models:", score)
    print("Accuracy:", F_score)
    print("F1_Micro:", F1)
    print("F1_Macro:", F2)
    print("F1_weighted:", F3)
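
if __name__ == "__main__":
    # Demo sketch (illustrative, not part of the original RMDL module):
    # Image_Classification on MNIST loaded via keras.datasets. The reshape to
    # (28, 28, 1), the scaling to [0, 1], and the small epoch counts are
    # assumptions for a quick run, not RMDL defaults.
    from keras.datasets import mnist

    (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
    X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32') / 255.0
    X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32') / 255.0
    Image_Classification(X_train, Y_train, X_test, Y_test,
                         shape=(28, 28, 1),        # one grayscale channel
                         sparse_categorical=True,  # integer labels 0-9
                         random_deep=[3, 3, 3],    # 3 DNNs, 3 RNNs, 3 CNNs
                         epochs=[5, 5, 5])         # reduced from [500, 500, 500]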