def plot_confusion_matrix(cm, classes_types, ofname, normalize=False,
                          title='Confusion matrix', cmap=plt.cm.RdPu,
                          show=True):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    INPUTS:
        cm: Confusion matrix as a 2-D NumPy array (rows are indexed as the
            "True label" axis, columns as the "Predicted label" axis)
        classes_types: Class names used as tick labels on both axes
        ofname: Path of the image file to write
        normalize: If True, divide every row by its row total before
                   printing and plotting
        title: Title drawn above the plot
        cmap: Matplotlib colormap used for the heat map
        show: If True, display the figure with plt.show() after saving
    """
    if normalize:
        cm = cm.astype('float')
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A row whose total is zero is left at zero instead of being divided
        # by zero (which would fill it with NaN and emit a runtime warning).
        cm = np.divide(cm, row_totals, out=np.zeros_like(cm),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
        cm = cm.astype('int')

    print(cm)

    plt.figure(figsize=(9, 8))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=16)
    cb = plt.colorbar(fraction=0.046, pad=0.04)
    cb.ax.tick_params(labelsize=16)

    tick_marks = np.arange(len(classes_types))
    plt.xticks(tick_marks, classes_types, rotation=45)
    plt.yticks(tick_marks, classes_types)
    plt.tick_params(axis='x', labelsize=16)
    plt.tick_params(axis='y', labelsize=16)

    # Annotate every cell with its value. White text is used for the lowest
    # and highest values and black text for everything in between; the cut-offs
    # differ between normalized fractions and raw counts.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        value = cm[i, j]
        if normalize:
            cell_text = "{:0.2f}".format(value)
            use_white = (value < 0.01) or (value >= 0.75)
        else:
            cell_text = "{:0}".format(value)
            use_white = (value < 3) or (value >= 100)
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if use_white else "black",
                 fontsize=18)

    plt.ylabel('True label', fontsize=16)
    plt.xlabel('Predicted label', fontsize=16)
    plt.tight_layout()

    ensure_dir(ofname)
    plt.savefig(ofname, bbox_inches='tight', pad_inches=0.1)
    if show:
        plt.show()
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()
def feature_maps(model, x_train, y_train, img_index, ofname, show=True):
    """
    Plot up to 16 feature maps produced by an early layer of the network.

    INPUTS:
        model: The trained Keras model
        x_train: The training set images
        y_train: The label of the training set (accepted for interface
                 compatibility; not used by this function)
        img_index: Index of the single candidate whose feature maps are
                   plotted - values can range from 0 to Nimages
        ofname: Path of the image file to write
        show: If True, display the figure with plt.show() after saving
    """
    # Summarize the output shape of every layer whose name contains 'conv'.
    for i, layer in enumerate(model.layers):
        if 'conv' not in layer.name:
            continue
        print(i, layer.name, layer.output.shape)

    # Truncated model that stops at model.layers[1].
    # NOTE(review): the original comment calls this "the first hidden layer";
    # whether index 1 is that layer depends on how the model was built.
    redefine_model = Model(inputs=model.inputs, outputs=model.layers[1].output)
    activations = redefine_model.predict(x_train)

    # Plot the maps on a 4x4 grid. The count is capped at the number of
    # channels available so a layer with fewer than 16 filters does not raise
    # an IndexError.
    square = 4
    n_maps = min(square * square, activations.shape[-1])
    plt.figure(figsize=(8, 8))
    for channel in range(n_maps):
        ax = plt.subplot(square, square, channel + 1)
        ax.set_xticks([])
        ax.set_yticks([])
        plt.imshow(activations[img_index, :, :, channel])

    plt.tight_layout()
    ensure_dir(ofname)
    plt.savefig(ofname, bbox_inches='tight', pad_inches=0.1)
    if show:
        plt.show()
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()
def plot_roc(fpr, tpr, auc, ofname, show=True):
    """
    Draw a ROC (Receiver Operating Characteristic) curve and save it.

    INPUTS:
        fpr: False positive rate values (x axis)
        tpr: True positive rate values (y axis)
        auc: Area under the curve, shown in the legend with two decimals
        ofname: Path of the image file to write
        show: If True, display the figure with plt.show() after saving
    """
    figure = plt.figure(figsize=figSize)
    axes = figure.gca()

    curve_label = 'ROC curve (area = {0:0.2f})'.format(auc)
    axes.plot(fpr, tpr, lw=2.5, label=curve_label)

    axes.set_xlabel("FPR", fontsize=fontSize)
    axes.set_ylabel("TPR", fontsize=fontSize)
    axes.set_ylim([0.0, 1.01])
    axes.tick_params(axis='both', labelsize=fontSize)

    legend_font = {'size': 14}
    axes.legend(loc="best", prop=legend_font, bbox_to_anchor=(1, 0.15))
    figure.tight_layout()

    ensure_dir(ofname)
    plt.savefig(ofname, bbox_inches='tight', pad_inches=0.1)
    if show:
        plt.show()
    plt.close()
def FRB_prediction(model_name, X_test, ID, result_dir, probability,
                   model_dir="./FRBID_model/"):
    """
    Load a pre-trained network and run prediction on new candidate files.

    INPUT:
        model_name: 'NET1_32_64', 'NET1_64_128', 'NET1_128_256', 'NET2', 'NET3'
        X_test: Image data should have shape (Nimages,100,100,3),
                (Nimages,30,30,3), (Nimages,30,30,4). This will vary depending
                on the criteria one uses for min_pix, max_pix and num_images.
        ID: The transient ID extracted from the csv file, ID=data.iloc[:,0]
        result_dir: The directory to save the csv prediction file
        probability: Threshold on the real-source probability; candidates at
                     or above it are flagged in the 'label' column
        model_dir: Directory holding '<model_name>.json' (architecture) and
                   '<model_name>.h5' (weights)

    OUTPUT:
        overall_real_prob: Column 1 of the model output, i.e. the probability
                           that each source is real
        overall_dataframe: A table with the columns 'candidate', 'probability'
                           and 'label'; the same table is written to
                           'results_<model_name>.csv' inside result_dir
    """
    # Load the architecture from JSON; the context manager closes the file
    # even if reading fails.
    with open(os.path.join(model_dir, model_name + ".json"), 'r') as json_file:
        loaded_model_json = json_file.read()
    fit_model = model_from_json(loaded_model_json)

    # Load weights into the new model.
    fit_model.load_weights(os.path.join(model_dir, model_name + ".h5"))
    print("Loaded model:" + model_name + " from disk")

    # Prefer predict_proba where the model still provides it and fall back to
    # predict otherwise, since newer Keras releases removed predict_proba.
    predict = getattr(fit_model, 'predict_proba', None) or fit_model.predict
    overall_probability = predict(X_test)

    # For every candidate, keep the probability that it is a real source.
    overall_real_prob = overall_probability[:, 1]
    overall_dataframe = pd.DataFrame(ID, columns=['candidate'])
    overall_dataframe['probability'] = overall_real_prob
    # np.round is applied to the boolean threshold mask, so the stored values
    # are numeric rather than True/False.
    overall_dataframe['label'] = np.round(overall_real_prob >= probability)

    # Build the output path with os.path.join so result_dir works with or
    # without a trailing separator, and create its parent directory.
    csv_path = os.path.join(result_dir, 'results_' + model_name + '.csv')
    ensure_dir(csv_path)
    overall_dataframe.to_csv(csv_path, index=None)
    return overall_real_prob, overall_dataframe
def _history_series(history, candidates):
    """Return the first series in *history* stored under one of *candidates*."""
    for key in candidates:
        if key in history:
            return history[key]
    raise KeyError(candidates[0])


def _plot_training_curve(train_values, val_values, ylabel, ofname, show):
    """Plot one training/validation curve pair, save it to *ofname*, close it."""
    plt.figure(figsize=figSize)
    plt.plot(train_values, c='r', lw=2.5, label='Training')
    plt.plot(val_values, c='b', lw=2.5, label='validation')
    plt.xlabel('Epoch', fontsize=fontSize)
    plt.ylabel(ylabel, fontsize=fontSize)
    plt.tick_params(axis='both', labelsize=fontSize)
    plt.legend(loc="best", prop={'size': 14}, bbox_to_anchor=(1, 0.5))
    plt.tight_layout()
    ensure_dir(ofname)
    plt.savefig(ofname, bbox_inches='tight', pad_inches=0.1)
    if show:
        plt.show()
    plt.close()


def optimsation_curve(history_, plot_dir1, plot_dir2, show=True):
    """
    Plot the accuracy and loss curves recorded during training and validation.

    INPUTS:
        history_: The log history of the fully trained model; its `.history`
                  mapping is read
        plot_dir1: Path of the file to save the accuracy curves
        plot_dir2: Path of the file to save the loss curves
        show: If True, display each figure with plt.show() after saving

    Raises KeyError if the history contains none of the expected keys.
    """
    history = history_.history

    # Accuracy may be stored under 'acc' or 'accuracy'; accept either
    # spelling, trying the original 'acc' first.
    train_acc = _history_series(history, ('acc', 'accuracy'))
    val_acc = _history_series(history, ('val_acc', 'val_accuracy'))
    _plot_training_curve(train_acc, val_acc, 'Accuracy', plot_dir1, show)

    _plot_training_curve(history['loss'], history['val_loss'], 'Loss',
                         plot_dir2, show)
def plot_images(data, ID, y_true, odir, savefig=False, show=True):
    """
    Plot the input images of every candidate, one figure per candidate.

    INPUTS:
        data: The candidate images, indexed as data[candidate, :, :, channel]
        ID: The transient ID of each candidate (used for the file name)
        y_true: The label of each candidate (used as the sub-directory name)
        odir: The directory to save the images
        savefig: True to save each figure as
                 '<odir>/<label>/<ID>.pdf'
        show: If True, display each figure with plt.show()
    """
    titles = ['DM-Time', 'Frequency-Time']
    n_channels = data.shape[3]

    for j in range(data.shape[0]):
        # squeeze=False always returns a 2-D array of Axes, so single-channel
        # data (where plt.subplots would return a bare Axes) also works.
        fig, axs = plt.subplots(1, n_channels, figsize=(15, 4),
                                facecolor='w', edgecolor='k', squeeze=False)
        fig.subplots_adjust(hspace=.2, wspace=.05)

        for channel, ax in enumerate(axs.ravel()):
            im = ax.imshow(data[j, :, :, channel])
            cb = fig.colorbar(im, fraction=0.046, pad=0.04, ax=ax)
            cb.ax.tick_params(labelsize=14)
            # Only the first two channels have named titles; any further
            # channel gets a generic title instead of raising an IndexError.
            if channel < len(titles):
                ax.title.set_text(titles[channel])
            else:
                ax.title.set_text('Channel {}'.format(channel + 1))

        plt.tight_layout()

        if savefig:
            ofname = os.path.join(odir, str(y_true[j]), str(ID[j]) + ".pdf")
            ensure_dir(ofname)
            plt.savefig(ofname, bbox_inches='tight', pad_inches=0.1)
        if show:
            plt.show()
        # One figure is created per candidate; close it so large candidate
        # sets do not accumulate open figures.
        plt.close(fig)
def save_classified_examples(X_test, y_test, ID_test, correct_classification,
                             probability, odir_real, odir_bogus, savecsv=True):
    """
    Tabulate per-candidate probabilities and optionally save them as csv files.

    INPUTS:
        X_test: Test candidate images (accepted for interface compatibility;
                not used by this function)
        y_test: Labels of the test candidates (0 = bogus, 1 = real)
        ID_test: The transient id for each candidate
        correct_classification: An array of test-set indices that marks which
                                candidates were correctly classified
        probability: Array with one row per candidate; column 0 is the bogus
                     probability and column 1 the real probability, e.g.
                     [0.1, 0.9] is a real candidate with probability 0.9
        odir_real: The directory to save the csv files for real candidates
        odir_bogus: The directory to save the csv file for bogus candidates
        savecsv: True to save the csv files

    OUTPUT:
        overall_probability_real: Table of every candidate ('transientid')
                                  with its real probability ('ML_PROB_FRB')
        correctly_classified_bogus: Table of correctly classified bogus
                                    candidates with their bogus probability
                                    ('ML_PROB')
        correctly_classified_real: Table of correctly classified real
                                   candidates with their real probability
                                   ('ML_PROB')
    """
    overall_probability_real = pd.DataFrame(ID_test, columns=['transientid'])
    overall_probability_real['ML_PROB_FRB'] = probability[:, 1]

    # Split the correctly classified indices by their true label.
    labels_of_correct = y_test[correct_classification]
    bogus_true_indices = correct_classification[labels_of_correct == 0]
    real_true_indices = correct_classification[labels_of_correct == 1]

    # Each table pairs the transient IDs with the probability of the class
    # the candidates actually belong to.
    correctly_classified_bogus = pd.DataFrame(ID_test[bogus_true_indices],
                                              columns=['transientid'])
    correctly_classified_bogus['ML_PROB'] = probability[bogus_true_indices, 0]

    correctly_classified_real = pd.DataFrame(ID_test[real_true_indices],
                                             columns=['transientid'])
    correctly_classified_real['ML_PROB'] = probability[real_true_indices, 1]

    if savecsv:
        outputs = (
            (overall_probability_real, odir_real,
             'probability_candidate_classified_as_frb.csv'),
            (correctly_classified_real, odir_real,
             'correctly_classified_frb.csv'),
            (correctly_classified_bogus, odir_bogus,
             'correctly_classified_rfi.csv'),
        )
        for frame, out_dir, file_name in outputs:
            # os.path.join keeps the file inside out_dir whether or not the
            # directory was given with a trailing separator.
            ofname = os.path.join(out_dir, file_name)
            ensure_dir(ofname)
            frame.to_csv(ofname, index=None)

    return overall_probability_real, correctly_classified_bogus, correctly_classified_real