def buildPdf(condi_dist_fn, sub_path, h5_path='all_decoy.h5'):
    """Build the conditional distributions for one data set and save them.

    The energy matrix is read with ``loadEnerMatx(h5_path, sub_path)``,
    handed to ``nb.getConditionalDist`` together with the module-level
    ``dist_names``, written out with ``nb.saveCondiDistribution`` and finally
    printed.

    Args:
        condi_dist_fn: File name the resulting distributions are saved to.
        sub_path: Sub-path forwarded to ``loadEnerMatx`` to pick the data set.
        h5_path: File forwarded to ``loadEnerMatx``.  Defaults to
            ``'all_decoy.h5'``, the value that used to be hard-coded, so
            existing two-argument calls behave exactly as before.
    """
    ener_matx = loadEnerMatx(h5_path, sub_path)
    condi_dist = nb.getConditionalDist(ener_matx, dist_names)
    nb.saveCondiDistribution(condi_dist, condi_dist_fn)
    # A single parenthesised expression prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(condi_dist)
def calculateAuc():
    """Print the area under the ROC curve built from the stored MCC diffs.

    Reads the ``high_mcc_diff`` and ``low_mcc_diff`` datasets from the
    ``/gaussian_nb`` group of the hard-coded HDF5 file, feeds column 1 of each
    to ``nb.constructRoc`` (low first, then high) and integrates the returned
    TPR values over the FPR values with the trapezoidal rule.

    Returns:
        None.  The AUC is only printed.
    """
    import numpy as np
    import NB_classifier as nb

    h5_path = '/ddnB/work/jaydy/dat/output/linr_out/08ff_all_decoy.h5'

    # Open the file once, read-only, and close it deterministically; the
    # datasets are fully materialised by ``[()]`` before the handle goes away.
    with h5py.File(h5_path, 'r') as h5_file:
        high_mcc_diff = h5_file['/gaussian_nb/high/high_mcc_diff'][()]
        low_mcc_diff = h5_file['/gaussian_nb/low/low_mcc_diff'][()]

    TPRs, FPRs = nb.constructRoc(low_mcc_diff[:, 1], high_mcc_diff[:, 1])
    auc = np.trapz(TPRs, x=FPRs)

    # Same bytes as the Python 2 statement `print "AUC\t: ", auc`
    # (items separated by one space), but also valid Python 3.
    print(" ".join(["AUC\t: ", str(auc)]))
def constructLikelihoodDiff(self, high_pickle_fn, low_pickle_fn):
    """Store the low-minus-high conditional probability of the whole data set.

    The distributions saved in ``high_pickle_fn`` and ``low_pickle_fn`` are
    loaded and turned into pdfs via ``nb``; every column of ``self.all_set``
    except the first is then scored against both, and the difference
    (low score minus high score) is kept in ``self.likelihood_diff``.
    """
    dist_high, dist_low = [
        nb.loadCondiDistribution(pickle_fn)
        for pickle_fn in (high_pickle_fn, low_pickle_fn)
    ]
    pdf_high, pdf_low = [nb.buildPdf(dist) for dist in (dist_high, dist_low)]

    # Column 0 of all_set is left out of the scoring.
    features = self.all_set[:, 1:]
    score_high, score_low = [
        nb.calculateConditionalProb(features, pdf)
        for pdf in (pdf_high, pdf_low)
    ]
    self.likelihood_diff = score_low - score_high
def calculateLikelihoodDiff():
    """Return the likelihood differences of the high and the low decoy sets.

    Both sets are read from ``all_decoy.h5`` and scored with
    ``nb.getLikelihoodDiff`` against the pdfs built from ``high.dist`` and
    ``low.dist``.

    Returns:
        Tuple ``(likelihood_high_diff, likelihood_low_diff)``.
    """
    h5_path = 'all_decoy.h5'
    low_ener_matx = loadEnerMatx(h5_path, 'noncentralized_path/low_decoy')

    # Low pdf is prepared before the high one, as in the original sequence.
    low_pdf = nb.buildPdf(nb.loadCondiDistribution('low.dist'))
    high_pdf = nb.buildPdf(nb.loadCondiDistribution('high.dist'))

    likelihood_low_diff = nb.getLikelihoodDiff(low_ener_matx, high_pdf,
                                               low_pdf)

    high_ener_matx = loadEnerMatx(h5_path, 'noncentralized_path/high_decoy')
    likelihood_high_diff = nb.getLikelihoodDiff(high_ener_matx, high_pdf,
                                                low_pdf)
    return likelihood_high_diff, likelihood_low_diff
# NOTE(review): the `return` below is the tail of a function whose `def` line
# is not visible in this view; it is reproduced untouched.
    return f[mcc_diff_path][()]


if __name__ == "__main__":
    ################################################################################
    # print plain text file
    # nb_ff = NB_ff('04ff_all_decoy.h5')
    # nb_ff.printEner()
    # nb_ff = NB_ff('08ff_all_decoy.h5')
    # nb_ff.printEner()
    # nb_ff = NB_ff('06ff_all_decoy.h5')
    # nb_ff.printEner()
    ################################################################################
    # build pdf
    # NOTE(review): `sys` is only referenced by the commented-out argv line.
    import sys
    # matrix_fn = sys.argv[1]
    matrix_fn = '08ff_high_decoy.mat'
    # matrix_fn = '04ff_low_decoy.mat'
    # Output name: everything before the first '.' of the matrix file name,
    # plus the '.dist' suffix.
    condi_dist_fn = matrix_fn.split('.')[0] + '.dist'
    # The matrix file is read as space-delimited text.
    matrix = np.loadtxt(matrix_fn, delimiter=' ')
    condi_dist = nb.getConditionalDist(matrix, dist_names)
    nb.saveCondiDistribution(condi_dist, condi_dist_fn)
    # Reload what was just saved and print the reloaded copy.
    condi_dist = nb.loadCondiDistribution(condi_dist_fn)
    print condi_dist
from lst_sub import getLst
import NB_classifier as nb

ener_rows_ifn = 'ener_row_name.txt'
ff = '08ff'
low_condi_dist_fn = ff + '_low_decoy.dist'
high_condi_dist_fn = ff + '_high_decoy.dist'
# NOTE(review): not used in the visible part of the script; presumably the
# output file written further down -- confirm.
bayes_dist_ofn = ff + '_bayes.txt'

low_condi_dist = nb.loadCondiDistribution(low_condi_dist_fn)
high_condi_dist = nb.loadCondiDistribution(high_condi_dist_fn)
ener_rows = getLst(ener_rows_ifn)


def convertPdfName(dist_tuple):
    """Return ``dist_tuple`` with the first letter of its name upper-cased.

    Args:
        dist_tuple: Pair whose item 0 is the distribution name and whose
            item 1 is passed through unchanged.

    Returns:
        A new ``(name, dist_tuple[1])`` tuple.  An empty name is returned
        as-is instead of raising ``IndexError``.
    """
    name = dist_tuple[0]
    if name:
        name = name.upper()[0] + name[1:]
    return (name, dist_tuple[1])


def _bayesRows(row_names, condi_dist):
    """Pair each row name with the name and first two parameters at its index."""
    rows = []
    for i, row_name in enumerate(row_names):
        # Index-based on purpose: a too-short condi_dist still raises
        # IndexError rather than being silently truncated.
        dist_name, params = condi_dist[i]
        rows.append([row_name, dist_name, params[0], params[1]])
    return rows


################################################################################
# converting
low_condi_dist = [convertPdfName(i) for i in low_condi_dist]
high_condi_dist = [convertPdfName(i) for i in high_condi_dist]
################################################################################

high_bayes_dists = _bayesRows(ener_rows, high_condi_dist)
low_bayes_dists = _bayesRows(ener_rows, low_condi_dist)
print(arr) #do ml prediction here decision = t.predict(arr, gnb_clf) #print("MLsays:") f = 1 ok = 0 if (f == 1): print(decision) cv2.imshow('res2', res2) #return ok cv2.waitKey(0) #cv2.destroyAllWindows() #train dataset t = ml.Gaussian_Naive_Bayes() X, Y = t.select_data("satellitePixel.csv") X_train, X_test, y_train, y_test = t.split_data(X, Y) gnb_clf = t.train_classifier(X_train, y_train) t.get_accuracy_score(gnb_clf, X_test, y_test) isok = 0 #take pics in loop print( "\nAquiring images and detecting pollution like from a real-time feed:\n") for m in range(1, 37): z = "E:/18.Win.Sem/Satellite/pic" + str(m) + ".jpg" print("Image number - " + str(m)) #isok=isok+pollution(z) pollution(z) #print("Accuracy till now:") #print(isok/m)
sr = Series # condi_dist_h = nb.loadCondiDistribution('08ff_high_bk.dist') # h = np.loadtxt('08ff_high_bk.mat', delimiter=' ') # condi_dist_h = nb.loadCondiDistribution('06ff_high_decoy.dist') # h = np.loadtxt('06ff_high_decoy.mat', delimiter=' ') h5_path = 'all_decoy.h5' sub_path = 'noncentralized_path/low_decoy' condi_dist_fn = 'low.dist' f = h5py.File(h5_path) dset = f[sub_path][()] ener_matx = dset[:, 1:] condi_dist_h = nb.loadCondiDistribution(condi_dist_fn) h = ener_matx total_samples = 1000 high_pdf = nb.buildPdf(condi_dist_h) for i in range(h.shape[1]): high0_pdf, fitting_para = high_pdf[i] print high0_pdf, fitting_para h0 = h[:,i] max_val = h0.max() min_val = h0.min()
def weightDiff(likelihood_diff_high, likelihood_diff_low, weights):
    """Apply ``weights`` to both likelihood differences via ``np.dot``.

    Args:
        likelihood_diff_high: Likelihood differences of the high set.
        likelihood_diff_low: Likelihood differences of the low set.
        weights: Right-hand operand of both dot products.

    Returns:
        Tuple ``(weighted_high, weighted_low)``.
    """
    weighted_low = np.dot(likelihood_diff_low, weights)
    weighted_high = np.dot(likelihood_diff_high, weights)
    return weighted_high, weighted_low


if __name__ == '__main__':
    # Build and save the low distributions first, then the high ones.
    for condi_dist_fn, sub_path in (
            ('low.dist', 'noncentralized_path/low_decoy'),
            ('high.dist', 'noncentralized_path/high_decoy')):
        buildPdf(condi_dist_fn, sub_path)

    likelihood_diff_high, likelihood_diff_low = calculateLikelihoodDiff()

    weights_fn = '/work/jaydy/working/nb_ff_running/even_weight.txt'
    weights = np.loadtxt(weights_fn)
    likelihood_diff_high, likelihood_diff_low = weightDiff(
        likelihood_diff_high, likelihood_diff_low, weights)

    # NOTE(review): argument order is (high, low) here -- confirm against the
    # signature of nb.constructRoc.
    TPRs, FPRs = nb.constructRoc(likelihood_diff_high, likelihood_diff_low)
    auc = np.trapz(TPRs, x=FPRs)
    # Emits the same bytes as the Python 2 statement `print "AUC\t: ", auc`.
    print(" ".join(["AUC\t: ", str(auc)]))