def constructLikelihoodDiff(self, high_pickle_fn, low_pickle_fn):
    """Store the low-minus-high conditional probability difference.

    The conditional distributions saved in ``high_pickle_fn`` and
    ``low_pickle_fn`` are loaded and each is turned into a pdf with
    ``nb.buildPdf``.  Both pdfs are evaluated with
    ``nb.calculateConditionalProb`` on ``self.all_set`` without its first
    column, and ``low - high`` is assigned to ``self.likelihood_diff``.

    Args:
        high_pickle_fn: file holding the high-class conditional distribution.
        low_pickle_fn: file holding the low-class conditional distribution.

    Returns:
        None; the result is kept on ``self.likelihood_diff``.
    """
    dist_high = nb.loadCondiDistribution(high_pickle_fn)
    dist_low = nb.loadCondiDistribution(low_pickle_fn)
    pdf_high, pdf_low = [nb.buildPdf(dist) for dist in (dist_high, dist_low)]

    # Column 0 is left out of the evaluation.
    # NOTE(review): presumably it is a label/score column -- confirm.
    energies = self.all_set[:, 1:]

    # NOTE(review): the original local names suggest these are log values.
    prob_given_high = nb.calculateConditionalProb(energies, pdf_high)
    prob_given_low = nb.calculateConditionalProb(energies, pdf_low)

    self.likelihood_diff = prob_given_low - prob_given_high
def calculateLikelihoodDiff():
    """Compute likelihood differences for the high and low decoy matrices.

    Energy matrices are read from ``all_decoy.h5`` under
    ``noncentralized_path/low_decoy`` and ``noncentralized_path/high_decoy``.
    The pdfs are built from the distributions stored in ``low.dist`` and
    ``high.dist``, and ``nb.getLikelihoodDiff`` is applied to each matrix
    with the same ``(high_pdf, low_pdf)`` pair.

    Returns:
        A ``(high, low)`` tuple: the likelihood difference for the
        high-decoy matrix followed by the one for the low-decoy matrix.
    """
    decoy_h5 = 'all_decoy.h5'

    # Low-decoy energies are loaded first, then both pdfs are built.
    decoy_ener = loadEnerMatx(decoy_h5, 'noncentralized_path/low_decoy')
    pdf_low = nb.buildPdf(nb.loadCondiDistribution('low.dist'))
    pdf_high = nb.buildPdf(nb.loadCondiDistribution('high.dist'))
    diff_for_low = nb.getLikelihoodDiff(decoy_ener, pdf_high, pdf_low)

    # Same pdfs, now scored against the high-decoy energies.
    decoy_ener = loadEnerMatx(decoy_h5, 'noncentralized_path/high_decoy')
    diff_for_high = nb.getLikelihoodDiff(decoy_ener, pdf_high, pdf_low)

    return diff_for_high, diff_for_low
return f[mcc_diff_path][()] if __name__ == "__main__": ################################################################################ # print plain text file # nb_ff = NB_ff('04ff_all_decoy.h5') # nb_ff.printEner() # nb_ff = NB_ff('08ff_all_decoy.h5') # nb_ff.printEner() # nb_ff = NB_ff('06ff_all_decoy.h5') # nb_ff.printEner() ################################################################################ # build pdf import sys # matrix_fn = sys.argv[1] matrix_fn = '08ff_high_decoy.mat' # matrix_fn = '04ff_low_decoy.mat' condi_dist_fn = matrix_fn.split('.')[0] + '.dist' matrix = np.loadtxt(matrix_fn, delimiter=' ') condi_dist = nb.getConditionalDist(matrix, dist_names) nb.saveCondiDistribution(condi_dist, condi_dist_fn) condi_dist = nb.loadCondiDistribution(condi_dist_fn) print condi_dist
from lst_sub import getLst
import NB_classifier as nb

ener_rows_ifn = 'ener_row_name.txt'
ff = '08ff'
low_condi_dist_fn = ff + '_low_decoy.dist'
high_condi_dist_fn = ff + '_high_decoy.dist'
bayes_dist_ofn = ff + '_bayes.txt'

low_condi_dist = nb.loadCondiDistribution(low_condi_dist_fn)
high_condi_dist = nb.loadCondiDistribution(high_condi_dist_fn)
ener_rows = getLst(ener_rows_ifn)


def convertPdfName(dist_tuple):
    """Return ``dist_tuple`` with the first letter of its name upper-cased.

    Args:
        dist_tuple: sequence whose item 0 is the distribution name and
            whose item 1 is passed through untouched.

    Returns:
        A ``(name, dist_tuple[1])`` tuple where only the first character
        of ``name`` has been upper-cased; the rest is left as it was.
    """
    name = dist_tuple[0]
    # Slicing (rather than indexing) keeps an empty name from raising
    # IndexError; for non-empty names the result is unchanged.
    name = name.upper()[:1] + name[1:]
    return (name, dist_tuple[1])


def _buildBayesRows(row_names, condi_dist):
    """Pair each row name with its distribution name and first two params.

    Row ``i`` of the result is
    ``[row_names[i], condi_dist[i][0], condi_dist[i][1][0],
    condi_dist[i][1][1]]``.  Lookup stays index-based so that a
    ``condi_dist`` shorter than ``row_names`` still raises IndexError
    instead of being silently truncated.
    """
    return [[row_name,
             condi_dist[i][0],
             condi_dist[i][1][0],
             condi_dist[i][1][1]]
            for i, row_name in enumerate(row_names)]


################################################################################
# converting
low_condi_dist = [convertPdfName(i) for i in low_condi_dist]
high_condi_dist = [convertPdfName(i) for i in high_condi_dist]

################################################################################
high_bayes_dists = _buildBayesRows(ener_rows, high_condi_dist)
low_bayes_dists = _buildBayesRows(ener_rows, low_condi_dist)