def set_train_dists(self, train_fnames):
    """Rebuild the per-class training histograms and pdfs.

    ``train_fnames`` is indexed by class (the entries of
    ``preferences.CLASSES``) and yields the training file paths of that
    class. Each file is loaded with ``np.load``, passed through
    ``generic_tools.extract_individual_data`` and
    ``generic_tools.compute_observables``, and its 1D histogram is added,
    observable by observable, to the running histogram of its class.

    Results are written to ``self.train_histograms[class][observable]`` and
    ``self.train_pdfs[class][observable]``; nothing is returned.
    """
    classes = preferences.CLASSES
    observables = preferences.OBSERVABLES

    # Phase 1: start from fresh histograms on every call, because phase 2
    # accumulates into them with ``+=``.
    for cls in classes:
        self.train_pdfs[cls] = {}
        self.train_histograms[cls] = {
            obs: generic_tools.initialize_histogram(obs)
            for obs in observables
        }

    # Phase 2: add the histogram of every training file to its class total.
    for cls in classes:
        class_hists = self.train_histograms[cls]
        for path in train_fnames[cls]:
            individual_a, individual_b = (
                generic_tools.extract_individual_data(np.load(path)))
            observed = generic_tools.compute_observables(
                individual_a, individual_b)
            for obs in observables:
                class_hists[obs] += generic_tools.compute_histogram_1D(
                    obs, observed[obs])

    # Phase 3: turn each accumulated histogram into a pdf.
    for cls in classes:
        class_hists = self.train_histograms[cls]
        class_pdfs = self.train_pdfs[cls]
        for obs in observables:
            class_pdfs[obs] = generic_tools.compute_pdf(
                obs, class_hists[obs])
def train(self, train_fnames):
    """Compute the 1D training pdfs for every observable and class.

    ``train_fnames`` is indexed by class (the entries of
    ``preferences.CLASSES``) and yields the training file paths of that
    class. The histograms of all files of a class are summed per
    observable in a local accumulator, and the resulting pdfs are stored
    in ``self.train_pdfs1D[observable][class]``. Nothing is returned.
    """
    classes = preferences.CLASSES
    observables = preferences.OBSERVABLES

    # Fresh accumulators (and fresh pdf containers) for every observable.
    accumulated = {}
    for obs in observables:
        self.train_pdfs1D[obs] = {}
        accumulated[obs] = {
            cls: generic_tools.initialize_histogram(obs) for cls in classes
        }

    # Sum the per-file histograms into the accumulator of their class.
    for cls in classes:
        for path in train_fnames[cls]:
            individual_a, individual_b = (
                generic_tools.extract_individual_data(np.load(path)))
            observed = generic_tools.compute_observables(
                individual_a, individual_b)
            for obs in observables:
                accumulated[obs][cls] += generic_tools.compute_histogram_1D(
                    obs, observed[obs])

    # Convert every accumulated histogram into its pdf.
    for obs in observables:
        pdfs_for_obs = self.train_pdfs1D[obs]
        hists_for_obs = accumulated[obs]
        for cls in classes:
            pdfs_for_obs[cls] = generic_tools.compute_pdf(
                obs, hists_for_obs[cls])
def set_test_dists(self, test_fnames):
    """Rebuild the per-file test histograms and pdfs.

    ``test_fnames`` is indexed by class (the entries of
    ``preferences.CLASSES``) and yields the test file paths of that class.
    Unlike the training distributions, one histogram and one pdf are kept
    for every individual test file:
    ``self.test_histograms[class][path][observable]`` and
    ``self.test_pdfs[class][path][observable]``. Nothing is returned.
    """
    classes = preferences.CLASSES
    observables = preferences.OBSERVABLES

    # Phase 1: reset the containers on every call and create a placeholder
    # entry per (class, file, observable).
    # NOTE(review): phases 2 and 3 overwrite these placeholders by plain
    # assignment, so this pass looks redundant; kept to preserve behaviour.
    for cls in classes:
        self.test_histograms[cls] = {}
        self.test_pdfs[cls] = {}
        for path in test_fnames[cls]:
            self.test_histograms[cls][path] = {
                obs: generic_tools.initialize_histogram(obs)
                for obs in observables
            }
            self.test_pdfs[cls][path] = {obs: [] for obs in observables}

    # Phase 2: compute the histogram of every test file.
    for cls in classes:
        for path in test_fnames[cls]:
            individual_a, individual_b = (
                generic_tools.extract_individual_data(np.load(path)))
            observed = generic_tools.compute_observables(
                individual_a, individual_b)
            file_hists = self.test_histograms[cls][path]
            for obs in observables:
                file_hists[obs] = generic_tools.compute_histogram_1D(
                    obs, observed[obs])

    # Phase 3: derive the pdf of every per-file histogram.
    for cls in classes:
        for path in test_fnames[cls]:
            file_hists = self.test_histograms[cls][path]
            file_pdfs = self.test_pdfs[cls][path]
            for obs in observables:
                file_pdfs[obs] = generic_tools.compute_pdf(
                    obs, file_hists[obs])
# For every data file of every class, compute the per-observable histogram
# and pdf, and record the pdf-weighted average of the values returned by
# get_edges(o). Results are appended to the pre-existing containers
# histograms1D, pdf1D and mean_pdfs, all indexed as [observable][class].
data_fnames = generic_tools.get_data_fnames('data/classes/')
for c in preferences.CLASSES:
    for file_path in data_fnames[c]:
        data = np.load(file_path)
        data_A, data_B = generic_tools.extract_individual_data(data)
        obs_data = generic_tools.compute_observables(data_A, data_B)
        for o in preferences.OBSERVABLES:
            edges = get_edges(o)
            temp_hist = generic_tools.compute_histogram_1D(o, obs_data[o])
            temp_pdf = generic_tools.compute_pdf(o, temp_hist)
            histograms1D[o][c].append(temp_hist)
            pdf1D[o][c].append(temp_pdf)
            # One scalar per file: the average of the edges weighted by
            # that file's pdf.
            mean_pdfs[o][c].append(np.average(edges, weights=temp_pdf))

# Report, per observable, the three values returned by get_fval for the
# collected per-file means.
print('Obs\tF_d\tv1\tv2')
print('-------------------------------')
for o in preferences.OBSERVABLES:
    F_d, v1, v2 = get_fval(mean_pdfs[o])
    print('{}\t{:2.3f}\t{}\t{}'.format(o, F_d, v1, v2))

print('\n**Check this page for computing p-value from f-statistics**')
# The URL previously lacked the '//' after the scheme and was not a valid
# http address.
print('http://stattrek.com/online-calculator/f-distribution.aspx ')
elapsed_time = time.time() - start_time