def eval_global(self, measures, test_sets):
    """Count right/wrong nearest-class predictions per measure and observable.

    For every measure, true class, test file and observable, the file's
    histogram and PDF are computed, ``self.get_dists`` is evaluated against
    the trained PDF of every class in ``self.cl``, and the class with the
    smallest returned value is taken as the prediction. When several classes
    share the smallest value, the first one in ``self.cl`` order wins.

    Args:
        measures: Re-iterable collection of measure identifiers. Each one is
            passed as the first argument to ``self.get_dists`` and used as a
            key of the returned dict.
        test_sets: Mapping from class to an iterable of file paths readable
            by ``np.load``.

    Returns:
        Nested dict ``results[measure][true_class][observable]`` holding
        ``{'right': int, 'wrong': int}`` counters.
    """
    results = {
        m: {
            c: {o: {'right': 0, 'wrong': 0} for o in self.obs}
            for c in self.cl
        }
        for m in measures
    }

    for c in self.cl:
        for m in measures:
            # NOTE(review): every file is reloaded and re-histogrammed once
            # per measure. Caching per file would avoid that, provided
            # get_dists does not modify its arguments -- confirm first.
            for file_path in test_sets[c]:
                data = np.load(file_path)
                data_A, data_B = tools.extract_individual_data(data)
                obs_data = tools.compute_observables(data_A, data_B)

                for o in self.obs:
                    histogram = tools.compute_histogram(o, obs_data[o])
                    pdf = tools.compute_pdf(o, histogram)

                    # One value per candidate class, in self.cl order.
                    dists = {
                        c_query: self.get_dists(
                            m, pdf, histogram, self.pdfs[o][c_query])
                        for c_query in self.cl
                    }
                    c_pred = min(dists.items(),
                                 key=operator.itemgetter(1))[0]

                    outcome = 'right' if c_pred == c else 'wrong'
                    results[m][c][o][outcome] += 1

    return results
def train(self, train_sets):
    """Build per-class PDFs and joint PDFs, and return Jaccard distances.

    Side effects: fills ``self.pdfs[observable][cls]`` (``self.pdfs`` must
    already exist as a mapping) and replaces ``self.joint_pdfs`` with a fresh
    ``joint_pdfs[cls][o1][o2]`` mapping.

    Args:
        train_sets: Mapping from class to an iterable of file paths readable
            by ``np.load``.

    Returns:
        Nested dict ``jaccard_dist[cls][o1][o2]`` with
        ``(joint_entropy - mutual_information) / joint_entropy`` for each
        ordered pair of observables. Pairs whose mutual information equals
        zero are omitted.
    """
    self.joint_pdfs = {}
    jaccard_dist = {}

    # Start every (observable, class) pair from an empty histogram.
    histograms = {}
    for o in self.obs:
        self.pdfs[o] = {}
        histograms[o] = {c: tools.initialize_histogram(o) for c in self.cl}

    # Accumulate histograms per class, and keep the raw observable samples
    # so that joint histograms can be built afterwards.
    obs_data_cum = {}
    for c in self.cl:
        obs_data_cum[c] = {o: [] for o in self.obs}
        for file_path in train_sets[c]:
            data = np.load(file_path)
            data_A, data_B = tools.extract_individual_data(data)
            obs_data = tools.compute_observables(data_A, data_B)
            for o in self.obs:
                histograms[o][c] += tools.compute_histogram(o, obs_data[o])
                obs_data_cum[c][o].extend(obs_data[o])

    for o in self.obs:
        for c in self.cl:
            self.pdfs[o][c] = tools.compute_pdf(o, histograms[o][c])

    for c in self.cl:
        self.joint_pdfs[c], jaccard_dist[c] = {}, {}
        samples = obs_data_cum[c]
        for o1 in self.obs:
            self.joint_pdfs[c][o1], jaccard_dist[c][o1] = {}, {}
            for o2 in self.obs:
                joint_pdf = tools.compute_joint_pdf(
                    tools.compute_joint_histogram(
                        o1, samples[o1], o2, samples[o2]))
                self.joint_pdfs[c][o1][o2] = joint_pdf

                joint_ent = tools.get_joint_ent(
                    joint_pdf, self.pdfs[o1][c], self.pdfs[o2][c])
                mutual_inf = tools.get_mutual_inf(
                    joint_pdf, self.pdfs[o1][c], self.pdfs[o2][c])

                # Compare by value: the previous `is not 0` identity test was
                # always true for float results, so the guard never applied.
                if mutual_inf != 0:
                    jaccard_dist[c][o1][o2] = (
                        (joint_ent - mutual_inf) / joint_ent)

    return jaccard_dist
def get_jaccard_dist(train_fnames):
    """Return Jaccard distances between observables for every class.

    Classes and observables are taken from ``preferences.CLASSES`` and
    ``preferences.OBSERVABLES``. Nothing outside the function is modified.

    Args:
        train_fnames: Mapping from class to an iterable of file paths
            readable by ``np.load``.

    Returns:
        Nested dict ``jaccard_dist[cls][o1][o2]`` with
        ``(joint_entropy - mutual_information) / joint_entropy`` for each
        ordered pair of observables. Pairs whose mutual information equals
        zero are omitted.
    """
    observables = preferences.OBSERVABLES
    classes = preferences.CLASSES

    # Start every (observable, class) pair from an empty histogram.
    histograms = {}
    for o in observables:
        histograms[o] = {c: tools.initialize_histogram(o) for c in classes}

    # Accumulate histograms per class, and keep the raw observable samples
    # so that joint histograms can be built afterwards.
    obs_data_cum = {}
    for c in classes:
        obs_data_cum[c] = {o: [] for o in observables}
        for file_path in train_fnames[c]:
            data = np.load(file_path)
            data_A, data_B = tools.extract_individual_data(data)
            obs_data = tools.compute_observables(data_A, data_B)
            for o in observables:
                histograms[o][c] += tools.compute_histogram(o, obs_data[o])
                obs_data_cum[c][o].extend(obs_data[o])

    pdfs = {}
    for o in observables:
        pdfs[o] = {}
        for c in classes:
            pdfs[o][c] = tools.compute_pdf(o, histograms[o][c])

    jaccard_dist = {}
    for c in classes:
        jaccard_dist[c] = {}
        samples = obs_data_cum[c]
        for o1 in observables:
            jaccard_dist[c][o1] = {}
            for o2 in observables:
                joint_pdf = tools.compute_joint_pdf(
                    tools.compute_joint_histogram(
                        o1, samples[o1], o2, samples[o2]))

                joint_ent = tools.get_joint_ent(
                    joint_pdf, pdfs[o1][c], pdfs[o2][c])
                mutual_inf = tools.get_mutual_inf(
                    joint_pdf, pdfs[o1][c], pdfs[o2][c])

                # Compare by value: the previous `is not 0` identity test was
                # always true for float results, so the guard never applied.
                if mutual_inf != 0:
                    jaccard_dist[c][o1][o2] = (
                        (joint_ent - mutual_inf) / joint_ent)

    return jaccard_dist
def evaluate_distance(self, alpha, test_sets):
    """Classify each test file by summed energy distance and tally the hits.

    For every test file, the PDF of each observable is computed and
    ``stats.energy_distance`` between it and the trained PDF of every class
    is added up over all observables. The class with the smallest total is
    the prediction; when several classes tie, the first in ``self.cl`` order
    wins.

    Args:
        alpha: Unused; kept so existing callers keep working.
        test_sets: Mapping from class to an iterable of file paths readable
            by ``np.load``. A class with no files yields zero counts.

    Returns:
        Dict ``results[true_class] = {'right': int, 'wrong': int}``.
    """
    results = {}
    for c in self.cl:
        results[c] = {'right': 0, 'wrong': 0}

        for file_path in test_sets[c]:
            data = np.load(file_path)
            data_A, data_B = tools.extract_individual_data(data)
            obs_data = tools.compute_observables(data_A, data_B)

            # Total distance from this file to each candidate class.
            distances = dict.fromkeys(self.cl, 0)
            for o in self.obs:
                pdf = tools.compute_pdf(
                    o, tools.compute_histogram(o, obs_data[o]))
                for c_pred in self.cl:
                    distances[c_pred] += stats.energy_distance(
                        pdf, self.pdfs[o][c_pred])

            predicted = min(distances.items(),
                            key=operator.itemgetter(1))[0]
            if predicted == c:
                results[c]['right'] += 1
            else:
                results[c]['wrong'] += 1

    return results