def evolve(self, names, data, population, max_parents, mut_rate, max_pop, local_search):
    """
    Given a population, create a new population with random pairing and mixing.

    If local_search is true, each child is replaced by the best neighbour
    of the random merge.

    Args
        names (list of string): the names of the nodes
        data: the data the candidate graphs are scored against
        population: sequence of (graph, score) pairs
        max_parents: parent limit forwarded to merge / best_neighbour
        mut_rate: mutation rate forwarded to merge
        max_pop (int): upper bound on the size of the new population
        local_search (bool): refine every child with best_neighbour

    Returns
        new_population: list of (graph, score) pairs, at most max_pop long
    """
    new_population = []
    s_tot = sum(s for (_, s) in population)
    n = len(population)
    # shuffle so that consecutive entries form random couples
    population = np.random.permutation(population)
    for p in range(n // 2):
        (g1, s1) = population[2 * p]
        (g2, s2) = population[2 * p + 1]
        # a couple gets children in proportion to its share of the total score
        nchildren = int(n * (s1 + s2) / s_tot) + 1
        for _ in range(nchildren):
            if len(new_population) >= max_pop:
                # population is full, remaining children would be dropped anyway
                break
            g = BayesNet(names)
            g.merge(g1, g2, s1 / s_tot, s2 / s_tot, max_parents, mut_rate)
            if local_search:
                g, s, _ = self.best_neighbour(names, data, g, max_parents)
            else:
                s = g.score(data)
            new_population.append((g, s))

            if self.plotting:
                try:
                    self.plt_mgr.add(name="Genetic Score", y=s)
                    self.plt_mgr.update()
                except Exception:
                    # plotting is presumably best-effort: never abort the search for it
                    pass

    # genetic() assigns the result of this call, so the new population must be returned
    return new_population
def genetic(self, **kwargs):
    """
    Implements genetic reproduction.
    If local_search is set to True, implements the memetic variant
    (every individual is refined with best_neighbour).

    Args
        names (list of string): the names of the nodes
        data: the data the graphs are scored against
        max_iter (int): number of generations (default 30)
        nb_start (int): size of the initial random population (default 10)
        max_pop (int): maximum population size (default nb_start)
        max_parents: parent limit per node (default None)
        mut_rate (float): mutation rate forwarded to evolve (default 0.01)
        local_search (bool): refine individuals with best_neighbour

    Returns
        g_max: best graph found
        s_max: score of the best graph
    """
    names = kwargs.get("names")
    data = kwargs.get("data")
    max_iter = kwargs.get("max_iter", 30)
    nb_start = kwargs.get("nb_start", 10)
    max_pop = kwargs.get("max_pop", nb_start)
    max_parents = kwargs.get("max_parents", None)
    mut_rate = kwargs.get("mut_rate", 0.01)
    local_search = kwargs.get("local_search", False)

    # initialize the population
    s_max = None
    g_max = None
    population = []
    for _ in range(nb_start):
        g = BayesNet(names)
        g.random_init(max_parents)
        if local_search:
            g, s, _found = self.best_neighbour(names, data, g, max_parents)
        else:
            s = g.score(data)
        population.append((g, s))
        # test for None first: ordering a number against None fails on Python 3
        if s_max is None or s > s_max:
            s_max = s
            g_max = g

    # let evolution do its work
    criteria = True
    niter = 0

    def update_criteria_from(population):
        """Return (best graph, best score, continue flag) for a generation."""
        best_g = None
        best_s = None
        for (_g, _s) in population:
            if best_s is None or _s > best_s:
                best_s = _s
                best_g = _g
        if best_s is not None and (s_max is None or best_s > s_max):
            return best_g, best_s, True
        # NOTE(review): the flag is True on both paths, so the loop only
        # stops on max_iter — confirm this is intended.
        return g_max, s_max, True

    while criteria and niter < max_iter:
        print("Iter {}, Population {}".format(niter, len(population)))
        population = self.evolve(names, data, population, max_parents,
                                 mut_rate, max_pop, local_search)
        g_max, s_max, criteria = update_criteria_from(population)

        if self.plotting:
            try:
                self.plt_mgr.add(name="Genetic Score Max", y=s_max)
                self.plt_mgr.update()
            except Exception:
                # plotting is presumably best-effort: never abort the search for it
                pass

        niter += 1

    return g_max, s_max
def brute_force(self, **kwargs):
    """
    Sample random bayesian networks and keep the best one.

    Args
        names (list of string): the names of the nodes
        data (np array): (nsamples, nfeatures)
        nsamples (int): number of random networks to draw (default 1000)

    Returns
        g: best graph found
        s: score of best graph
    """
    # get args
    names = kwargs.get("names")
    data = kwargs.get("data")
    nsamples = kwargs.get("nsamples", 1000)

    # initialize
    g = BayesNet(names)
    g.random_init()
    s = g.score(data)

    # explore
    for i in range(nsamples):
        # carriage return keeps the progress counter on a single line
        sys.stdout.write("\rIter {}".format(i))
        sys.stdout.flush()
        g_new = BayesNet(names)
        g_new.random_init()
        s_new = g_new.score(data)
        if s_new > s:
            print("\nFound new best score at {}".format(s_new))
            g, s = g_new, s_new

    return g, s
def k2(self, **kwargs):
    """
    Implements k2 algorithm: visit the nodes in a random order and, for each
    node, keep adding the best parent while the score improves.

    Args
        names (list of string): the names of the nodes
        data: the data the graph is scored against
        max_parents: parent limit per node (default None)
        max_iter, nb_start: read for interface compatibility, not used here

    Returns
        g: graph found
        s: score of that graph
    """
    names = kwargs.get("names")
    data = kwargs.get("data")
    max_iter = kwargs.get("max_iter", 30)
    nb_start = kwargs.get("nb_start", 3)
    max_parents = kwargs.get("max_parents", None)

    ordering = np.random.permutation(range(len(names)))
    g = BayesNet(names)
    s = g.score(data)

    for i in ordering:
        found_new = True
        while found_new:
            print("Node {}, score is {}".format(i, s))
            g, s, found_new = self.best_parent(g, s, i, data, max_parents)

            if self.plotting:
                try:
                    self.plt_mgr.add(name="score k2 {}".format(
                        self.start_no), y=s)
                    self.plt_mgr.update()
                except Exception:
                    # plotting is presumably best-effort: never abort the search for it
                    pass

    # hand the result back, like brute_force does
    return g, s
def precision_recall():
    """
    Plot one precision-recall curve per attribute classifier.

    For each attribute in the hard-coded list, the saved classifier is loaded,
    its responses are computed through a BayesNet, a classification report is
    printed (responses thresholded at 0.65) and the precision-recall curve is
    drawn in its own subplot of a 2x2 grid.
    """
    # from sklearn.metrics import roc_auc_score
    # from sklearn.metrics import roc_curve
    from sklearn.metrics import precision_recall_curve
    from sklearn.metrics import auc
    from sklearn.metrics import classification_report
    from mpltools import style
    style.use('ggplot')

    makes = ['bmw', 'ford']
    types = ['sedan', 'SUV']
    args = makes + types
    config = get_config(args)
    (dataset, config) = fgu.get_all_metadata(config)

    for ii, attrib_name in enumerate(args):
        attrib_clf = AttributeClassifier.load(
            '../../../attribute_classifiers/{}.dat'.format(attrib_name))
        bnet = BayesNet(config, dataset['train_annos'],
                        dataset['class_meta'], [attrib_clf], desc=str(args))

        res = bnet.create_attrib_res_on_images()

        attrib_selector = AttributeSelector(config, dataset['class_meta'])
        pos_classes = attrib_selector.class_ids_for_attribute(attrib_name)
        true_labels = np.array(res.class_index.isin(pos_classes))

        # result columns are keyed by the lower-cased attribute name
        column = str.lower(attrib_name)
        scores = np.array(res[column])

        print("--------------{}-------------".format(attrib_name))
        print(res[column].describe())

        print(classification_report(
            true_labels, scores > 0.65,
            target_names=['not-{}'.format(attrib_name), attrib_name]))

        precision, recall, _ = precision_recall_curve(true_labels, scores)
        score = auc(recall, precision)
        print("Area Under Curve: %0.2f" % score)

        plt.subplot(2, 2, ii + 1)
        plt.plot(recall, precision, label='Precision-Recall curve')
        plt.title('Precision-Recall: {}'.format(attrib_name))
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.legend(['area = {}'.format(score)])

    plt.draw()
    plt.show()
def cross_entropy(bn1: BayesNet, bn2: BayesNet, nsamples: int = None) -> float:
    """Return the cross-entropy of bn2 relative to bn1.

    With ``nsamples`` left as None the value is accumulated over every item
    yielded by ``all_dicts`` for bn1's node names (presumably all full
    assignments), weighting bn2's log-probability by bn1's probability.
    Otherwise it is estimated as the average of ``-bn2.sample_log_prob`` over
    ``nsamples`` samples drawn from bn1.

    Raises:
        ValueError: if ``nsamples`` is given but smaller than 1 (the estimate
            would be a division by zero or a meaningless ``-0.0``).
    """
    if nsamples is None:
        cross_ent = .0
        for sample in all_dicts(bn1.nodes.keys()):
            cross_ent -= np.exp(bn1.sample_log_prob(sample)) * bn2.sample_log_prob(sample)
        return cross_ent

    if nsamples < 1:
        raise ValueError("nsamples must be a positive integer, got {!r}".format(nsamples))

    cross_ent = .0
    for _ in range(nsamples):
        cross_ent -= bn2.sample_log_prob(bn1.sample())
    return cross_ent / nsamples
def main():
    """Learn the CPDs of two networks and print them next to the reference.

    Three networks are built from the same file: a reference BayesNet, an
    MLEBayesNet fitted on the samples file, and a ParametricBayesNet trained
    one sample at a time on samples drawn from the reference network.
    """
    args = get_args()
    bn_path = args.file_name

    table_bn = BayesNet(bn_file=bn_path)
    mle_bn = MLEBayesNet(bn_file=bn_path)
    parametric_bn = ParametricBayesNet(bn_file=bn_path)

    for heading, net in (("Initial params MLE bn:", mle_bn),
                         ("Initial params parametric bn:", parametric_bn)):
        print(heading)
        print(net.pretty_print())

    print("========== Frequentist MLE ==========")
    mle_bn.learn_cpds(read_samples(args.samples_file_name))

    for heading, net in (("Reference BN", table_bn),
                         ("MLE BayesNet after learning CPDs", mle_bn)):
        print(heading)
        print(net.pretty_print())

    print("========== Parametric MLE ==========")
    # Cross-entropy tracking is currently disabled:
    # ref_cent = cross_entropy(table_bn, table_bn)
    # cent = cross_entropy(table_bn, parametric_bn, nsamples=100)
    # print("Step %6d | CE: %6.3f / %6.3f" % (0, cent, ref_cent))

    step = 1
    while step < 1000:
        parametric_bn.learn(table_bn.sample(), learning_rate=args.lr)
        if not step % 500:
            print("step: ", step)
            # cent = cross_entropy(table_bn, parametric_bn, nsamples=200)
            # print(f"Step {step:6d} | CE: {cent:6.3f} / {ref_cent:6.3f}")
        step += 1

    for heading, net in (("Reference BN", table_bn),
                         ("Parametric BayesNet after learning CPDs", parametric_bn)):
        print(heading)
        print(net.pretty_print())
def hill_climbing(self, **kwargs):
    """
    Implements Hill Climbing Algorithm

    Args
        names (list of string): the name of the nodes
        data (np array): (nsamples, nfeatures)
        max_iter (int): max number of iteration (default 20)
        max_parents: parent limit per node (default None)
        g0 (BayesNet): the start point; a random graph is drawn when omitted

    Returns
        g: best graph found
        s: score of best graph
    """
    # get args
    names = kwargs.get("names")
    data = kwargs.get("data")
    max_iter = kwargs.get("max_iter", 20)
    max_parents = kwargs.get("max_parents", None)

    # initialize: honour the documented start point, else start at random
    g0 = kwargs.get("g0")
    if g0 is None:
        g0 = BayesNet(names)
        g0.random_init(max_parents=max_parents)
    g = g0
    s = g0.score(data)
    found_new = True
    niter = 0

    # explore: stop on the first iteration without improvement
    while found_new and niter < max_iter:
        print("Iter {}".format(niter))
        niter += 1
        g, s, found_new = self.best_neighbour(names, data, g, max_parents)

        if self.plotting:
            try:
                self.plt_mgr.add(name="score hill climbing {}".format(
                    self.start_no), y=s)
                self.plt_mgr.update()
            except Exception:
                # plotting is presumably best-effort: never abort the search for it
                pass

    return g, s
def best_parent(self, g, s, i, data, max_parents):
    """
    Try every node that is not yet a parent of node i as an extra parent and
    keep the single addition that gives the highest score.

    Args
        g: the current graph (left untouched, candidates are tried on a copy)
        s: score of g
        i: index of the node receiving the parent
        data: the data used for scoring
        max_parents: parent limit forwarded to add_edge

    Returns
        the best graph, its score, and whether an improvement over s was found
    """
    r = g.compute_r(data)
    node_score_before = g.score_node(i, data, r)

    best_graph, best_score, improved = g, s, False
    trial = BayesNet(bn=g)

    for candidate in range(g.n):
        if candidate in trial.parents[i]:
            continue
        if not trial.add_edge(candidate, i, max_parents):
            continue
        # only node i changed, so swap its old local score for the new one
        trial_score = s - node_score_before + trial.score_node(i, data, r)
        if trial_score > best_score:
            improved = True
            best_score = trial_score
            best_graph = BayesNet(bn=trial)
        # undo the trial edge before testing the next candidate
        trial.remove_edge(candidate, i)

    return best_graph, best_score, improved
from bayes_net import BayesNet


def main():
    """Print one joint probability and one individual probability.

    Reads the module-level ``bn`` network created under the ``__main__`` guard.
    """
    full_assignment = [('ST', True), ('UPAL', True), ('CEA', False),
                       ('CP', True), ('PA', True), ('FEUR', False)]
    joint = bn.jointProb(full_assignment)
    print("Probabilidade Conjunta:", joint)

    individual = bn.indProb(("CEA", False))
    print("Probabilidade Individual:", individual)


if __name__ == '__main__':
    bn = BayesNet()

    # One row per bn.add call: (variable, parent assignment, probability),
    # registered in the order listed.
    # NOTE(review): main() is not invoked anywhere in the visible code.
    cpt_rows = (
        ("ST", [], 0.60),
        ("UPAL", [], 0.05),
        ("CP", [("ST", True), ("PA", False)], 0.01),
        ("CP", [("ST", True), ("PA", True)], 0.02),
        ("CP", [("ST", False), ("PA", False)], 0.001),
        ("CP", [("ST", False), ("PA", True)], 0.011),
        ("CEA", [("ST", True)], 0.90),
        ("CEA", [("ST", False)], 0.001),
        ("PA", [("UPAL", True)], 0.25),
        ("PA", [("UPAL", False)], 0.04),
        ("FEUR", [("UPAL", False), ("PA", True)], 0.10),
        ("FEUR", [("UPAL", False), ("PA", False)], 0.01),
        ("FEUR", [("UPAL", True), ("PA", True)], 0.90),
    )
    for cpt_row in cpt_rows:
        bn.add(*cpt_row)
def classify_using_attributes():
    """
    Train and evaluate a class classifier on bag-of-words features combined
    with attribute-classifier responses.

    A LinearSVC is fitted on the concatenation of the BoW features and the
    attribute responses produced through a BayesNet, restricted to the small
    class set selected for the hard-coded makes and types. Prints a
    classification report, accuracy, accuracy-at-N for N in 1..10, and the
    accuracy of three dummy baselines (most_frequent, stratified, uniform).
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn import svm
    from sklearn.metrics import classification_report
    # imported locally so the block does not rely on module-level imports
    from sklearn.metrics import accuracy_score
    from sklearn.dummy import DummyClassifier

    makes = ['bmw', 'ford']
    types = ['sedan', 'suv']
    args = makes + types + ['germany', 'usa']
    # args = get_args_from_file('sorted_attrib_list.txt')
    config = get_config()
    (dataset, config) = fgu.get_all_metadata(config)
    config.attribute.names = args

    attrib_names = [str.lower(a) for a in args]
    attrib_classifiers = [
        AttributeClassifier.load(
            '../../../attribute_classifiers/{}.dat'.format(attrib_name))
        for attrib_name in args
    ]

    train_annos = dataset['train_annos']
    test_annos = dataset['test_annos']
    attrib_meta = dataset['attrib_meta']

    # restrict everything to the small class set
    classes = select_small_set_for_bayes_net(dataset, makes, types)
    attrib_meta = attrib_meta.loc[classes.index]
    train_annos = train_annos[np.array(
        train_annos.class_index.isin(classes.class_index))]
    test_annos = test_annos[np.array(
        test_annos.class_index.isin(classes.class_index))]

    ftr = Bow.load_bow(train_annos, config)
    fte = Bow.load_bow(test_annos, config)

    bnet = BayesNet(config, train_annos, classes, attrib_classifiers,
                    attrib_meta, desc=str(args))
    attrib_res_train, _ = bnet.create_attrib_res_on_images(train_annos, ftr)
    attrib_res_test, _ = bnet.create_attrib_res_on_images(test_annos, fte)

    # features_train = Bow.load_bow(train_annos, config)
    # features_test = Bow.load_bow(test_annos, config)

    # combine attribs and features
    features_train = np.concatenate([ftr, attrib_res_train[attrib_names]], axis=1)
    features_test = np.concatenate([fte, attrib_res_test[attrib_names]], axis=1)

    # define a classifier that uses the attribute scores
    # clf = RandomForestClassifier(n_estimators=50, n_jobs=-2)
    # clf = svm.SVC(kernel='rbf')
    clf = svm.LinearSVC()

    labels_train = np.array(attrib_res_train.class_index)
    # features_train = np.array(attrib_res_train[attrib_names])
    clf.fit(features_train, labels_train)

    # features_test = np.array(attrib_res_test[attrib_names])
    y_pred = clf.predict(features_test)
    labels_test = np.array(attrib_res_test.class_index)

    print(classification_report(labels_test, y_pred,
                                labels=classes.index,
                                target_names=[c for c in classes.class_name]))

    print("Accuracy: {}".format(accuracy_score(labels_test, y_pred)))
    print("Mean Accuracy: {}".format(clf.score(features_test, labels_test)))

    print('')
    print('Accuracy at N:')
    scorer = AccuracyAtN(clf.decision_function(features_test),
                         labels_test, class_names=np.unique(labels_train))
    for ii in range(1, 11):
        print('Accuracy at {}: {}'.format(ii, scorer.get_accuracy_at(ii)))

    dummy_1 = DummyClassifier(strategy='most_frequent').fit(features_train, labels_train)
    dummy_2 = DummyClassifier(strategy='stratified').fit(features_train, labels_train)
    # the third baseline is reported as "uniform", so it must use that strategy
    dummy_3 = DummyClassifier(strategy='uniform').fit(features_train, labels_train)

    print('')
    print('Dummy Classifiers:')
    print('-----------------')
    print("Accuracy - most_frequent: {}".format(accuracy_score(labels_test, dummy_1.predict(features_test))))
    print("Accuracy - stratified: {}".format(accuracy_score(labels_test, dummy_2.predict(features_test))))
    print("Accuracy - uniform: {}".format(accuracy_score(labels_test, dummy_3.predict(features_test))))

    print("Mean Accuracy - most_frequent: {}".format(dummy_1.score(features_test, labels_test)))
    print("Mean Accuracy - stratified: {}".format(dummy_2.score(features_test, labels_test)))
    print("Mean Accuracy - uniform: {}".format(dummy_3.score(features_test, labels_test)))
def best_neighbour(self, names, data, g0, max_parents):
    """
    Find best neighbour of a BN among all single-edge removals, additions
    and reversals.

    Args
        names (list of string): the name of the nodes (not used here)
        data (np array): (nsamples, nfeatures)
        g0 (BayesNet): the reference
        max_parents: parent limit per node; None means n - 1

    Returns
        g: best neighbour (a copy of g0 when nothing better was found)
        s: score of best neighbour
        found_new: True when a candidate was accepted
    """
    print("Searching for best neighbour")

    # reference variables
    n = g0.n
    r = g0.compute_r(data)
    s0 = g0.score(data)

    # best candidate so far
    g = BayesNet(bn=g0)
    s = s0
    s_eps = s0
    found_new = False

    # working graph: every move is applied to it, evaluated, then undone
    g_work = BayesNet(bn=g0)

    if max_parents is None:
        max_parents = n - 1

    def update_best(mode="add"):
        """
        Evaluate the working graph and, if it wins, copy it into the best
        candidate. Reads i, s_i, s, s_eps and found_new from the enclosing
        scope; the caller must rebind s, s_eps and found_new with the
        returned values because the closure cannot rebind them itself.
        """
        # if mode == "rem" or not g_work.is_cyclic():
        # NOTE(review): only node i is re-scored. For a reversal node j
        # also gains a parent, so its local score presumably changes too —
        # confirm this is intended.
        s_new = s0 - s_i + g_work.score_node(i, data, r)
        # we give a random advantage to the candidate based on previous updates
        s_eps_new = s_new + self.epsilon * np.random.rand()
        if s_eps_new > s_eps:
            print("Found new candidate ({}) at {}".format(mode, s_new))
            g.copy(g_work)
            return s_new, s_eps_new, True

        return s, s_eps, found_new

    # iterate over node center of the modification
    for i in range(n):
        parents = g0.parents[i]
        s_i = g0.score_node(i, data, r)

        # 1. remove edge
        for j in parents:
            g_work.remove_edge(j, i)
            s, s_eps, found_new = update_best("rem")
            g_work.add_edge(j, i)

        # 2. add edge
        if len(parents) < max_parents:
            for j_prime in range(n):
                if j_prime not in parents:
                    if g_work.add_edge(j_prime, i):
                        s, s_eps, found_new = update_best("add")
                        g_work.remove_edge(j_prime, i)

        # 3. reverse direction
        for j in parents:
            if len(g0.parents[j]) < max_parents:
                g_work.remove_edge(j, i)
                if g_work.add_edge(i, j):
                    s, s_eps, found_new = update_best("rev")
                    g_work.remove_edge(i, j)
                # restore the original edge whether or not the reversal succeeded
                g_work.add_edge(j, i)

    self.update_epsilon(s0, s)

    return g, s, found_new
Figure 14.9 The enumeration algorithm for answering queries on Bayesian networks. <br> <br> <b>Note:</b> The implementation has been extended to handle queries with multiple variables. <br> """ if len(variables) == 0: return 1.0 y = variables[0] p_vals = [evidence[parent] for parent in y.potential.othernodes] if y in evidence.keys(): return get_cpt_entry(y, evidence[y], p_vals) * enumerate_all(variables[1:], evidence) val = 0.0 for i in range(len(y.states)): evidence_copy = evidence.copy() evidence_copy[y] = i val += get_cpt_entry(y, i, p_vals) * enumerate_all(variables[1:], evidence_copy) return val if __name__ == "__main__": nodes, potentials = netlog('./asia.net') bn = BayesNet(nodes, potentials) n_asia = findnode('asia', nodes) n_either = findnode('either', nodes) n_xray = findnode('xray', nodes) ev = {n_xray : 1, n_either : 1} ordered_vars = [potentials[i].node for i in t_ordered_vars(potentials)] print (enumeration_ask(bn, n_asia, ev))
'A0': [0.5, 0.5], 'A1': [1 - 1e-6, 1e-6] }, E: { 'B0C0': [0.1, 0.9], 'B0C1': [1e-7, 1 - 1e-7], 'B1C0': [1 - 1e-10, 1e-10], 'B1C1': [1e-5, 1 - 1e-5] }, D: { 'A0': [0.4, 0.6], 'A1': [0.1, 0.9] } } net = BayesNet(graph=GRAPH, cpt=CPT) samples = net.msample() print(mean([s[B] for s in samples])) def spread(w): """ The closer to zero, the better. """ result = max(w) / sum(w) return result def f(x): return 1