def hill_climbing_multiple_loops(data_set, metric='AIC', debug=False):
    """Learn a Bayesian network structure by repeated greedy hill climbing.

    Starting from ``BayesNet(data_set)``, every sweep tries, in this order,
    to add each missing edge, to delete each existing edge and to reverse
    each existing edge.  A change is kept only when it strictly increases
    ``bayes_net.score(data_set, metric)``; otherwise it is undone
    immediately.  Sweeps repeat until a complete sweep no longer improves
    the score.

    Args:
        data_set: Training data, passed unchanged to ``BayesNet`` and to
            ``BayesNet.score``.
        metric: Scoring metric name forwarded to ``BayesNet.score``.
        debug: Flag forwarded to ``Learning.log`` with each progress message.

    Returns:
        The ``BayesNet`` instance holding the learned structure.
    """
    bayes_net = BayesNet(data_set)
    score = bayes_net.score(data_set, metric)

    while True:
        max_score = score
        Learning.log('Score: ' + str(score), debug)

        # Pass 1: try to add every edge node_j -> node_i that is not present.
        for node_i in bayes_net.nodes():
            for node_j in bayes_net.nodes():
                if node_i == node_j or bayes_net.is_parent(node_j, node_i):
                    continue
                # A truthy check_cycle result vetoes the edge (presumably
                # because it would close a directed cycle).
                if bayes_net.check_cycle(node_j, node_i):
                    continue
                bayes_net.add_edge(node_j, node_i)
                new_score = bayes_net.score(data_set, metric)
                if new_score <= score:
                    bayes_net.delete_edge(node_j, node_i)
                    continue
                score = new_score
                Learning.log(
                    'Adding edge ' + str(node_j) + ' -> ' + str(node_i) +
                    '. New score: ' + str(score), debug)

        # Pass 2: try to delete every existing edge node_j -> node_i.
        for node_i in bayes_net.nodes():
            # Iterate over a snapshot: delete_edge/add_edge below edit the
            # parent set of node_i, and mutating a list while iterating it
            # skips some parents and revisits others.
            for node_j in list(bayes_net.pa(node_i)):
                bayes_net.delete_edge(node_j, node_i)
                new_score = bayes_net.score(data_set, metric)
                if new_score <= score:
                    bayes_net.add_edge(node_j, node_i)
                    continue
                score = new_score
                Learning.log(
                    'Deleting edge ' + str(node_j) + ' -> ' + str(node_i) +
                    '. New score: ' + str(score), debug)

        # Pass 3: try to reverse every existing edge node_j -> node_i.
        for node_i in bayes_net.nodes():
            # Snapshot for the same reason as in the deletion pass.
            for node_j in list(bayes_net.pa(node_i)):
                # NOTE(review): the third argument presumably tells
                # check_cycle that this is a reversal - confirm in BayesNet.
                if bayes_net.check_cycle(node_i, node_j, True):
                    continue
                bayes_net.reverse_edge(node_j, node_i)
                new_score = bayes_net.score(data_set, metric)
                if new_score <= score:
                    # Undo: flip the edge back to node_j -> node_i.
                    bayes_net.reverse_edge(node_i, node_j)
                    continue
                score = new_score
                Learning.log(
                    'Reversing edge ' + str(node_j) + ' -> ' + str(node_i) +
                    '. New score: ' + str(score), debug)

        # Stop once a whole sweep brought no improvement.
        if score <= max_score:
            break

    Learning.log('Learning bayes net ended. Score achieved: ' + str(score),
                 debug)
    return bayes_net
def TAN(data_set, metric='', debug=True):
    """Learn a Tree Augmented Naive Bayes (TAN) network from ``data_set``.

    The last column of ``data_set`` is taken as the class label and the
    remaining columns as attributes.  The procedure is:

    1. compute the conditional mutual information I(Xi; Xj | C) for every
       pair of attribute nodes;
    2. greedily build an undirected tree over the attributes, always
       picking the heaviest remaining pair that
       ``BayesNet.check_cycles_no_directions`` accepts;
    3. orient the tree edges breadth-first away from the first node of
       ``bayes_net.net``;
    4. add a ``'root'`` node for the class and make it a parent of every
       attribute node.

    Args:
        data_set: Two-dimensional table (sequence of rows); the last column
            holds the class label.
        metric: Scoring metric name forwarded to ``BayesNet.score``.
        debug: Flag forwarded to ``Learning.log`` with each progress message.

    Returns:
        The ``BayesNet`` instance holding the learned TAN structure.
    """
    # Function-scope import so the block does not depend on file-level
    # imports beyond the ones it already used (np, log).
    from collections import Counter, deque

    Learning.log('TAN: Learning bayes net started.', debug)

    # Split the table into attribute columns and the class column.
    table = np.array(data_set)
    class_column = len(data_set[0]) - 1
    class_values = table[:, class_column]
    data_set = table[:, :class_column]
    bayes_net = BayesNet(data_set)

    # Distinct class labels in order of first appearance.
    possible_class_values = []
    for value in class_values:
        if value not in possible_class_values:
            possible_class_values.append(value)

    # NOTE(review): the result was never used; the call is kept only in case
    # it has side effects on bayes_net - confirm and drop.
    bayes_net.get_data_set_rows_number(data_set)

    total = float(len(class_values))
    # Occurrences of each class label, aligned with possible_class_values.
    class_counts = [float(np.count_nonzero(class_values == value))
                    for value in possible_class_values]

    def conditional_mutual_information(node_a, node_b):
        """Return I(node_a; node_b | class) estimated from the data."""
        column_a = bayes_net.data[node_a]
        column_b = bayes_net.data[node_b]
        values_a = list(bayes_net.net[node_a]['possible_values'])
        values_b = list(bayes_net.net[node_b]['possible_values'])

        # One pass over the rows instead of a rescan for every
        # (class, value_a, value_b) combination.
        joint_abc = Counter(zip(column_a, column_b, class_values))
        joint_ac = Counter(zip(column_a, class_values))
        joint_bc = Counter(zip(column_b, class_values))

        information = 0.0
        for class_value, count_c in zip(possible_class_values, class_counts):
            for value_a in values_a:
                count_ac = joint_ac[(value_a, class_value)]
                for value_b in values_b:
                    count_abc = joint_abc[(value_a, value_b, class_value)]
                    if not count_abc:
                        # Zero joint probability contributes nothing; it
                        # also covers the zero-marginal cases.
                        continue
                    count_bc = joint_bc[(value_b, class_value)]
                    # P(a, b | c) / (P(a | c) * P(b | c)), with the common
                    # 1 / count_c factors cancelled.
                    ratio = (count_abc * count_c) / (count_ac * count_bc)
                    # Each term is weighted by the joint P(a, b, c), as the
                    # definition of conditional mutual information requires
                    # (the product of marginals P(a) * P(b) * P(c) is not a
                    # valid weight here).
                    information += (count_abc / total) * log(ratio)
        return information

    Learning.log('TAN: Mutual information calculating in progress.', debug)
    weights = {}
    for node_i in bayes_net.nodes():
        weights[node_i] = {}
        for node_j in bayes_net.nodes():
            if node_i == node_j:
                continue
            if node_j in weights and node_i in weights[node_j]:
                # The measure is symmetric: reuse the mirrored entry.
                weights[node_i][node_j] = weights[node_j][node_i]
                continue
            weights[node_i][node_j] = conditional_mutual_information(
                node_i, node_j)
            Learning.log(
                'TAN: Mutual information for nodes: ' + str(node_i) + " " +
                str(node_j) + ' : ' + str(weights[node_i][node_j]), debug)
    Learning.log('TAN: Mutual informations calculating done.', debug)
    print(weights)

    # Undirected tree, stored symmetrically as edges[a][b] / edges[b][a].
    edges = {}
    node_count = len(bayes_net.net)
    possible_edges_num = node_count * (node_count - 1) // 2
    Learning.log('TAN: Tree building in progress.', debug)
    while possible_edges_num > 0:
        check_next = True
        rejected = 0
        create_edge = False
        max_weight = -100000
        vertex_1 = None
        vertex_2 = None
        # Pairs already rejected in this round, stored symmetrically.
        not_to_be_checked = {}
        while check_next:
            max_weight = -100000
            # Pick the heaviest pair that is neither linked nor rejected.
            for node_i in bayes_net.nodes():
                for node_j in bayes_net.nodes():
                    if node_i == node_j:
                        continue
                    # Rejections are recorded in both directions, so one
                    # lookup is enough.
                    if node_j in not_to_be_checked.get(node_i, ()):
                        continue
                    already_linked = (
                        node_i in edges and node_j in edges and
                        node_j in edges[node_i] and node_i in edges[node_j])
                    if already_linked:
                        continue
                    if weights[node_i][node_j] > max_weight:
                        max_weight = weights[node_i][node_j]
                        vertex_1 = node_i
                        vertex_2 = node_j

            # NOTE(review): a truthy result is treated as "edge may be
            # added", although the original local was named causing_cycle -
            # confirm the return convention of check_cycles_no_directions.
            edge_allowed = bayes_net.check_cycles_no_directions(
                edges, vertex_1, vertex_2)
            if edge_allowed:
                create_edge = True
                check_next = False
            else:
                rejected += 1
                not_to_be_checked.setdefault(vertex_1, []).append(vertex_2)
                not_to_be_checked.setdefault(vertex_2, []).append(vertex_1)
                if rejected == possible_edges_num:
                    # Every remaining candidate was rejected in this round.
                    check_next = False
        if create_edge:
            edges = bayes_net.add_edge_no_directions(
                vertex_1, vertex_2, edges, max_weight)
        possible_edges_num -= 1
    print(edges)

    Learning.log('TAN: Tree building in progress - adding directions.',
                 debug)
    # Orient the tree breadth-first.  Every dequeued node is processed, so
    # the edges of the last queued node are directed as well.
    pending = deque([list(bayes_net.net.keys())[0]])
    while pending:
        parent = pending.popleft()
        # Snapshot of the neighbours: entries are deleted while looping.
        for child in list(edges.get(parent, {})):
            bayes_net.add_edge(parent, child)
            del edges[parent][child]
            del edges[child][parent]
            pending.append(child)
    Learning.log('TAN: Tree building done.', debug)

    Learning.log(
        'TAN: Adding class as a root and linking it to all other nodes.',
        debug)
    for node in bayes_net.nodes():
        bayes_net.net[node]['parents'].append('root')
    # NOTE(review): the last vertex is replaced by 'root'; presumably
    # BayesNet registers a trailing vertex that stands for the class
    # column - confirm.
    bayes_net.vertexes = bayes_net.vertexes[:-1]
    bayes_net.vertexes.append('root')
    # Snapshot taken before 'root' itself is inserted into the net.
    root_children = list(bayes_net.net.keys())
    bayes_net.net['root'] = {}
    bayes_net.net['root']['possible_values'] = possible_class_values
    bayes_net.net['root']['children'] = root_children
    bayes_net.net['root']['parents'] = []
    bayes_net.net['root']['probabilities'] = []

    for node in bayes_net.net.keys():
        # Single pre-joined string: same output as the Python 2 print
        # statement, and valid on Python 3 too.
        print(' '.join(str(part) for part in (
            "node: ", node, " parents: ", bayes_net.net[node]['parents'],
            " children: ", bayes_net.net[node]['children'])))
    Learning.log('TAN: Creating TAN tree done.', debug)

    score = bayes_net.score(data_set, metric)
    Learning.log('TAN: Score: ' + str(score), debug)
    return bayes_net