def simple_search(self, max_edges=None, tol=10**-6):
    """Greedy, addition-only structure learning search.

    Repeatedly adds the single edge with the largest cached score
    improvement until no candidate improves the score by more than
    ``tol`` or ``max_edges`` edges have been added.

    Parameters
    ----------
    max_edges : int, optional
        Upper bound on the number of edges added to the BN.  Defaults
        to twice the number of nodes.
    tol : float
        Minimum score improvement required to keep adding edges.

    Returns
    -------
    tuple
        ``(self.net_score, np.sum(self.scores))`` -- the score recorded
        at construction time and the current total of per-node scores.
    """
    # If max_edges is not provided try to add up to twice as many edges
    # as there are nodes.
    if max_edges is None:
        max_edges = 2 * self.node_index.size
    # Refresh the per-node delta cache before searching.
    for i in self.node_index:
        self.p_score(i)
    # best_deltas[c] = best score improvement available for child c;
    # (cnode, pnode) index the globally best cached candidate.
    best_deltas = np.amax(self.delta_cache, axis=1)
    cnode = np.argmax(best_deltas)
    pnode = np.argmax(self.delta_cache[cnode, :])
    cur_iter = 0
    while np.max(best_deltas) > tol and cur_iter < max_edges:
        print('iteration %s' % cur_iter)
        print(best_deltas[cnode], cnode, self.delta_index[cnode, pnode])
        self.add_edge_and_sync(cnode, pnode)
        # add_edge_and_sync updates delta_cache, so re-pick the best move.
        best_deltas = np.amax(self.delta_cache, axis=1)
        cnode = np.argmax(best_deltas)
        pnode = np.argmax(self.delta_cache[cnode, :])
        cur_iter += 1
    # BUG FIX: self.scores is one-dimensional, so the original
    # np.sum(np.diag(self.scores)) built an NxN diagonal matrix only to
    # sum it back down -- numerically identical to the plain sum, which
    # is also what grad_search() returns.  Compute the sum directly.
    return self.net_score, np.sum(self.scores)
def score_net(self):
    """Recompute every node's family score from the current BN structure.

    For each node, scores the data restricted to the node plus its
    current parents with ``self.objfunc``, stores the result in
    ``self.scores`` and refreshes ``self.net_score``.

    Returns
    -------
    The total network score (sum of the per-node family scores).
    """
    for node in self.node_index:
        # A node's "family" is itself followed by its parent set.
        family = [node] + self.BN.pnodes[node]
        self.scores[node] = self.objfunc(self.data[:, family],
                                         self.arity[family])
    self.net_score = np.sum(self.scores)
    return self.net_score
def gsrestarts(self, nrestarts=10, tol=10**-6):
    """Stochastically perturbed descent search -- the primary general
    searching method.

    Runs a gradient-style search, then repeatedly perturbs the network
    by removing a random batch of edges and re-searching, keeping the
    best-scoring network seen.  Attempts to improve optimality of high
    order relations and to avoid potentially present local minima.

    Parameters
    ----------
    nrestarts : int
        Number of perturb-and-research restarts.
    tol : float
        Improvement tolerance, forwarded to ``grad_search`` (the
        original accepted this parameter but ignored it).
    """
    self.grad_search(tol=tol)
    best_net = deepcopy(self.BN)
    best_score = np.sum(self.scores)
    # Upper bound for the random edge-removal count.  BUG FIX: the
    # original used self.arity.size / 2, a float that modern NumPy's
    # randint rejects, and which is <= 1 for networks of 3 or fewer
    # nodes, making randint(1, high) raise ValueError.  Floor-divide
    # and clamp to at least 2 so the call is always valid.
    high = max(2, self.arity.size // 2)
    for restart in range(nrestarts):
        # Perturb: drop a random number of edges.  A plain loop replaces
        # the original throwaway list comprehension used only for its
        # side effects, and `restart` replaces the builtin-shadowing
        # name `iter`.
        for _ in range(np.random.randint(1, high)):
            self.BN.remove_random_edge(self.remove_edge_and_sync)
        self.score_net()
        self.grad_search(tol=tol)
        current_score = np.sum(self.scores)
        if current_score > best_score:
            print('found')
            best_net = deepcopy(self.BN)
            best_score = current_score
    # Restore the best network seen and resync the score bookkeeping.
    self.BN = deepcopy(best_net)
    self.score_net()
def __init__(self, dt, objfunc='bdm', cache_size=1):
    """Initialize the structure learner from a dataset.

    Parameters
    ----------
    dt : dataset
        Instance of dutils.dataset providing ``data``, ``arity`` and
        ``variables``.
    objfunc : str
        Name of the scoring function: 'bdm' or 'mdl', default is 'bdm'.
        If the C extension is properly compiled, 'cmdla' (AIC) and
        'cmdlb' (BIC) options may also be available.
    cache_size : int
        Number of candidate-parent deltas cached per node; a falsy
        value means "cache deltas for every possible parent".
    """
    # SECURITY/DESIGN NOTE(review): eval() resolves the scoring function
    # by name from the enclosing module's namespace.  Acceptable only
    # because objfunc is a hard-coded caller choice, never untrusted
    # input; a dict mapping names to the known scoring functions would
    # be a safer replacement.
    self.objfunc = eval(objfunc)
    self.data = dt.data
    self.arity = dt.arity
    self.variables = dt.variables
    self.BN = bnet(self.variables)
    # One integer index per variable (np.arange replaces the original
    # enumerate-based list comprehension; identical result).
    node_index = np.arange(len(self.variables))
    self.node_index = node_index
    # Initial score of every node with an empty parent set.
    self.scores = [
        self.objfunc(self.data[:, [i]], self.arity[[i]])
        for i in node_index
    ]
    self.net_score = np.sum(self.scores)
    # Falsy cache_size falls back to one slot per possible parent.
    self.cache_size = self.arity.size
    if cache_size:
        self.cache_size = cache_size
    # delta_cache[c, k]: cached score deltas for adding the k-th best
    # candidate parent to child c; delta_index holds the parent ids.
    self.delta_cache = np.zeros((node_index.size, self.cache_size))
    # BUG FIX: dtype=np.int was removed in NumPy 1.20+; the builtin int
    # is the documented replacement and is what np.int aliased.
    self.delta_index = np.zeros((node_index.size, self.cache_size),
                                dtype=int)
    self.delta_tmp = np.zeros(node_index.size)
    # Per-node best score improvement obtainable by removing an edge,
    # and the corresponding candidate parents.
    self.remove_deltas = np.zeros(node_index.size)
    self.remove_candidates = [[] for i in node_index]
    # Prime both the addition and the removal delta caches.
    for i in self.node_index:
        self.p_score(i)
        self.reverse_p_score(i)
def grad_search(self, max_edges=None, tol=10**-6): """ Simple data driven structure learning search. max_edges is the upper bound on the number of edges added to the BN. Should NOT be used as a general search (without understanding of the limitations of max descent searching techniqes), designed to be a subroutine for a more sophisticated method. """ # If max_edges is not provided try to add up to three timese as many edges as # there are nodes if max_edges is None: max_edges = 3 * self.node_index.size best_deltas = np.amax(self.delta_cache, axis=1) #cnode=np.argmax(best_deltas) #pnode=np.argmax(self.delta_cache[cnode,:]) cur_iter = 0 while max(np.max(best_deltas),np.max(self.remove_deltas))>tol \ and cur_iter<max_edges: #print( 'iteration %s' %cur_iter) if max(best_deltas) > max(self.remove_deltas): cnode = np.argmax(best_deltas) pnode = np.argmax(self.delta_cache[cnode, :]) #print("adding edge (%d %d) with %f" \ # %(cnode,self.delta_index[cnode,pnode],best_deltas[cnode])) self.add_edge_and_sync(cnode, pnode) self.reverse_p_score(cnode) else: cnode = np.argmax(self.remove_deltas) pnode = self.remove_candidates[cnode][0] #print("removing edge (%d %d) with %f" \ # %(cnode,pnode,self.remove_deltas[cnode])) self.remove_edge_and_sync(cnode, pnode) self.remove_candidates[cnode] = 0 self.remove_deltas[cnode] = 0 self.reverse_p_score(cnode) best_deltas = np.amax(self.delta_cache, axis=1) cur_iter += 1 return self.net_score, np.sum(self.scores)