def iamb(data, alpha=0.05, feature_selection=None, debug=False):
    """
    IAMB Algorithm for learning the structure of a
    Discrete Bayesian Network from data.

    For every target variable a Markov blanket is built in two passes:
    a growing phase that repeatedly adds the candidate with the highest
    score from ``mi_test(..., test=False)`` as long as that candidate is
    not independent of the target given the current blanket, and a
    shrinking phase that drops every member which is independent of the
    target given the rest of the blanket.

    Arguments
    ---------
    *data* : a nested numpy array
        Two-dimensional; columns are the random variables
        (``data.shape[1]`` is used as the number of variables).

    *alpha* : a float
        Significance level handed to ``are_independent`` in both
        phases (i.e. the type I error rate of the tests).

    *feature_selection* : None or a single column index
        If None, learn the whole network structure. Otherwise
        only the Markov blanket of that one column is learned and
        returned. A list is rejected.

    *debug* : a boolean
        Whether to print progress messages.

    Returns
    -------
    *bn* : a BayesNet object
        When *feature_selection* is None.

    or

    *mb* : the markov blanket of a node
        A list of column indices, when *feature_selection* is given.

    Effects
    -------
    None

    Notes
    -----
    - ``mi_test(..., test=False)`` is assumed to return a score where
      larger means stronger dependence -- TODO confirm against mi_test.

    Speed Test (original author's measurement):
        *** 5 vars, 624 obs ***
            - 196 ms
    """
    n_rv = data.shape[1]
    Mb = {rv: [] for rv in range(n_rv)}

    if feature_selection is None:
        _T = range(n_rv)
    else:
        # Kept as an assert so callers that expect AssertionError still work.
        assert (not isinstance(feature_selection, list)), \
            'feature_selection must be only one value'
        _T = [feature_selection]

    # LEARN MARKOV BLANKET
    for T in _T:
        V = set(range(n_rv)) - {T}
        Mb_change = True

        # GROWING PHASE
        while Mb_change:
            Mb_change = False
            # find X_max in V-Mb(T)-{T} that maximizes
            # mutual information of X,T|Mb(T)
            # i.e. max of mi_test(data[:,(X,T,Mb(T))])
            max_val = -1
            max_x = None
            for X in V - set(Mb[T]):
                cols = (X, T) + tuple(Mb[T])
                mi_val = mi_test(data[:, cols], test=False)
                if mi_val > max_val:
                    max_val = mi_val
                    max_x = X

            if max_x is None:
                # No candidate left (or none scored above the floor).
                break

            # Add X_max only if it is dependent on T given Mb(T).
            cols = (max_x, T) + tuple(Mb[T])
            if not are_independent(data[:, cols], alpha):
                Mb[T].append(max_x)
                Mb_change = True
                if debug:
                    print('Adding %s to MB of %s' % (str(max_x), str(T)))

        # SHRINKING PHASE
        # Iterate over a snapshot: members are removed from Mb[T] in the
        # loop, and mutating a list while iterating it skips elements.
        for X in list(Mb[T]):
            # if x is independent of t given Mb(T) - {x}
            cols = (X, T) + tuple(set(Mb[T]) - {X})
            if are_independent(data[:, cols], alpha):
                Mb[T].remove(X)
                if debug:
                    print('Removing %s from MB of %s' % (str(X), str(T)))

    if feature_selection is not None:
        # _T is a one-element list and cannot be used as a dict key;
        # look the blanket up by the requested column itself.
        return Mb[feature_selection]

    # RESOLVE GRAPH STRUCTURE
    edge_dict = resolve_markov_blanket(Mb, data)
    if debug:
        print('Unoriented edge dict:\n %s' % str(edge_dict))
        print('MB: %s' % str(Mb))

    # ORIENT EDGES
    oriented_edge_dict = orient_edges_gs2(edge_dict, Mb, data, alpha)
    if debug:
        print('Oriented edge dict:\n %s' % str(oriented_edge_dict))

    # CREATE BAYESNET OBJECT
    # Map each column index to the list of distinct values in that column.
    value_dict = {
        idx: list(np.unique(col)) for idx, col in enumerate(data.T)
    }
    bn = BayesNet(oriented_edge_dict, value_dict)

    return bn
def gs(data, alpha=0.05, feature_selection=None, debug=False):
    """
    Perform grow-shrink algorithm over dataset to learn
    Bayesian network structure.

    This algorithm is clearly a good candidate for
    numba JIT compilation...

    STEPS
    -----
    1. Compute Markov Blanket
    2. Compute Graph Structure
    3. Orient Edges
    4. Remove Cycles
    5. Reverse Edges
    6. Propagate Directions

    Only steps 1-3 appear in this function body; whether steps 4-6
    happen inside ``orient_edges_MB`` is not visible here.

    Arguments
    ---------
    *data* : a nested numpy array
        Data from which you wish to learn structure. Columns are the
        random variables; it is passed through ``replace_strings``
        before any test is run.

    *alpha* : a float
        Type I error rate for independence test

    *feature_selection* : None or a single column index
        If None, learn the whole network structure. Otherwise only
        the Markov blanket of that one column is learned and
        returned. A list is rejected.

    *debug* : a boolean
        Whether to print progress messages.

    Returns
    -------
    *bn* : a BayesNet object
        When *feature_selection* is None.

    or

    *mb* : a list of column indices
        The Markov blanket of *feature_selection* when it is given.

    Effects
    -------
    None

    Notes
    -----
    ``mi_test`` is used as returning a p-value here: a value below
    *alpha* is treated as "dependent".

    Speed Test (original author's measurement):
        *** 5 variables, 624 observations ***
            - 63.7 ms
    """
    n_rv = data.shape[1]
    data, value_dict = replace_strings(data, return_values=True)

    if feature_selection is None:
        _T = range(n_rv)
    else:
        # Kept as an assert so callers that expect AssertionError still work.
        assert (not isinstance(feature_selection, list)), \
            'feature_selection must be only one value'
        _T = [feature_selection]

    # STEP 1 : COMPUTE MARKOV BLANKETS
    Mb = {rv: [] for rv in range(n_rv)}

    for X in _T:
        S = []

        # GROW: keep sweeping over the variables until a full sweep
        # adds nothing new to S.
        grow_condition = True
        while grow_condition:
            grow_condition = False
            for Y in range(n_rv):
                if X != Y and Y not in S:
                    # if there exists some Y such that Y is dependent
                    # on X given S, add Y to S
                    cols = (X, Y) + tuple(S)
                    pval = mi_test(data[:, cols])
                    if pval < alpha:  # dependent
                        grow_condition = True  # keep searching
                        S.append(Y)

        # SHRINK: drop members that are independent of X given the
        # remaining members; repeat until a pass removes nothing.
        shrink_condition = True
        while shrink_condition:
            TEMP_S = []
            shrink_condition = False
            for Y in S:
                # condition on S-{Y}
                rest = [Z for Z in S if Z != Y]
                # if X independent of Y given S-{Y}, leave Y out
                # if X dependent of Y given S-{Y}, keep it in
                cols = (X, Y) + tuple(rest)
                pval = mi_test(data[:, cols])
                if pval < alpha:  # dependent
                    TEMP_S.append(Y)
                else:  # independent -> run another pass
                    shrink_condition = True
            # Carry the reduced set into the next pass. Without this the
            # loop re-tests the same S forever once anything is dropped.
            S = TEMP_S

        Mb[X] = S
        if debug:
            print('Markov Blanket for %s : %s' % (X, str(S)))

    if feature_selection is not None:
        # _T is a one-element list and cannot be used as a dict key;
        # look the blanket up by the requested column itself.
        return Mb[feature_selection]

    # STEP 2: COMPUTE GRAPH STRUCTURE
    # i.e. Resolve Markov Blanket
    edge_dict = resolve_markov_blanket(Mb, data)
    if debug:
        print('Unoriented edge dict:\n %s' % str(edge_dict))

    # STEP 3: ORIENT EDGES
    oriented_edge_dict = orient_edges_MB(edge_dict, Mb, data, alpha)
    if debug:
        print('Oriented edge dict:\n %s' % str(oriented_edge_dict))

    # CREATE BAYESNET OBJECT
    bn = BayesNet(oriented_edge_dict, value_dict)

    return bn
def gs(data, alpha=0.05, feature_selection=None, debug=False):
    """
    Perform grow-shrink algorithm over dataset to learn
    Bayesian network structure.

    This algorithm is clearly a good candidate for
    numba JIT compilation...

    STEPS
    -----
    1. Compute Markov Blanket
    2. Compute Graph Structure
    3. Orient Edges
    4. Remove Cycles
    5. Reverse Edges
    6. Propagate Directions

    Only steps 1-3 appear in this function body; whether steps 4-6
    happen inside ``orient_edges_MB`` is not visible here.

    Arguments
    ---------
    *data* : a nested numpy array
        Data from which you wish to learn structure. Columns are the
        random variables; it is passed through ``replace_strings``
        before any test is run.

    *alpha* : a float
        Type I error rate for independence test

    *feature_selection* : None or a single column index
        If None, learn the whole network structure. Otherwise only
        the Markov blanket of that one column is learned and
        returned. A list is rejected.

    *debug* : a boolean
        Whether to print progress messages.

    Returns
    -------
    *bn* : a BayesNet object
        When *feature_selection* is None.

    or

    *mb* : a list of column indices
        The Markov blanket of *feature_selection* when it is given.

    Effects
    -------
    None

    Notes
    -----
    ``mi_test`` is used as returning a p-value here: a value below
    *alpha* is treated as "dependent".

    Speed Test (original author's measurement):
        *** 5 variables, 624 observations ***
            - 63.7 ms
    """
    n_rv = data.shape[1]
    data, value_dict = replace_strings(data, return_values=True)

    if feature_selection is None:
        _T = range(n_rv)
    else:
        # Kept as an assert so callers that expect AssertionError still work.
        assert (not isinstance(feature_selection, list)), \
            'feature_selection must be only one value'
        _T = [feature_selection]

    # STEP 1 : COMPUTE MARKOV BLANKETS
    Mb = {rv: [] for rv in range(n_rv)}

    for X in _T:
        S = []

        # GROW: keep sweeping over the variables until a full sweep
        # adds nothing new to S.
        grow_condition = True
        while grow_condition:
            grow_condition = False
            for Y in range(n_rv):
                if X != Y and Y not in S:
                    # if there exists some Y such that Y is dependent
                    # on X given S, add Y to S
                    cols = (X, Y) + tuple(S)
                    pval = mi_test(data[:, cols])
                    if pval < alpha:  # dependent
                        grow_condition = True  # keep searching
                        S.append(Y)

        # SHRINK: drop members that are independent of X given the
        # remaining members; repeat until a pass removes nothing.
        shrink_condition = True
        while shrink_condition:
            TEMP_S = []
            shrink_condition = False
            for Y in S:
                # condition on S-{Y}
                rest = [Z for Z in S if Z != Y]
                # if X independent of Y given S-{Y}, leave Y out
                # if X dependent of Y given S-{Y}, keep it in
                cols = (X, Y) + tuple(rest)
                pval = mi_test(data[:, cols])
                if pval < alpha:  # dependent
                    TEMP_S.append(Y)
                else:  # independent -> run another pass
                    shrink_condition = True
            # Carry the reduced set into the next pass. Without this the
            # loop re-tests the same S forever once anything is dropped.
            S = TEMP_S

        Mb[X] = S
        if debug:
            print('Markov Blanket for %s : %s' % (X, str(S)))

    if feature_selection is not None:
        # _T is a one-element list and cannot be used as a dict key;
        # look the blanket up by the requested column itself.
        return Mb[feature_selection]

    # STEP 2: COMPUTE GRAPH STRUCTURE
    # i.e. Resolve Markov Blanket
    edge_dict = resolve_markov_blanket(Mb, data)
    if debug:
        print('Unoriented edge dict:\n %s' % str(edge_dict))

    # STEP 3: ORIENT EDGES
    oriented_edge_dict = orient_edges_MB(edge_dict, Mb, data, alpha)
    if debug:
        print('Oriented edge dict:\n %s' % str(oriented_edge_dict))

    # CREATE BAYESNET OBJECT
    bn = BayesNet(oriented_edge_dict, value_dict)

    return bn
def iamb(data, alpha=0.05, feature_selection=None, debug=False):
    """
    IAMB Algorithm for learning the structure of a
    Discrete Bayesian Network from data.

    For every target variable a Markov blanket is built in two passes:
    a growing phase that repeatedly adds the candidate with the highest
    score from ``mi_test(..., test=False)`` as long as that candidate is
    not independent of the target given the current blanket, and a
    shrinking phase that drops every member which is independent of the
    target given the rest of the blanket.

    Arguments
    ---------
    *data* : a nested numpy array
        Two-dimensional; columns are the random variables
        (``data.shape[1]`` is used as the number of variables).

    *alpha* : a float
        Significance level handed to ``are_independent`` in both
        phases (i.e. the type I error rate of the tests).

    *feature_selection* : None or a single column index
        If None, learn the whole network structure. Otherwise
        only the Markov blanket of that one column is learned and
        returned. A list is rejected.

    *debug* : a boolean
        Whether to print progress messages.

    Returns
    -------
    *bn* : a BayesNet object
        When *feature_selection* is None.

    or

    *mb* : the markov blanket of a node
        A list of column indices, when *feature_selection* is given.

    Effects
    -------
    None

    Notes
    -----
    - ``mi_test(..., test=False)`` is assumed to return a score where
      larger means stronger dependence -- TODO confirm against mi_test.

    Speed Test (original author's measurement):
        *** 5 vars, 624 obs ***
            - 196 ms
    """
    n_rv = data.shape[1]
    Mb = {rv: [] for rv in range(n_rv)}

    if feature_selection is None:
        _T = range(n_rv)
    else:
        # Kept as an assert so callers that expect AssertionError still work.
        assert (not isinstance(feature_selection, list)), \
            'feature_selection must be only one value'
        _T = [feature_selection]

    # LEARN MARKOV BLANKET
    for T in _T:
        V = set(range(n_rv)) - {T}
        Mb_change = True

        # GROWING PHASE
        while Mb_change:
            Mb_change = False
            # find X_max in V-Mb(T)-{T} that maximizes
            # mutual information of X,T|Mb(T)
            # i.e. max of mi_test(data[:,(X,T,Mb(T))])
            max_val = -1
            max_x = None
            for X in V - set(Mb[T]):
                cols = (X, T) + tuple(Mb[T])
                mi_val = mi_test(data[:, cols], test=False)
                if mi_val > max_val:
                    max_val = mi_val
                    max_x = X

            if max_x is None:
                # No candidate left (or none scored above the floor).
                break

            # Add X_max only if it is dependent on T given Mb(T).
            cols = (max_x, T) + tuple(Mb[T])
            if not are_independent(data[:, cols], alpha):
                Mb[T].append(max_x)
                Mb_change = True
                if debug:
                    print('Adding %s to MB of %s' % (str(max_x), str(T)))

        # SHRINKING PHASE
        # Iterate over a snapshot: members are removed from Mb[T] in the
        # loop, and mutating a list while iterating it skips elements.
        for X in list(Mb[T]):
            # if x is independent of t given Mb(T) - {x}
            cols = (X, T) + tuple(set(Mb[T]) - {X})
            if are_independent(data[:, cols], alpha):
                Mb[T].remove(X)
                if debug:
                    print('Removing %s from MB of %s' % (str(X), str(T)))

    if feature_selection is not None:
        # _T is a one-element list and cannot be used as a dict key;
        # look the blanket up by the requested column itself.
        return Mb[feature_selection]

    # RESOLVE GRAPH STRUCTURE
    edge_dict = resolve_markov_blanket(Mb, data)
    if debug:
        print('Unoriented edge dict:\n %s' % str(edge_dict))
        print('MB: %s' % str(Mb))

    # ORIENT EDGES
    oriented_edge_dict = orient_edges_gs2(edge_dict, Mb, data, alpha)
    if debug:
        print('Oriented edge dict:\n %s' % str(oriented_edge_dict))

    # CREATE BAYESNET OBJECT
    # Map each column index to the list of distinct values in that column.
    value_dict = {
        idx: list(np.unique(col)) for idx, col in enumerate(data.T)
    }
    bn = BayesNet(oriented_edge_dict, value_dict)

    return bn