Esempio n. 1
0
def train_model(data: np.ndarray,
                clusters: int = 5,
                init_nodes: list = None) -> BayesianNetwork:
    """Train a Bayesian network on *data* extended with one hidden variable.

    The hidden variable is appended as the last column. Its initial values
    are drawn from the distribution of KMeans cluster labels; the network
    structure is learned with ``hc_rr`` on the extended data, and the
    resulting network is then re-fitted with the hidden column set to NaN.

    Arguments
    ---------
    *data* : a numpy ndarray
        Observed samples, one row per sample (``data.shape[0]`` rows).

    *clusters* : an int
        Number of KMeans clusters used to build the hidden variable's
        initial distribution.

    *init_nodes* : a list or None
        Passed through unchanged to ``hc_rr``.

    Returns
    -------
    *bn* : a BayesianNetwork object
        The network built from the learned structure, fitted and baked.
    """
    n_samples = data.shape[0]

    # Cluster the initial data in order to fill in a hidden variable based on
    # the distribution of clusters. Note the column holds values *sampled*
    # from the label distribution, not the per-row labels themselves.
    labels = KMeans(n_clusters=clusters, random_state=0).fit(data).labels_
    hidden_dist = DiscreteDistribution.from_samples(labels)
    hidden_var = np.array(hidden_dist.sample(n_samples))
    augmented = np.column_stack((data, hidden_var))
    latent = augmented.shape[1] - 1  # index of the appended hidden column

    # Train the network structure on data taking into account a hidden variable
    learned = hc_rr(augmented, latent=latent, init_nodes=init_nodes)

    # One tuple of parents per node, listed in sorted node order.
    structure = tuple(
        tuple(learned.F[rv]['parents']) for rv in sorted(learned.nodes())
    )
    bn = BayesianNetwork.from_structure(augmented, structure)
    bn.bake()

    # Learn a hidden variable: blank the hidden column out and re-fit.
    masked = np.column_stack((data, np.full(n_samples, np.nan)))
    # NOTE(review): the return value of predict() is discarded, so fit() below
    # receives the NaN-filled column as-is - confirm this is intended.
    bn.predict(masked)
    bn.fit(masked)
    bn.bake()
    return bn
Esempio n. 2
0
def mmhc(data, alpha=0.05, metric='AIC', max_iter=100, method='hc'):
    """
    Max-Min Hill Climbing Algorithm for
    learning a Bayesian Network structure
    from data.

    The edge restrictions are obtained from ``mmpc`` and then handed to
    one of the hill-climbing routines selected by *method*.

    Arguments
    ---------
    *data* : a numpy ndarray

    *alpha* : a float
        Significance threshold intended for the independence tests.
        NOTE(review): this value is not forwarded to ``mmpc`` or to the
        hill-climbing call in this function - confirm whether it should be.

    *metric* : a string
        Which score metric to use.
        Options:
            - 'AIC'
            - 'BIC'
            - 'LL' (log-likelihood)

    *max_iter* : an int
        Passed through to the selected hill-climbing routine.

    *method* : a string
        The type of hill-climbing algorithm to run
        OPTIONS:
            - 'hc' : normal hill-climbing
            - 'rr' : hill-climbing with random restarts
            - 'tabu' : tabu hill-climbing
        Any other value falls back to normal hill-climbing.

    Returns
    -------
    *bn* : a BayesNet object

    """
    # Get edge restrictions from MMPC: every (key, member) pair of the
    # returned mapping becomes one restriction entry.
    PC_dict = mmpc(data)
    restriction = [(y, x) for y, pc in PC_dict.items() for x in pc]

    # Pick the hill-climbing variant, then run it with the edge restrictions.
    if method == 'tabu':
        search = tabu
    elif method == 'rr':
        search = hc_rr
    else:
        search = hc

    return search(data=data,
                  metric=metric,
                  max_iter=max_iter,
                  restriction=restriction)
Esempio n. 3
0
def mdbn(data, f_cols, c_cols, f_struct='DAG', c_struct='DAG', wrapper=False):
	"""
	Learn the structure of a Multi-Dimensional Bayesian Network -
	typically used for Classification.

	The feature columns and the class columns are each given to
	``hc_rr`` separately, and the two resulting networks are then
	joined by ``bridge`` using the full data set.

	Note that this structure does not have to be used for classification,
	since it simply returns a Bayesian Network - albeit with a more
	unique structure than traditionally found. Any other application of
	this bipartite-like BN structure learning can use this algorithm too.

	Arguments
	---------
	*data* : a numpy ndarray (indexed as ``data[:, cols]``)

	*f_cols* : column selector for the feature variables

	*c_cols* : column selector for the class variables

	*f_struct*, *c_struct*, *wrapper* :
		Accepted for interface compatibility; not used by this function.

	Returns
	-------
	*mbc_bn* : whatever ``bridge`` returns for the two learned networks.
	"""
	# Split the data set into its feature part and its class part.
	feature_part = data[:,f_cols]
	class_part = data[:,c_cols]

	# Learn one structure per part, features first.
	feature_net = hc_rr(feature_part)
	class_net = hc_rr(class_part)

	# Join both structures into a single network.
	return bridge(c_bn=class_net, f_bn=feature_net, data=data)
Esempio n. 4
0
def bridge(c_bn, f_bn, data):
	"""
	Make a Multi-Dimensional Bayesian Network by
	bridging two Bayesian network structures. This happens by
	placing edges from c_bn -> f_bn using a heuristic
	optimization procedure.

	This can be used to create a Multi-Dimensional Bayesian
	Network classifier from two already-learned Bayesian networks -
	one of which is a BN containing all the class variables, the other
	containing all the feature variables.

	Arguments
	---------
	*c_bn* : a BayesNet object with known structure

	*f_bn* : a BayesNet object with known structure.

	*data* : the data set handed to ``hc_rr`` to learn the bridge edges.

	Returns
	-------
	*mbc_bn* : a merged/bridge BayesNet object,
		whose structure contains *c_bn*, *f_bn*, and some bridge
		edges between them.
	"""
	# only allow edges from c_bn -> f_bn
	restrict = [(u, v) for u in c_bn for v in f_bn]

	bridge_bn = hc_rr(data, restriction=restrict)

	# Merge the edge collections. m_bn is the same object as bridge_bn.E,
	# so the updates below modify bridge_bn in place.
	# NOTE(review): update() replaces entries whose keys already exist;
	# confirm the three networks use compatible node labels.
	m_bn = bridge_bn.E
	m_bn.update(c_bn.E)
	m_bn.update(f_bn.E)

	mbc_bn = BayesNet(E=m_bn)
	# Hand the merged network back to the caller; without this the
	# function would yield None despite its documented return value.
	return mbc_bn