def test_io_from_structure():
    """Check that array input and generator input yield the same fitted network.

    One network is fitted from the module-level ``X`` and ``weights``, the
    other from the module-level ``data_generator``; both use the same fixed
    structure. Their log probabilities on ``X`` must agree.
    """
    parents = ((2,), (0, 2), ())

    from_arrays = BayesianNetwork.from_structure(
        X=X, weights=weights, structure=parents)
    from_generator = BayesianNetwork.from_structure(
        X=data_generator, structure=parents)

    assert_array_almost_equal(
        from_arrays.log_probability(X),
        from_generator.log_probability(X),
    )
def train(input_data, structure, fields, prior_data=None):
    """Fit one Bayesian network per data type and bundle them into a model.

    The training data is expected to contain no missing values.

    Args:
        input_data (SegmentedData): typed data to train on; its
            ``type_to_data`` mapping and ``segmenter`` are read.
        structure (iterable(iterable)): structure as returned from
            define_bayes_net_structure
        fields (list(unicode)): field names to learn
        prior_data (list(data)): optional list of training samples appended
            to the data of every type before fitting, acting as a prior for
            each network.

    Return:
        BayesianNetworkModel: A predictive model trained on the given data
    """
    def _with_prior(rows):
        # Without a prior the rows are passed through untouched.
        if prior_data is None:
            return rows
        # Build a fresh list so the caller's data is never extended in place.
        return list(rows) + list(prior_data)

    type_to_network = {
        type_: BayesianNetwork.from_structure(_with_prior(rows), structure)
        for type_, rows in input_data.type_to_data.items()
    }
    return BayesianNetworkModel(
        type_to_network, fields, segmenter=input_data.segmenter)
def test_from_structure():
    """Fit a network with a fixed structure and check structure and likelihood."""
    samples = datasets[1]
    expected_structure = ((1, 2), (4,), (), (), (3,))

    network = BayesianNetwork.from_structure(
        samples, structure=expected_structure)

    # The fitted model must report exactly the structure it was given.
    assert_equal(network.structure, expected_structure)

    # Summed log probability over the training samples, to 4 decimals.
    total_logp = network.log_probability(samples).sum()
    assert_almost_equal(total_logp, -344.38287, 4)
def _likelihoods(cls, real_data, synthetic_data, metadata=None, structure=None):
    """Score each synthetic row under a Bayesian network fitted to the real data.

    Only the fields selected as ``categorical`` or ``boolean`` are used. A
    ``structure`` entry in the validated metadata takes precedence over the
    ``structure`` argument; when no structure is available it is learned
    with the ``chow-liu`` algorithm.

    Args:
        real_data: table the network is fitted on.
        synthetic_data: table whose rows are scored.
        metadata: optional metadata, passed through ``cls._validate_inputs``.
        structure: optional network structure, either in the form accepted by
            ``BayesianNetwork.from_structure`` or as a dict that is
            JSON-serialised and loaded via ``BayesianNetwork.from_json``.

    Returns:
        numpy.ndarray: one probability per synthetic row (0 for rows where
        scoring raises ``ValueError``), or an array of NaN with one entry per
        real row when there are no usable fields.
    """
    metadata = cls._validate_inputs(real_data, synthetic_data, metadata)
    structure = metadata.get('structure', structure)
    fields = cls._select_fields(metadata, ('categorical', 'boolean'))

    if not fields:
        return np.full(len(real_data), np.nan)

    LOGGER.debug('Fitting the BayesianNetwork to the real data')
    if not structure:
        bn = BayesianNetwork.from_samples(
            real_data[fields].to_numpy(), algorithm='chow-liu')
    else:
        if isinstance(structure, dict):
            # A dict is treated as a serialised network; only its structure
            # is reused, the parameters are re-fitted below.
            serialised = json.dumps(structure)
            structure = BayesianNetwork.from_json(serialised).structure

        bn = BayesianNetwork.from_structure(
            real_data[fields].to_numpy(), structure)

    def _row_probability(row):
        # Rows the network cannot score are given probability 0.
        try:
            return bn.probability([row.to_numpy()])
        except ValueError:
            return 0

    LOGGER.debug('Evaluating likelihood of the synthetic data')
    probabilities = [
        _row_probability(row)
        for _, row in synthetic_data[fields].iterrows()
    ]
    return np.asarray(probabilities)
def _evaluate_bayesian_likelihood(train, test, metadata):
    """Compute mean log-likelihoods of the train and test data.

    Two networks are used: ``bn1`` is loaded from ``metadata['structure']``
    and scores the mapped training data; ``bn2`` reuses the structure of
    ``bn1`` but is re-fitted on the mapped training data and scores the
    mapped test data row by row.

    Args:
        train: training data, converted with ``_mapper(train, metadata)``.
        test: test data, converted with ``_mapper(test, metadata)``.
        metadata: mapping with a JSON-serialisable ``'structure'`` entry.

    Returns:
        pandas.DataFrame: a single row with the columns ``name``,
        ``syn_likelihood`` and ``test_likelihood``.
    """
    LOGGER.info('Evaluating using Bayesian Likelihood.')

    train_mapped = _mapper(train, metadata)
    test_mapped = _mapper(test, metadata)

    structure_json = json.dumps(metadata['structure'])
    bn1 = BayesianNetwork.from_json(structure_json)
    bn2 = BayesianNetwork.from_structure(train_mapped, bn1.structure)

    # 1e-8 keeps the logarithm finite for zero probabilities.
    l1 = np.mean(np.log(bn1.probability(train_mapped) + 1e-8))

    l2_probs = []
    failed = 0
    for item in test_mapped:
        try:
            l2_probs.append(bn2.probability([item]))
        except ValueError:
            # Rows the re-fitted network cannot score count as probability 0.
            failed += 1
            l2_probs.append(0)

    if failed:
        # Report the fallback count instead of silently discarding it.
        LOGGER.warning(
            '%d of %d test rows could not be scored and were assigned '
            'probability 0.', failed, len(l2_probs))

    l2 = np.mean(np.log(np.asarray(l2_probs) + 1e-8))

    return pd.DataFrame([{
        "name": "Bayesian Likelihood",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }])
def _evaluate_bayesian_likelihood(train, test, metadata):
    """Compute mean log-likelihoods of the train and test data.

    The network loaded from ``metadata['structure']`` scores the mapped
    training rows. A second network with the same structure, re-fitted on
    the mapped training rows, scores the mapped test rows.

    Args:
        train: training data, converted with ``_mapper(train, metadata)``.
        test: test data, converted with ``_mapper(test, metadata)``.
        metadata: mapping with a JSON-serialisable ``'structure'`` entry.

    Returns:
        pandas.DataFrame: a single row with the columns ``name``,
        ``syn_likelihood`` and ``test_likelihood``.
    """
    LOGGER.info('Evaluating using Bayesian Likelihood.')

    def _mean_log_likelihood(network, rows):
        # Score rows one at a time; any row that fails gets the 1e-8 floor.
        scores = []
        for row in rows:
            try:
                scores.append(network.probability(row))
            except Exception:
                scores.append(1e-8)
        # The extra 1e-8 keeps the logarithm finite for zero probabilities.
        return np.mean(np.log(np.asarray(scores) + 1e-8))

    reference_net = BayesianNetwork.from_json(
        json.dumps(metadata['structure']))

    train_mapped = _mapper(train, metadata)
    test_mapped = _mapper(test, metadata)

    syn_likelihood = _mean_log_likelihood(reference_net, train_mapped)

    refitted_net = BayesianNetwork.from_structure(
        train_mapped, reference_net.structure)
    test_likelihood = _mean_log_likelihood(refitted_net, test_mapped)

    summary = {
        "name": "Bayesian Likelihood",
        "syn_likelihood": syn_likelihood,
        "test_likelihood": test_likelihood,
    }
    return pd.DataFrame([summary])
def default_bayesian_likelihood(dataset, trainset, testset, meta):
    """Compute mean log-likelihoods of the train and test sets for a dataset.

    The reference network is loaded from the single file matching
    ``data/*/<dataset>_structure.json`` and scores the mapped training rows.
    A second network with the same structure, re-fitted on the mapped
    training rows, scores the mapped test rows.

    Args:
        dataset: dataset name used to locate the structure file.
        trainset: training data, converted with ``mapper(trainset, meta)``.
        testset: test data, converted with ``mapper(testset, meta)``.
        meta: metadata handed to ``mapper``.

    Returns:
        list[dict]: one dict with the keys ``name``, ``syn_likelihood`` and
        ``test_likelihood``.

    Raises:
        AssertionError: if the glob does not match exactly one file.
    """
    def _mean_log_likelihood(network, rows):
        # Score rows one at a time; a row that fails gets the 1e-8 floor.
        scores = []
        for row in rows:
            try:
                scores.append(network.probability(row))
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                scores.append(1e-8)
        # The extra 1e-8 keeps the logarithm finite for zero probabilities.
        return np.mean(np.log(np.asarray(scores) + 1e-8))

    struct = glob.glob("data/*/{}_structure.json".format(dataset))
    assert len(struct) == 1
    bn1 = BayesianNetwork.from_json(struct[0])

    trainset_mapped = mapper(trainset, meta)
    testset_mapped = mapper(testset, meta)

    l1 = _mean_log_likelihood(bn1, trainset_mapped)

    bn2 = BayesianNetwork.from_structure(trainset_mapped, bn1.structure)
    l2 = _mean_log_likelihood(bn2, testset_mapped)

    return [{
        "name": "default",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }]
def train_model(data: np.ndarray, clusters: int = 5, init_nodes: list = None) -> BayesianNetwork:
    """Learn a Bayesian network over ``data`` plus one hidden variable.

    Steps, as performed by the code:

    1. Cluster ``data`` with K-means and build a discrete distribution from
       the cluster labels.
    2. Sample one hidden value per row from that distribution and append it
       to ``data`` as the last column.
    3. Learn a structure with ``hc_rr``, passing the index of the hidden
       column as ``latent``.
    4. Fit a ``BayesianNetwork`` with that structure on the augmented data.
    5. Replace the hidden column with NaN, then call ``predict`` and ``fit``
       on that data.

    Args:
        data: input samples; ``data.shape[0]`` is used as the row count.
        clusters: number of K-means clusters.
        init_nodes: forwarded unchanged to ``hc_rr``.

    Returns:
        The fitted and baked ``BayesianNetwork``.
    """
    bn = BayesNet()
    row_count = data.shape[0]

    # Cluster the initial data in order to fill in a hidden variable based
    # on the distribution of clusters.
    cluster_labels = KMeans(n_clusters=clusters, random_state=0).fit(data).labels_
    label_distribution = DiscreteDistribution.from_samples(cluster_labels)
    sampled_hidden = np.array(label_distribution.sample(row_count))
    augmented = np.column_stack((data, sampled_hidden))
    latent_index = augmented.shape[1] - 1

    # Train the network structure on the data, taking the hidden variable
    # into account.
    bn = hc_rr(augmented, latent=latent_index, init_nodes=init_nodes)
    structure = tuple(
        tuple(bn.F[node]['parents']) for node in sorted(bn.nodes())
    )
    bn = BayesianNetwork.from_structure(augmented, structure)
    bn.bake()

    # Learn the hidden variable: mask its column with NaN and refit.
    masked = np.column_stack((data, np.full(row_count, np.nan)))
    bn.predict(masked)
    bn.fit(masked)
    bn.bake()
    return bn
def test_from_structure():
    """Fit a network with a fixed structure and verify it survives JSON.

    The fitted model must report the structure it was given and the
    expected summed log probability. The same two checks are then applied
    to a copy rebuilt from the model's JSON serialisation.
    """
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))

    model = BayesianNetwork.from_structure(X, structure=structure)
    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = BayesianNetwork.from_json(model.to_json())
    assert_equal(model2.structure, structure)
    # Check the deserialised model; the original re-checked ``model`` here,
    # which left the round-tripped parameters unverified.
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)
def test_robust_from_structure():
    """Fit a network, round-trip it through the generic ``from_json``, and compare.

    The rebuilt model must keep the structure, reproduce the summed log
    probability, and store its first distribution parameter with the same
    Python type as the original model.
    """
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))

    model = BayesianNetwork.from_structure(X, structure=structure)
    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = from_json(model.to_json())
    assert_equal(model2.structure, structure)
    # Check the deserialised model; the original re-checked ``model`` here,
    # which left the round-tripped parameters unverified.
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)

    # The first stored parameter value must have the same type on both sides.
    model_dtype = type(model.states[0].distribution.parameters[0][0][0])
    model2_dtype = type(model2.states[0].distribution.parameters[0][0][0])
    assert_equal(model_dtype, model2_dtype)
def _train_kronos_for_ecosystem(cls, kronos_dependency_dict, package_occurrence_df):
    """Fit a pomegranate Bayesian network from a Kronos dependency description.

    Args:
        kronos_dependency_dict: mapping read under the keys
            ``pgm_constants.KD_PARENT_TUPLE_LIST``,
            ``pgm_constants.KD_PACKAGE_LIST`` and
            ``pgm_constants.KD_INTENT_LIST``.
        package_occurrence_df: pandas DataFrame converted to the training
            matrix by ``utils.generate_matrix_from_pandas_df``.

    Returns:
        The ``BayesianNetwork`` fitted with the given structure. Its state
        names are the package names followed by the intent names.
    """
    kronos_dependency_list_string = kronos_dependency_dict[
        pgm_constants.KD_PARENT_TUPLE_LIST]
    kronos_node_list = (
        kronos_dependency_dict[pgm_constants.KD_PACKAGE_LIST]
        + kronos_dependency_dict[pgm_constants.KD_INTENT_LIST]
    )

    # pomegranate needs text state names on both Python 2 and Python 3:
    # decode byte strings, stringify everything else. ``isinstance`` also
    # covers ``bytes`` subclasses, which ``type(...) == bytes`` would have
    # passed to ``str()`` and turned into "b'...'".
    kronos_node_string_list = [
        node_name.decode('utf-8') if isinstance(node_name, bytes) else str(node_name)
        for node_name in kronos_node_list
    ]

    kronos_dependency_list = utils.generate_kronos_dependency_list_for_pomegranate(
        kronos_dependency_list_string)

    package_occurrence_matrix = utils.generate_matrix_from_pandas_df(
        package_occurrence_df, kronos_node_list)

    pgm_model = BayesianNetwork.from_structure(
        package_occurrence_matrix,
        structure=kronos_dependency_list,
        state_names=kronos_node_string_list)

    return pgm_model
def BIC_score(data, Pa, card, struct):
    """Return a BIC-style score for the network ``struct`` fitted on ``data``.

    The score is the summed log probability of ``data`` under the fitted
    network, minus ``log(n) * indep_params(Pa, card) / 2`` with
    ``n = data.shape[0]``, minus a penalty term.

    The penalty is 0 when, after merging overlapping "node plus parents"
    groups, at least one group holds as many entries as there are nodes;
    otherwise it is 100000000.
    NOTE(review): this looks like a connectivity check on the graph - confirm.

    Args:
        data: samples; ``data.shape[0]`` is used as the sample count.
        Pa: per-node parent lists, indexed by node.
        card: passed to ``indep_params`` together with ``Pa``.
        struct: structure handed to ``BayesianNetwork.from_structure``.

    Returns:
        The penalised score.
    """
    # One group per node: a copy of its parents followed by the node itself.
    groups = []
    for node in range(len(Pa)):
        members = Pa[node].copy()
        members.append(node)
        groups.append(members)

    # Merge every pair of groups that shares a member. The pass is
    # order-dependent, so the original iteration order is kept.
    group_count = len(groups)
    for first in range(group_count - 1):
        for second in range(first + 1, group_count):
            if set(groups[first]) & set(groups[second]):
                groups[first] = list(set(groups[first] + groups[second]))
                groups[second] = list(set(groups[first] + groups[second]))

    spans_all_nodes = any(len(group) == len(groups) for group in groups)
    penalty = 0 if spans_all_nodes else 100000000

    model = BayesianNetwork.from_structure(data, struct)
    log_likelihood = model.log_probability(data).sum()
    complexity = np.log(data.shape[0]) * indep_params(Pa, card) / 2
    return log_likelihood - complexity - penalty
# Keep index 1 of the last axis, then only the first num_f2 columns.
X = X[:, :, 1]
print(X.shape)
X = X[:, :num_f2]

# Binarise: True where a value exceeds the mean of the whole array.
X = X > numpy.mean(X)
plt.imshow(X)
plt.show()

# Column indices of the two feature groups.
f1 = tuple(range(0, num_f1))
f2 = tuple(range(num_f1, num_f2))

# The first group has no parents; every column of the second group has
# all columns of the first group as parents.
structure = (
    tuple(() for _ in range(0, num_f1))
    + tuple(f1 for _ in range(num_f1, num_f2))
)
print(structure)

model = BayesianNetwork.from_structure(X, structure)
model.plot()
plt.show()

# Probability of every row under the fitted network.
p = model.probability(X)
print(p.shape)
print(p)