Example #1
0
def test_io_from_structure():
    """Check that array input and generator input give matching networks.

    One network is built from ``X`` with ``weights`` and another from
    ``data_generator``, both using the same fixed structure; the log
    probabilities they assign to ``X`` must agree.
    """
    parents = ((2,), (0, 2), ())

    from_arrays = BayesianNetwork.from_structure(
        X=X, weights=weights, structure=parents)
    from_generator = BayesianNetwork.from_structure(
        X=data_generator, structure=parents)

    assert_array_almost_equal(
        from_arrays.log_probability(X),
        from_generator.log_probability(X))
Example #2
0
    def train(input_data, structure, fields, prior_data=None):
        """Fit one bayesian network per data type using a fixed structure.

        The training data must not contain missing values. Each call builds
        a brand new model; to fold additional data into an existing model,
        see `update`.

        Args:
            input_data (SegmentedData): typed data to train on
            structure (iterable(iterable)): structure as returned from
                    define_bayes_net_structure
            fields (list(unicode)): field names to learn
            prior_data (list(data)): optional list of training samples that
                    is appended to the samples of every type before fitting.

        Returns:
            BayesianNetworkModel: A predictive model trained on the given data

        """
        def _fit(samples):
            # Build a new list so the caller's data is never mutated.
            if prior_data is not None:
                samples = list(samples) + list(prior_data)
            return BayesianNetwork.from_structure(samples, structure)

        type_to_network = {
            type_: _fit(samples)
            for type_, samples in input_data.type_to_data.items()
        }
        return BayesianNetworkModel(type_to_network,
                                    fields,
                                    segmenter=input_data.segmenter)
Example #3
0
def test_from_structure():
    """Fit a network with a fixed structure and check structure and fit."""
    samples = datasets[1]
    expected_structure = ((1, 2), (4,), (), (), (3,))

    network = BayesianNetwork.from_structure(
        samples, structure=expected_structure)

    assert_equal(network.structure, expected_structure)
    total_logp = network.log_probability(samples).sum()
    assert_almost_equal(total_logp, -344.38287, 4)
Example #4
0
    def _likelihoods(cls,
                     real_data,
                     synthetic_data,
                     metadata=None,
                     structure=None):
        """Score synthetic rows with a BayesianNetwork fitted on the real data.

        Only the fields selected as ``categorical`` or ``boolean`` are used.
        A ``structure`` entry in the metadata returned by
        ``_validate_inputs`` takes precedence over the ``structure``
        argument. A dict structure is converted through
        ``BayesianNetwork.from_json``. Without any structure the network is
        learned from the real data with the ``chow-liu`` algorithm.

        Returns:
            numpy.ndarray: the probability of every synthetic row, with ``0``
            for rows on which the network raises ``ValueError``. When no
            field is selected, an array of NaN is returned instead.
        """
        metadata = cls._validate_inputs(real_data, synthetic_data, metadata)
        structure = metadata.get('structure', structure)
        fields = cls._select_fields(metadata, ('categorical', 'boolean'))

        if not fields:
            # NOTE(review): sized by real_data although the regular result
            # has one entry per synthetic row -- confirm this is intended.
            return np.full(len(real_data), np.nan)

        LOGGER.debug('Fitting the BayesianNetwork to the real data')
        if not structure:
            bn = BayesianNetwork.from_samples(real_data[fields].to_numpy(),
                                              algorithm='chow-liu')
        else:
            if isinstance(structure, dict):
                network_json = json.dumps(structure)
                structure = BayesianNetwork.from_json(network_json).structure

            bn = BayesianNetwork.from_structure(real_data[fields].to_numpy(),
                                                structure)

        def _row_probability(row):
            # Rows the network cannot evaluate count as impossible.
            try:
                return bn.probability([row.to_numpy()])
            except ValueError:
                return 0

        LOGGER.debug('Evaluating likelihood of the synthetic data')
        return np.asarray([
            _row_probability(row)
            for _, row in synthetic_data[fields].iterrows()
        ])
Example #5
0
def _evaluate_bayesian_likelihood(train, test, metadata):
    """Compute mean log-likelihoods of ``train`` and ``test`` under two networks.

    ``bn1`` is the network described by ``metadata['structure']``. ``bn2``
    reuses the structure of ``bn1`` but is fitted on the mapped ``train``
    data.

    Args:
        train: data passed through ``_mapper`` and scored with ``bn1``; also
            used to fit ``bn2``.
        test: data passed through ``_mapper`` and scored row by row with
            ``bn2``.
        metadata: mapping with a JSON-serialisable ``'structure'`` entry;
            also handed to ``_mapper``.

    Returns:
        pandas.DataFrame: a single row with the columns ``name``,
        ``syn_likelihood`` (mean log probability of ``train`` under ``bn1``)
        and ``test_likelihood`` (mean log probability of ``test`` under
        ``bn2``).
    """
    LOGGER.info('Evaluating using Bayesian Likelihood.')

    train_mapped = _mapper(train, metadata)
    test_mapped = _mapper(test, metadata)

    structure_json = json.dumps(metadata['structure'])
    bn1 = BayesianNetwork.from_json(structure_json)
    bn2 = BayesianNetwork.from_structure(train_mapped, bn1.structure)

    # 1e-8 keeps the logarithm finite for zero probabilities.
    l1 = np.mean(np.log(bn1.probability(train_mapped) + 1e-8))

    l2_probs = []
    failed = 0
    for item in test_mapped:
        try:
            l2_probs.append(bn2.probability([item]))
        except ValueError:
            # Rows bn2 cannot evaluate are scored as impossible.
            failed += 1
            l2_probs.append(0)

    if failed:
        # Report the fallback instead of dropping the count silently.
        LOGGER.warning(
            '%d of %d test rows could not be scored; probability 0 was used.',
            failed, len(l2_probs))

    l2 = np.mean(np.log(np.asarray(l2_probs) + 1e-8))

    return pd.DataFrame([{
        "name": "Bayesian Likelihood",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }])
Example #6
0
def _evaluate_bayesian_likelihood(train, test, metadata):
    """Compute mean log-likelihoods of ``train`` and ``test`` under two networks.

    The reference network is loaded from ``metadata['structure']`` and used
    to score the mapped ``train`` rows. A second network with the same
    structure is then fitted on the mapped ``train`` rows and used to score
    the mapped ``test`` rows.

    Returns:
        pandas.DataFrame: a single row with the columns ``name``,
        ``syn_likelihood`` and ``test_likelihood``.
    """
    LOGGER.info('Evaluating using Bayesian Likelihood.')
    reference_network = BayesianNetwork.from_json(
        json.dumps(metadata['structure']))

    train_mapped = _mapper(train, metadata)
    test_mapped = _mapper(test, metadata)

    def _mean_log_probability(network, rows):
        # Rows the network fails on fall back to a probability of 1e-8;
        # a further 1e-8 is added to every row before taking the logarithm.
        probabilities = []
        for row in rows:
            try:
                probabilities.append(network.probability(row))
            except Exception:
                probabilities.append(1e-8)
        return np.mean(np.log(np.asarray(probabilities) + 1e-8))

    syn_likelihood = _mean_log_probability(reference_network, train_mapped)

    fitted_network = BayesianNetwork.from_structure(
        train_mapped, reference_network.structure)
    test_likelihood = _mean_log_probability(fitted_network, test_mapped)

    return pd.DataFrame([{
        "name": "Bayesian Likelihood",
        "syn_likelihood": syn_likelihood,
        "test_likelihood": test_likelihood,
    }])
Example #7
0
def default_bayesian_likelihood(dataset, trainset, testset, meta):
    """Compute mean log-likelihoods of the train and test sets for ``dataset``.

    The reference network is loaded from the single file matching
    ``data/*/<dataset>_structure.json``. It scores the mapped training rows.
    A second network with the same structure is fitted on the mapped
    training rows and scores the mapped test rows.

    Args:
        dataset: name used to locate the structure file.
        trainset: data passed through ``mapper``; scored with the reference
            network and used to fit the second network.
        testset: data passed through ``mapper``; scored with the second
            network.
        meta: forwarded to ``mapper``.

    Returns:
        list: a single dict with the keys ``name``, ``syn_likelihood`` and
        ``test_likelihood``.

    Raises:
        AssertionError: if the glob pattern does not match exactly one file.
    """
    struct = glob.glob("data/*/{}_structure.json".format(dataset))
    assert len(struct) == 1, (
        "expected exactly one structure file for {!r}, found {}".format(
            dataset, len(struct)))
    bn1 = BayesianNetwork.from_json(struct[0])

    trainset_mapped = mapper(trainset, meta)
    testset_mapped = mapper(testset, meta)

    def _mean_log_probability(network, rows):
        # Rows the network fails on fall back to a probability of 1e-8;
        # a further 1e-8 is added to every row before taking the logarithm.
        prob = []
        for item in rows:
            try:
                prob.append(network.probability(item))
            except Exception:
                # Not a bare ``except``: KeyboardInterrupt and SystemExit
                # must still propagate.
                prob.append(1e-8)
        return np.mean(np.log(np.asarray(prob) + 1e-8))

    l1 = _mean_log_probability(bn1, trainset_mapped)

    bn2 = BayesianNetwork.from_structure(trainset_mapped, bn1.structure)
    l2 = _mean_log_probability(bn2, testset_mapped)

    return [{
        "name": "default",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }]
Example #8
0
def train_model(data: np.ndarray,
                clusters: int = 5,
                init_nodes: list = None) -> BayesianNetwork:
    """Train a Bayesian network over ``data`` plus one hidden variable.

    The hidden variable is appended as an extra last column. It is first
    filled with values sampled from the distribution of KMeans cluster
    labels so that a structure can be learned, then replaced by NaN so the
    network parameters are re-fitted with the column treated as missing.

    Args:
        data: 2-D array of observations (rows are counted via
            ``data.shape[0]``).
        clusters: number of KMeans clusters used to seed the hidden variable.
        init_nodes: forwarded unchanged to ``hc_rr``.

    Returns:
        The fitted and baked ``BayesianNetwork``.
    """
    # Cluster the initial data in order to fill in a hidden variable based
    # on the distribution of clusters.
    kmeans = KMeans(n_clusters=clusters, random_state=0).fit(data)
    hidden_dist = DiscreteDistribution.from_samples(kmeans.labels_)
    hidden_var = np.array(hidden_dist.sample(data.shape[0]))
    new_data = np.column_stack((data, hidden_var))
    latent = new_data.shape[1] - 1  # index of the appended hidden column

    # Train the network structure on data taking into account the hidden
    # variable, then convert it to a tuple of parent tuples, one per node
    # in sorted node order.
    learned = hc_rr(new_data, latent=latent, init_nodes=init_nodes)
    structure = tuple(
        tuple(learned.F[rv]['parents']) for rv in sorted(learned.nodes())
    )
    bn = BayesianNetwork.from_structure(new_data, structure)
    bn.bake()

    # Learn the hidden variable: refit with the hidden column set to NaN.
    hidden_var = np.full(data.shape[0], np.nan)
    new_data = np.column_stack((data, hidden_var))
    # NOTE(review): the return value of predict() is discarded; the call is
    # kept as in the original -- confirm whether it is actually required.
    bn.predict(new_data)
    bn.fit(new_data)
    bn.bake()
    return bn
Example #9
0
def test_from_structure():
    """Check a fixed-structure fit and its JSON round trip.

    Both the fitted network and the network rebuilt from its JSON form must
    report the requested structure and the expected total log probability.
    """
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))
    model = BayesianNetwork.from_structure(X, structure=structure)

    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = BayesianNetwork.from_json(model.to_json())
    assert_equal(model2.structure, structure)
    # Score the round-tripped model, not the original one again.
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)
def test_robust_from_structure():
    """Check a fixed-structure fit and its round trip via generic ``from_json``.

    Besides structure and total log probability, the Python type of the
    first stored distribution parameter must survive the round trip.
    """
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))
    model = BayesianNetwork.from_structure(X, structure=structure)

    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = from_json(model.to_json())
    assert_equal(model2.structure, structure)
    # Score the round-tripped model, not the original one again.
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)

    model_dtype = type(model.states[0].distribution.parameters[0][0][0])
    model2_dtype = type(model2.states[0].distribution.parameters[0][0][0])
    assert_equal(model_dtype, model2_dtype)
Example #11
0
    def _train_kronos_for_ecosystem(cls, kronos_dependency_dict, package_occurrence_df):
        """Fit a BayesianNetwork for one ecosystem from package occurrences.

        Args:
            kronos_dependency_dict: mapping that provides the parent tuple
                list, the package list and the intent list under the
                ``pgm_constants.KD_*`` keys.
            package_occurrence_df: pandas data frame that is converted to a
                matrix whose columns follow the package + intent node list.

        Returns:
            The ``BayesianNetwork`` built by ``from_structure`` with the node
            names as state names.
        """
        kronos_dependency_list_string = kronos_dependency_dict[
            pgm_constants.KD_PARENT_TUPLE_LIST]
        kronos_node_list = (kronos_dependency_dict[pgm_constants.KD_PACKAGE_LIST] +
                            kronos_dependency_dict[pgm_constants.KD_INTENT_LIST])
        # State names must be text for pomegranate on both python2 and
        # python3: decode bytes, stringify everything else. isinstance also
        # covers bytes subclasses, which str() would render as "b'...'".
        kronos_node_string_list = [
            node_name.decode('utf-8') if isinstance(node_name, bytes) else str(node_name)
            for node_name in kronos_node_list]
        kronos_dependency_list = utils.generate_kronos_dependency_list_for_pomegranate(
            kronos_dependency_list_string)

        package_occurrence_matrix = utils.generate_matrix_from_pandas_df(package_occurrence_df,
                                                                         kronos_node_list)

        pgm_model = BayesianNetwork.from_structure(package_occurrence_matrix,
                                                   structure=kronos_dependency_list,
                                                   state_names=kronos_node_string_list)
        return pgm_model
Example #12
0
def BIC_score(data, Pa, card, struct):
    """Return a penalised BIC score of the network ``struct`` fitted on ``data``.

    The score is the summed log probability of ``data`` under the fitted
    network, minus ``log(n_samples) * indep_params(Pa, card) / 2``, minus an
    extra penalty of 100000000 unless some merged node group ends up
    containing every node.

    Args:
        data: sample array; ``data.shape[0]`` is used as the sample count.
        Pa: per-node iterables of parent indices (``Pa[i]`` holds the
            parents of node ``i``).
        card: forwarded to ``indep_params`` together with ``Pa``.
        struct: structure handed to ``BayesianNetwork.from_structure``.

    Returns:
        The penalised BIC value.
    """
    # One group per node: the node itself together with its parents.
    groups = [set(parents) | {node} for node, parents in enumerate(Pa)]

    # Single pass over ordered pairs: groups that share a node are both
    # replaced by their union.
    for ii in range(len(groups) - 1):
        for jj in range(ii + 1, len(groups)):
            if groups[ii] & groups[jj]:
                merged = groups[ii] | groups[jj]
                groups[ii] = merged
                groups[jj] = merged

    # NOTE(review): presumably a connectivity check -- the penalty is lifted
    # only when one group spans all nodes; confirm the intent.
    spans_all_nodes = any(len(group) == len(groups) for group in groups)
    ss = 0 if spans_all_nodes else 100000000

    model = BayesianNetwork.from_structure(data, struct)
    BIC = (model.log_probability(data).sum()
           - np.log(data.shape[0]) * indep_params(Pa, card) / 2
           - ss)
    return BIC
Example #13
0
# Keep index 1 of the last axis, restrict to the first num_f2 columns and
# binarise against the mean of the remaining values.
X = X[:, :, 1]
print(X.shape)
X = X[:, :num_f2]
X = X > numpy.mean(X)
plt.imshow(X)
plt.show()

# Column indices of the two feature groups.
f1 = tuple(range(0, num_f1))
f2 = tuple(range(num_f1, num_f2))

# The first num_f1 variables have no parents; every remaining variable has
# all of f1 as its parents.
structure = ((),) * len(f1) + (f1,) * len(f2)
# Disabled experiments with f2 as a parent set:
#structure += (f2, )
#for i in range(30, 31):
#    structure += (f2, )
print(structure)

# Fit the network with this fixed structure and inspect it.
model = BayesianNetwork.from_structure(X, structure)
model.plot()
plt.show()
p = model.probability(X)
print(p.shape)
print(p)
#plt.imshow(X)
#plt.show()