コード例 #1
0
def test_io_from_structure():
    """Compare a network built from ``X`` and ``weights`` with one built from ``data_generator``.

    Both networks are created with the same fixed parent structure, and their
    log-probabilities on ``X`` are expected to agree.
    """
    parents = ((2,), (0, 2), ())

    from_weighted_data = BayesianNetwork.from_structure(
        X=X, weights=weights, structure=parents)
    from_generator = BayesianNetwork.from_structure(
        X=data_generator, structure=parents)

    assert_array_almost_equal(
        from_weighted_data.log_probability(X),
        from_generator.log_probability(X),
    )
コード例 #2
0
ファイル: bayesnets.py プロジェクト: helloground/doppelganger
    def train(input_data, structure, fields, prior_data=None):
        """Build one Bayesian network per data type using a fixed structure.

        The given data cannot contain any missing data. If called multiple
        times, the old model will be replaced.  To update the model with new
        data, see `update`.

        Args:
            input_data (SegmentedData): typed data to train on; its
                    ``type_to_data`` mapping and ``segmenter`` are read here
            structure (iterable(iterable)): structure as returned from
                    define_bayes_net_structure
            fields (list(unicode)): field names to learn
            prior_data (list(data)): optional list of training samples that is
                    appended to the data of every type before fitting.

        Return:
            BayesianNetworkModel: A predictive model trained on the given data

        """
        def _training_samples(data):
            # Without prior data the segment is used as-is; otherwise a new
            # list is built so the caller's data is not modified.
            if prior_data is None:
                return data
            return list(data) + list(prior_data)

        type_to_network = {
            type_: BayesianNetwork.from_structure(_training_samples(data), structure)
            for type_, data in input_data.type_to_data.items()
        }
        return BayesianNetworkModel(
            type_to_network, fields, segmenter=input_data.segmenter)
コード例 #3
0
def test_from_structure():
    """Fit a network with a fixed structure; check its structure and summed log-probability."""
    data = datasets[1]
    expected_structure = ((1, 2), (4,), (), (), (3,))

    network = BayesianNetwork.from_structure(data, structure=expected_structure)

    assert_equal(network.structure, expected_structure)
    total_log_probability = network.log_probability(data).sum()
    assert_almost_equal(total_log_probability, -344.38287, 4)
コード例 #4
0
    def _likelihoods(cls,
                     real_data,
                     synthetic_data,
                     metadata=None,
                     structure=None):
        """Score each synthetic row under a Bayesian network fitted to the real data.

        Only the fields selected as 'categorical' or 'boolean' are used. A
        ``'structure'`` entry in the validated metadata takes precedence over
        the ``structure`` argument; when no structure is available the network
        is learned from the real data with the 'chow-liu' algorithm.

        Returns:
            numpy.ndarray: one probability per synthetic row (0 for rows on
            which ``probability`` raises ``ValueError``), or an array of NaN
            with one entry per real row when no field is selected.
        """
        metadata = cls._validate_inputs(real_data, synthetic_data, metadata)
        structure = metadata.get('structure', structure)
        fields = cls._select_fields(metadata, ('categorical', 'boolean'))

        if not fields:
            return np.full(len(real_data), np.nan)

        LOGGER.debug('Fitting the BayesianNetwork to the real data')
        if not structure:
            bn = BayesianNetwork.from_samples(
                real_data[fields].to_numpy(), algorithm='chow-liu')
        else:
            if isinstance(structure, dict):
                # A dict is a serialized network: load it and keep only its structure.
                serialized = json.dumps(structure)
                structure = BayesianNetwork.from_json(serialized).structure

            bn = BayesianNetwork.from_structure(
                real_data[fields].to_numpy(), structure)

        def _row_probability(row):
            # Rows the network rejects with ValueError are given probability 0.
            try:
                return bn.probability([row.to_numpy()])
            except ValueError:
                return 0

        LOGGER.debug('Evaluating likelihood of the synthetic data')
        return np.asarray([
            _row_probability(row)
            for _, row in synthetic_data[fields].iterrows()
        ])
コード例 #5
0
def _evaluate_bayesian_likelihood(train, test, metadata):
    """Compute two mean log-likelihood scores from ``metadata['structure']``.

    ``syn_likelihood`` is the mean log-probability of the mapped ``train``
    rows under the network loaded from the serialized structure.
    ``test_likelihood`` is the mean log-probability of the mapped ``test``
    rows under a second network that reuses that structure but is fitted on
    the mapped ``train`` rows.

    Args:
        train: data passed through ``_mapper`` and used for both scores.
        test: data passed through ``_mapper`` and scored row by row.
        metadata: mapping with a JSON-serializable ``'structure'`` entry;
            also forwarded to ``_mapper``.

    Returns:
        pandas.DataFrame: a single row with the columns ``name``,
        ``syn_likelihood`` and ``test_likelihood``.
    """
    LOGGER.info('Evaluating using Bayesian Likelihood.')

    train_mapped = _mapper(train, metadata)
    test_mapped = _mapper(test, metadata)

    structure_json = json.dumps(metadata['structure'])
    bn1 = BayesianNetwork.from_json(structure_json)
    bn2 = BayesianNetwork.from_structure(train_mapped, bn1.structure)

    # 1e-8 keeps the logarithm finite when a probability is exactly zero.
    l1 = np.mean(np.log(bn1.probability(train_mapped) + 1e-8))

    l2_probs = []
    failed = 0
    for item in test_mapped:
        try:
            l2_probs.append(bn2.probability([item]))
        except ValueError:
            # Rows the fitted network cannot score count as probability 0.
            failed += 1
            l2_probs.append(0)

    if failed:
        # Previously the counter was collected but never reported.
        LOGGER.warning(
            'Bayesian Likelihood: %d of %d test rows could not be scored '
            'and were assigned probability 0.', failed, len(l2_probs))

    l2 = np.mean(np.log(np.asarray(l2_probs) + 1e-8))

    return pd.DataFrame([{
        "name": "Bayesian Likelihood",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }])
コード例 #6
0
ファイル: evaluate.py プロジェクト: zheng-ningxin/SDGym
def _evaluate_bayesian_likelihood(train, test, metadata):
    """Compute two mean log-likelihood scores from ``metadata['structure']``.

    The mapped ``train`` rows are scored one by one under the network loaded
    from the serialized structure (``syn_likelihood``). The mapped ``test``
    rows are scored under a network with the same structure fitted on the
    mapped ``train`` rows (``test_likelihood``). Any row whose scoring raises
    is given the probability 1e-8.

    Returns:
        pandas.DataFrame: a single row with the columns ``name``,
        ``syn_likelihood`` and ``test_likelihood``.
    """
    LOGGER.info('Evaluating using Bayesian Likelihood.')
    reference_net = BayesianNetwork.from_json(json.dumps(metadata['structure']))

    train_rows = _mapper(train, metadata)
    test_rows = _mapper(test, metadata)

    def _mean_log_probability(network, rows):
        # Score rows individually so that one failing row does not abort the rest.
        scores = []
        for row in rows:
            try:
                score = network.probability(row)
            except Exception:
                score = 1e-8
            scores.append(score)
        # The additive 1e-8 keeps the logarithm finite for zero probabilities.
        return np.mean(np.log(np.asarray(scores) + 1e-8))

    syn_likelihood = _mean_log_probability(reference_net, train_rows)

    fitted_net = BayesianNetwork.from_structure(train_rows, reference_net.structure)
    test_likelihood = _mean_log_probability(fitted_net, test_rows)

    return pd.DataFrame([{
        "name": "Bayesian Likelihood",
        "syn_likelihood": syn_likelihood,
        "test_likelihood": test_likelihood,
    }])
コード例 #7
0
def default_bayesian_likelihood(dataset, trainset, testset, meta):
    """Compute two mean log-likelihood scores using the structure file of ``dataset``.

    The structure file is located with the glob pattern
    ``data/*/<dataset>_structure.json`` and exactly one match is required.
    ``syn_likelihood`` scores the mapped ``trainset`` rows under the network
    loaded from that file; ``test_likelihood`` scores the mapped ``testset``
    rows under a network with the same structure fitted on the mapped
    ``trainset`` rows. Rows whose scoring raises are given probability 1e-8.

    Args:
        dataset: name substituted into the structure-file glob pattern.
        trainset: data passed through ``mapper`` and used for both scores.
        testset: data passed through ``mapper`` and scored row by row.
        meta: forwarded to ``mapper``.

    Returns:
        list[dict]: a single dict with the keys ``name``, ``syn_likelihood``
        and ``test_likelihood``.

    Raises:
        AssertionError: if the glob does not match exactly one file.
    """
    struct = glob.glob("data/*/{}_structure.json".format(dataset))
    assert len(struct) == 1, (
        "expected exactly one structure file for {!r}, found {}".format(
            dataset, len(struct)))
    # NOTE(review): from_json receives the matched file path, not the file
    # contents - confirm the installed pomegranate accepts a filename here.
    bn1 = BayesianNetwork.from_json(struct[0])

    trainset_mapped = mapper(trainset, meta)
    testset_mapped = mapper(testset, meta)
    prob = []
    for item in trainset_mapped:
        try:
            prob.append(bn1.probability(item))
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit; scoring failures still fall back to 1e-8.
            prob.append(1e-8)
    # The additive 1e-8 keeps the logarithm finite for zero probabilities.
    l1 = np.mean(np.log(np.asarray(prob) + 1e-8))

    bn2 = BayesianNetwork.from_structure(trainset_mapped, bn1.structure)
    prob = []
    for item in testset_mapped:
        try:
            prob.append(bn2.probability(item))
        except Exception:
            prob.append(1e-8)
    l2 = np.mean(np.log(np.asarray(prob) + 1e-8))

    return [{
        "name": "default",
        "syn_likelihood": l1,
        "test_likelihood": l2,
    }]
コード例 #8
0
def train_model(data: np.ndarray,
                clusters: int = 5,
                init_nodes: list = None) -> BayesianNetwork:
    """Learn a Bayesian network over ``data`` plus one extra hidden column.

    The hidden column is first filled with values sampled from the
    distribution of KMeans cluster labels, the structure is learned with
    ``hc_rr`` on the augmented data, and the resulting network is then
    re-fitted on data whose hidden column is entirely NaN.

    Args:
        data: observed samples; the hidden column is appended after its
            existing columns.
        clusters: number of KMeans clusters used to seed the hidden column.
        init_nodes: forwarded unchanged to ``hc_rr``.

    Returns:
        The fitted and baked ``BayesianNetwork``.
    """
    # NOTE(review): this instance is replaced by the hc_rr result below
    # before it is ever used; kept as in the original.
    learned = BayesNet()

    # Cluster the initial data in order to fill in a hidden variable based on
    # the distribution of clusters.
    cluster_labels = KMeans(n_clusters=clusters, random_state=0).fit(data).labels_
    label_distribution = DiscreteDistribution.from_samples(cluster_labels)
    sampled_hidden = np.array(label_distribution.sample(data.shape[0]))
    augmented = np.column_stack((data, sampled_hidden))
    latent_index = augmented.shape[1] - 1

    # Train the network structure on data taking into account a hidden variable.
    learned = hc_rr(augmented, latent=latent_index, init_nodes=init_nodes)
    parent_sets = tuple(
        tuple(learned.F[node]['parents']) for node in sorted(learned.nodes()))
    network = BayesianNetwork.from_structure(augmented, parent_sets)
    network.bake()

    # Learn a hidden variable: the hidden column is replaced by NaN before
    # predicting and re-fitting.
    masked = np.column_stack((data, np.array([np.nan] * data.shape[0])))
    network.predict(masked)  # NOTE(review): the returned value is discarded
    network.fit(masked)
    network.bake()
    return network
コード例 #9
0
ファイル: test_bayes_net.py プロジェクト: pixelou/pomegranate
def test_from_structure():
    """Fit a network with a fixed structure and verify it survives a JSON round trip.

    The structure and the summed log-probability on the training data are
    checked for the fitted model and for the model rebuilt from its JSON
    serialization.
    """
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))
    model = BayesianNetwork.from_structure(X, structure=structure)

    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = BayesianNetwork.from_json(model.to_json())
    assert_equal(model2.structure, structure)
    # Check the round-tripped model; the original re-checked ``model`` here,
    # so the deserialized parameters were never exercised.
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)
コード例 #10
0
def test_robust_from_structure():
    """Fit a network with a fixed structure and verify the generic ``from_json`` round trip.

    Besides the structure and the summed log-probability, the Python type of
    the first parameter entry of the first state's distribution is compared
    between the fitted and the deserialized model.
    """
    X = datasets[1]
    structure = ((1, 2), (4,), (), (), (3,))
    model = BayesianNetwork.from_structure(X, structure=structure)

    assert_equal(model.structure, structure)
    assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)

    model2 = from_json(model.to_json())
    assert_equal(model2.structure, structure)
    # Check the round-tripped model; the original re-checked ``model`` here,
    # so the deserialized parameters were never exercised.
    assert_almost_equal(model2.log_probability(X).sum(), -344.38287, 4)

    model_dtype = type(model.states[0].distribution.parameters[0][0][0])
    model2_dtype = type(model2.states[0].distribution.parameters[0][0][0])
    assert_equal(model_dtype, model2_dtype)
コード例 #11
0
    def _train_kronos_for_ecosystem(cls, kronos_dependency_dict, package_occurrence_df):
        """Fit a pomegranate Bayesian network for one ecosystem.

        Args:
            kronos_dependency_dict: mapping that provides the parent tuple
                list, the package list and the intent list under the
                ``pgm_constants.KD_*`` keys read below.
            package_occurrence_df: pandas data frame converted into the
                training matrix by ``utils.generate_matrix_from_pandas_df``.

        Returns:
            The ``BayesianNetwork`` built by ``BayesianNetwork.from_structure``,
            with the node names (packages followed by intents) as state names.
        """
        kronos_dependency_list_string = kronos_dependency_dict[
            pgm_constants.KD_PARENT_TUPLE_LIST]
        kronos_node_list = kronos_dependency_dict[pgm_constants.KD_PACKAGE_LIST] + \
            kronos_dependency_dict[pgm_constants.KD_INTENT_LIST]
        # State names must be text for pomegranate on both Python 2 and 3:
        # decode byte strings, stringify everything else. isinstance (rather
        # than an exact type comparison) also decodes bytes subclasses, which
        # str() would otherwise render as "b'...'".
        kronos_node_string_list = [node_name.decode('utf-8')
                                   if isinstance(node_name, bytes) else str(node_name)
                                   for node_name in kronos_node_list]
        kronos_dependency_list = utils.generate_kronos_dependency_list_for_pomegranate(
            kronos_dependency_list_string)

        # The matrix columns are selected with the original (undecoded) node names.
        package_occurrence_matrix = utils.generate_matrix_from_pandas_df(package_occurrence_df,
                                                                         kronos_node_list)

        pgm_model = BayesianNetwork.from_structure(package_occurrence_matrix,
                                                   structure=kronos_dependency_list,
                                                   state_names=kronos_node_string_list)
        return pgm_model
コード例 #12
0
def BIC_score(data,Pa,card,struct):
    """Return a BIC-style score for the network ``struct`` fitted on ``data``.

    The score is the summed log-probability of ``data`` under the fitted
    network, minus ``log(n_samples) * indep_params(Pa, card) / 2``, minus a
    penalty of 100000000 that is waived when at least one merged node group
    has as many entries as there are nodes.
    NOTE(review): the penalty presumably targets structures that are not
    connected - confirm the intent.

    Args:
        data: samples; ``data.shape[0]`` is used as the sample count.
        Pa: per-node sequences of parent indices, indexed by node.
        card: forwarded to ``indep_params`` together with ``Pa``.
        struct: structure passed to ``BayesianNetwork.from_structure``.

    Returns:
        The penalized score.
    """
    # One group per node: its parents plus the node itself.
    groups = [list(parents) + [node] for node, parents in enumerate(Pa)]

    # Single pass over all pairs: groups sharing a node are merged into their union.
    for ii in range(len(groups) - 1):
        for jj in range(ii + 1, len(groups)):
            if set(groups[ii]) & set(groups[jj]):
                merged = list(set(groups[ii] + groups[jj]))
                groups[ii] = merged
                groups[jj] = list(merged)

    covers_all_nodes = any(len(group) == len(groups) for group in groups)
    ss = 0 if covers_all_nodes else 100000000

    # The original first built an empty BayesianNetwork() that was
    # overwritten immediately; only the fitted model is needed.
    model = BayesianNetwork.from_structure(data, struct)
    BIC = model.log_probability(data).sum() - np.log(data.shape[0])*indep_params(Pa,card)/2 - ss
    return BIC
コード例 #13
0
# Script fragment: binarize a slice of X and fit a Bayesian network in which
# every column in [num_f1, num_f2) has all columns in [0, num_f1) as parents.
# X, num_f1 and num_f2 are defined before this fragment.

# Keep index 1 of the last axis, then only the first num_f2 columns.
X = X[:, :, 1]
print(X.shape)
X = X[:, :num_f2]
# Binarize: True where a value exceeds the mean over the whole array.
X = X > numpy.mean(X)
plt.imshow(X)
plt.show()
# f1 holds the column indices 0..num_f1-1, f2 the indices num_f1..num_f2-1.
f1 = ()
f2 = ()
for i in range(0, num_f1):
    f1 += (i, )
for i in range(num_f1, num_f2):
    f2 += (i, )

# One parent tuple per column: the first num_f1 columns have no parents,
# the remaining columns have all of f1 as parents.
structure = ()
for i in range(0, num_f1):
    structure += ((), )
for i in range(num_f1, num_f2):
    structure += (f1, )
# Disabled experiment: extra node(s) with f2 as parents. f2 is otherwise
# unused in this fragment.
#structure += (f2, )
#for i in range(30, 31):
#    structure += (f2, )
print(structure)

# Fit the network with the fixed structure, plot it, and print the
# probabilities it assigns to the training rows.
model = BayesianNetwork.from_structure(X, structure)
model.plot()
plt.show()
p = model.probability(X)
print(p.shape)
print(p)
#plt.imshow(X)
#plt.show()