def test_score_titanic(self):
    scorer = K2Score(self.titanic_data2)
    titanic = BayesianModel([("Sex", "Survived"), ("Pclass", "Survived")])
    self.assertAlmostEqual(scorer.score(titanic), -1891.0630673606006)
    titanic2 = BayesianModel([("Pclass", "Sex")])
    titanic2.add_nodes_from(["Sex", "Survived", "Pclass"])
    self.assertLess(scorer.score(titanic2), scorer.score(titanic))
def setup(self):
    model = get_example_model('alarm')
    samples = model.simulate(n_samples=int(1e4), seed=42, show_progress=False)
    self.scoring_method = K2Score(samples)
    self.est = HillClimbSearch(data=samples)
def __init__(self, data, scoring_method=None, **kwargs):
    """
    Class for heuristic hill climb searches for DAGs, to learn network
    structure from data. `estimate` attempts to find a model with optimal score.

    Parameters
    ----------
    data: pandas DataFrame object
        dataframe object where each column represents one variable.
        (If some values in the data are missing the data cells should be set
        to `numpy.NaN`. Note that pandas converts each column containing
        `numpy.NaN`s to dtype `float`.)

    scoring_method: Instance of a `StructureScore`-subclass (`K2Score` is used as default)
        An instance of `K2Score`, `BdeuScore`, or `BicScore`.
        This score is optimized during structure estimation by the `estimate`-method.

    state_names: dict (optional)
        A dict indicating, for each variable, the discrete set of states (or
        values) that the variable can take. If unspecified, the observed values
        in the data set are taken to be the only possible states.

    complete_samples_only: bool (optional, default `True`)
        Specifies how to deal with missing data, if present. If set to `True`
        all rows that contain `np.NaN` somewhere are ignored. If `False` then,
        for each variable, every row where neither the variable nor its parents
        are `np.NaN` is used. This sets the behavior of the `state_count`-method.
    """
    if scoring_method is not None:
        self.scoring_method = scoring_method
    else:
        self.scoring_method = K2Score(data, **kwargs)

    super(HillClimbSearch, self).__init__(data, **kwargs)
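# A minimal usage sketch of the constructor above (data and column names are
# illustrative, not taken from the original source):
import numpy as np
import pandas as pd
from pgmpy.estimators import HillClimbSearch, K2Score

toy = pd.DataFrame(np.random.randint(0, 3, size=(1000, 3)), columns=list("XYZ"))
est = HillClimbSearch(toy, scoring_method=K2Score(toy))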
def estimate(self, tabu_length=100, max_indegree=2, black_list=None,
             epsilon=1e-4, max_iter=1e6, show_progress=True):
    # We will be using K2Score for this model
    score = K2Score(data=self.data)
    # local_score returns the score for a node given its parents.
    # It is evaluated on every iteration for all possible changes;
    # the search is greedy and picks the best available option.
    score_fn = score.local_score

    # Initialize a starting DAG. pgmpy provides a DAG class that adds
    # some functionality on top of nx.DiGraph.
    start_dag = DAG()
    start_dag.add_nodes_from(self.variables)

    # Set the edges we do not want to have in the graph
    if black_list is None:
        black_list = set()
    else:
        black_list = set(black_list)

    # max_indegree caps the number of parents any node may have.
    # The tabu list keeps track of recent changes so the search does not
    # immediately undo them.
    tabu_list = deque(maxlen=tabu_length)

    # Initialize the current model
    current_model = start_dag

    if show_progress:
        iteration = trange(int(max_iter))
    else:
        iteration = range(int(max_iter))

    for _ in iteration:
        # Get the best operation based on the K2 score via self._legal_operations
        best_operation, best_score_change = max(
            self._legal_operations(
                model=current_model,
                score=score_fn,
                tabu_list=tabu_list,
                max_indegree=max_indegree,
                black_list=black_list,
            ),
            key=lambda t: t[1],
        )

        if best_score_change < epsilon:
            break
        elif best_operation[0] == '+':
            current_model.add_edge(*best_operation[1])
            tabu_list.append(("-", best_operation[1]))
        elif best_operation[0] == '-':
            current_model.remove_edge(*best_operation[1])
            tabu_list.append(("+", best_operation[1]))
        elif best_operation[0] == 'flip':
            X, Y = best_operation[1]
            current_model.remove_edge(X, Y)
            current_model.add_edge(Y, X)
            tabu_list.append(best_operation)

    return current_model
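# Hypothetical invocation of the estimate() defined above, assuming `est` is
# the HillClimbSearch instance from the earlier sketch (the black_list edge
# is illustrative):
learned_dag = est.estimate(tabu_length=100, max_indegree=2,
                           black_list=[("X", "Y")], epsilon=1e-4,
                           show_progress=False)
print(learned_dag.edges())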
def _SetScoringType(df, scoretype, verbose=3):
    if verbose >= 3:
        print('[bnlearn] >Set scoring type at [%s]' % (scoretype))

    if scoretype == 'bic':
        scoring_method = BicScore(df)
    elif scoretype == 'k2':
        scoring_method = K2Score(df)
    elif scoretype == 'bdeu':
        scoring_method = BDeuScore(df, equivalent_sample_size=5)

    return (scoring_method)
def SetScoringType(df, scoretype, verbose=3):
    if verbose >= 3:
        print('[BNLEARN][STRUCTURE LEARNING] Set scoring type at [%s]' % (scoretype))

    if scoretype == 'bic':
        scoring_method = BicScore(df)
    elif scoretype == 'k2':
        scoring_method = K2Score(df)
    elif scoretype == 'bdeu':
        scoring_method = BdeuScore(df, equivalent_sample_size=5)

    return (scoring_method)
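# A short usage sketch for the helper above (`df` is any discrete pandas
# DataFrame). Note that an unrecognized scoretype would leave scoring_method
# unassigned and raise UnboundLocalError at the return:
scoring_method = SetScoringType(df, scoretype='k2')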
def scoreStructureLearn(data, search='HillClimbSearch', scoring_method='BicScore'):
    # Score-and-search based structure learning
    # search: HillClimbSearch, ExhaustiveSearch
    # scoring_method: 'BicScore', K2Score, BdeuScore
    if scoring_method == 'BicScore':
        scoring_method_tmp = BicScore(data)
    elif scoring_method == 'K2Score':
        scoring_method_tmp = K2Score(data)
    elif scoring_method == 'BdeuScore':
        scoring_method_tmp = BdeuScore(data, equivalent_sample_size=5)

    if search == 'HillClimbSearch':
        es = HillClimbSearch(data, scoring_method=scoring_method_tmp)
    else:
        es = ExhaustiveSearch(data, scoring_method=scoring_method_tmp)

    best_model = es.estimate()
    return best_model
def setUp(self):
    self.rand_data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)),
                                  columns=list("AB"))
    self.rand_data["C"] = self.rand_data["B"]
    self.est_rand = HillClimbSearch(self.rand_data,
                                    scoring_method=K2Score(self.rand_data))
    self.model1 = BayesianModel()
    self.model1.add_nodes_from(["A", "B", "C"])
    self.model2 = self.model1.copy()
    self.model2.add_edge("A", "B")

    # link to dataset: "https://www.kaggle.com/c/titanic/download/train.csv"
    self.titanic_data = pd.read_csv(
        "pgmpy/tests/test_estimators/testdata/titanic_train.csv")
    self.titanic_data1 = self.titanic_data[[
        "Survived", "Sex", "Pclass", "Age", "Embarked"
    ]]
    self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
    self.est_titanic1 = HillClimbSearch(self.titanic_data1)
    self.est_titanic2 = HillClimbSearch(self.titanic_data2)
def learn_structure(self, method, scoring_method, log=True):
    '''
    (4) Method that builds the structure of the data
    -----------------
    Parameters:
    method : The technique used to search for the structure
        -> scoring_approx     - approximate (hill climb) search with a scoring method
        -> scoring_exhaustive - exhaustive search with a scoring method
        -> constraint         - constraint-based technique
    scoring_method : K2, bic, bdeu
    log : "True" if you want to print debug information in the console
    '''
    # Select the scoring method for the local search of the structure
    if scoring_method == "K2":
        scores = K2Score(self.data)
    elif scoring_method == "bic":
        scores = BicScore(self.data)
    elif scoring_method == "bdeu":
        scores = BdeuScore(self.data)

    # Select the actual search method
    if method == "scoring_approx":
        est = HillClimbSearch(self.data, scores)
    elif method == "scoring_exhaustive":
        est = ExhaustiveSearch(self.data, scores)
    elif method == "constraint":
        est = ConstraintBasedEstimator(self.data)

    self.best_model = est.estimate()
    self.eliminate_isolated_nodes()  # remove all nodes not connected to anything else

    for edge in self.best_model.edges_iter():
        self.file_writer.write_txt(str(edge))

    self.log("Method used for structural learning: " + method, log)
    # self.log("Training instances skipped: " + str(self.extractor.get_skipped_lines()), log)
    self.log("Search terminated", log)
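# Hypothetical call of the method above, assuming `net` is an instance of the
# surrounding class with `self.data` already loaded:
net.learn_structure(method="scoring_approx", scoring_method="K2", log=True)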
def scoreModels(h0Diff, h0Rarity):
    diffModel0 = [('d5', 'd9'), ('d5', 'd3'), ('d3', 'd4'), ('d3', 'd8'),
                  ('d9', 'd6'), ('d9', 'd1'), ('d9', 'd7'), ('d9', 'd8')]
    diffModel1 = [('d2', 'd5'), ('d5', 'd9'), ('d5', 'd3'), ('d3', 'd4'),
                  ('d3', 'd8'), ('d9', 'd6'), ('d9', 'd1'), ('d9', 'd7'),
                  ('d9', 'd8')]
    diffModel2 = [('d1', 'd2'), ('d5', 'd9'), ('d5', 'd3'), ('d3', 'd4'),
                  ('d3', 'd8'), ('d9', 'd6'), ('d9', 'd1'), ('d9', 'd7'),
                  ('d9', 'd8')]

    print(' \nestimating K2/BIC score of difference structures\n')
    print('k2score model0: {0} BicScore model0: {1}'.format(
        K2Score(h0Diff).score(BayesianModel(diffModel0)),
        BicScore(h0Diff).score(BayesianModel(diffModel0))))
    print('k2score model1: {0} BicScore model1: {1}'.format(
        K2Score(h0Diff).score(BayesianModel(diffModel1)),
        BicScore(h0Diff).score(BayesianModel(diffModel1))))
    print('k2score model2: {0} BicScore model2: {1}'.format(
        K2Score(h0Diff).score(BayesianModel(diffModel2)),
        BicScore(h0Diff).score(BayesianModel(diffModel2))))

    rarityModel0 = [('r5', 'r9'), ('r5', 'r3'), ('r9', 'r1'), ('r8', 'r3'),
                    ('r6', 'r9'), ('r6', 'r3')]
    rarityModel1 = [('r6', 'r9'), ('r7', 'r9'), ('r3', 'r4'), ('r3', 'r5'),
                    ('r3', 'r9'), ('r2', 'r9'), ('r5', 'r9'), ('r9', 'r8'),
                    ('r9', 'r1')]
    rarityModel2 = [('r7', 'r9'), ('r4', 'r3'), ('r4', 'r9'), ('r1', 'r2'),
                    ('r1', 'r9'), ('r2', 'r9'), ('r5', 'r9'), ('r9', 'r8'),
                    ('r9', 'r6')]

    print(' \nestimating K2/BIC score of rarity structures\n')
    print('k2score model0: {0} BicScore model0: {1}'.format(
        K2Score(h0Rarity).score(BayesianModel(rarityModel0)),
        BicScore(h0Rarity).score(BayesianModel(rarityModel0))))
    print('k2score model1: {0} BicScore model1: {1}'.format(
        K2Score(h0Rarity).score(BayesianModel(rarityModel1)),
        BicScore(h0Rarity).score(BayesianModel(rarityModel1))))
    print('k2score model2: {0} BicScore model2: {1}'.format(
        K2Score(h0Rarity).score(BayesianModel(rarityModel2)),
        BicScore(h0Rarity).score(BayesianModel(rarityModel2))))
cpd1.append(p_64)
cpd1.append(p_36)
cpd1.append(p4)
model1.add_cpds(*cpd1)
print("------------------------------------------")
print("Edges of model1:", model1.edges())
print("Checking Model1:", model1.check_model())
print("------------------------------------------")

'''generate data for model1'''
inference = BayesianModelSampling(model1)
data = inference.forward_sample(size=3000, return_type='dataframe')
print("Data for model1:")
print(data)

k2 = K2Score(data)
print('Model1 K2 Score: ' + str(k2.score(model1)))

'''Inference'''
from pgmpy.inference import VariableElimination

infer = VariableElimination(model1)
print("Inference of x3:")
print(infer.query(['x3'])['x3'])
print("Inference of x5|x2:")
print(infer.query(['x5'], evidence={'x2': 1})['x5'])

'''Model2'''
model2 = BayesianModel([('x1', 'x2'), ('x1', 'x6'), ('x2', 'x5'),
                        ('x2', 'x3'), ('x6', 'x4')])
model2.add_cpds(p1, p_21, p_52, p_32, p_46, p_61)
def create_BN_model_using_BayesianEstimator(data):
    # data = pd.DataFrame(sensor_data, columns=feature_names)
    data = pd.DataFrame(data)
    # Alternative input (kept for reference):
    # data = read_data_from_file_remove_date_and_time(
    #     r"E:\Lessons_tutorials\Behavioural user profile articles\Datasets\7 twor.2009\twor.2009\converted\pgmpy\sensor+PCA_n=5.csv",
    #     data_type='float')

    # Earlier timed runs of the same hill climb with other scores (kept for
    # reference; the BicScore run took ~2 hours without output):
    # hc = HillClimbSearch(data, scoring_method=BicScore(data))
    # hc = HillClimbSearch(data, scoring_method=BdeuScore(data))

    # structure learning
    print("structure learning")
    start_time = time.time()
    hc = HillClimbSearch(data, scoring_method=K2Score(data))  # alternatives: BicScore(data), BdeuScore(data)
    best_model = hc.estimate()
    print(hc.scoring_method)
    print(best_model.edges())
    end_time = time.time()
    print("execution time in seconds:{}".format(end_time - start_time))

    # parameter learning
    casas7_model = BayesianModel(best_model.edges())
    print("*******************")
    estimator = BayesianEstimator(casas7_model, data)
    # alternative: casas7_model.fit(data, estimator=BayesianEstimator, prior_type="K2")
    return estimator
import numpy as np
import pandas as pd
from pgmpy.estimators import HillClimbSearch
from pgmpy.models import BayesianModel
from pgmpy.estimators import K2Score
from pgmpy.factors.discrete import TabularCPD
from pgmpy.sampling import BayesianModelSampling
from pgmpy.inference import VariableElimination

feature_val1 = pd.read_csv('15features_f.csv')
'''1 pen_pressure    2 letter_spacing  3 size            4 dimension
   5 is_lowercase    6 is_continuous   7 slantness       8 tilt
   9 entry_stroke_a  10 staff_of_a     11 formation_n    12 staff_of_d
   13 exit_stroke_d  14 word_formation 15 constancy'''
hill = HillClimbSearch(feature_val1, scoring_method=K2Score(feature_val1))
f_model = hill.estimate()
print(f_model.edges())

feature_val2 = pd.read_csv('15features_g.csv')
hill1 = HillClimbSearch(feature_val2, scoring_method=K2Score(feature_val2))
g_model = hill1.estimate()
print(g_model.edges())

corr_mat = feature_val1.corr()
print(corr_mat)
corr_feature = set()
for i in range(len(corr_mat.columns)):
    for j in range(i):
        if abs(corr_mat.iloc[i, j]) > 0.2:
model5.add_edges_from([('x1', 'x2'), ('x1', 'x6'), ('x6', 'x4'),
                       ('x2', 'x3'), ('x3', 'x5')])
model5.add_cpds(cpd_x1, cpd_x1x2, cpd_x1x6, cpd_x6x4, cpd_x2x3, cpd_x3x5)
inference = BayesianModelSampling(model5)
# print(inference.forward_sample(size=1000, return_type='dataframe'))
data5 = inference.forward_sample(size=1000, return_type='dataframe')

# ##### Evaluating the models using K2 score on the generated data
# In[70]:
# Evaluating the models on the data sets generated by them
data = pd.concat([data1, data2, data3, data4, data5])
data.shape

k2 = K2Score(data)
print('Model 1 K2 Score: ' + str(k2.score(model1)))  # model 1 is the best model
print('Model 2 K2 Score: ' + str(k2.score(model2)))
print('Model 3 K2 Score: ' + str(k2.score(model3)))
print('Model 4 K2 Score: ' + str(k2.score(model4)))
print('Model 5 K2 Score: ' + str(k2.score(model5)))

# ##### Find the high and low probability patterns of 'th'
# In[153]:
# Finding the highest-frequency 'th' pattern
frequency = data.groupby(['x1', 'x2', 'x3', 'x4', 'x5', 'x6']).size().to_frame('count').reset_index()
models = [model1, model2]
[m.fit(data) for m in models]  # ML fit

STATE_NAMES = model1.cpds[0].state_names
print('\nState names:')
for s in STATE_NAMES:
    print(s, STATE_NAMES[s])

# Information for the curious:
# Structure scores: http://pgmpy.org/estimators.html#structure-score
# K2 score: for instance http://www.lx.it.pt/~asmc/pub/talks/09-TA/ta_pres.pdf
# Additive smoothing and pseudocount: https://en.wikipedia.org/wiki/Additive_smoothing
# Scoring functions: https://www.cs.helsinki.fi/u/bmmalone/probabilistic-models-spring-2014/ScoringFunctions.pdf
k2 = K2Score(data)
print('Structure scores:', [k2.score(m) for m in models])
separator()

print('\n\nExhaustive structure search based on structure scores:')
from pgmpy.estimators import ExhaustiveSearch
from pgmpy.estimators import HillClimbSearch
from pgmpy.estimators import BicScore

# Warning: Doing exhaustive search on a PGM with all 5 variables takes more
# time than you should have to wait. Hence, re-fit the models to data where
# some variable(s) have been removed for this assignment.
raw_data2 = {
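# A minimal sketch of the exhaustive search the warning above refers to,
# assuming `data2` is a reduced DataFrame holding only a few of the variables:
searcher = ExhaustiveSearch(data2, scoring_method=K2Score(data2))
best = searcher.estimate()
print(best.edges())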
def opt(self, file1, file2):
    f1 = open(file1, encoding="utf8")
    lines = f1.readlines()
    nodes = self.getegdes(lines[0])
    edges = self.getegdes(lines[1])
    data = pd.read_csv(file2)

    G = BayesianModel()
    G.add_nodes_from(nodes)
    for i in range(int(len(edges) / 2)):
        G.add_edge(edges[2 * i], edges[2 * i + 1])
    # nx.draw(G)
    # plt.show()

    k2 = K2Score(data).score(G)
    bic = BicScore(data).score(G)
    bdeu = BDeuScore(data).score(G)
    print(k2, ",", bic, ",", bdeu)

    est = HillClimbSearch(data, scoring_method=K2Score(data))
    model = est.estimate()
    model_edges = model.edges()
    G_ = nx.DiGraph()
    G_.add_edges_from(model_edges)
    G_copy = nx.DiGraph()
    G_copy.add_edges_from(G.edges)

    add = []
    add_mut = []
    delete = []
    delete_mut = []

    # Candidate edges to add: learned edges that are absent from G and whose
    # addition would not create a cycle.
    for edge in model_edges:
        node1 = edge[0]
        node2 = edge[1]
        if not nx.has_path(G, node2, node1):
            if not G.has_edge(node1, node2):
                this = (node1, node2)
                add.append(this)
                mut = mr.mutual_info_score(data[node1], data[node2])
                add_mut.append(mut)
    seq = list(zip(add_mut, add))
    seq = sorted(seq, key=lambda s: s[0], reverse=True)
    alpha = 0.015
    # if seq[0][0] > alpha:
    #     add = seq[0:1]
    add = seq[0:1]

    # Candidate edges to delete: existing edges ranked by mutual information.
    data_edges = []
    for edge in G.edges:
        node1 = edge[0]
        node2 = edge[1]
        mut = mr.mutual_info_score(data[node1], data[node2])
        delete_mut.append(mut)
        data_edges.append(edge)
    seq = list(zip(delete_mut, data_edges))
    seq = sorted(seq, key=lambda s: s[0])
    # if seq[0][0] < alpha:
    #     delete = seq[0:1]
    if len(edges) > 2:
        delete = seq[0:1]
    if len(add) > 0 and len(delete) > 0:
        if delete[0][0] > add[0][0]:
            delete = []

    print('add')
    for i in add:
        print(str(i[1]) + "," + str(i[0]))
    print('delete')
    for j in delete:
        print(str(j[1]) + "," + str(j[0]))

    print('cpt')
    estimator = BayesianEstimator(G, data)
    for i in G.nodes:
        cpd = estimator.estimate_cpd(i, prior_type="K2")
        nodeName = i
        values = dict(data[i].value_counts())
        valueNum = len(values)
        CPT = np.transpose(cpd.values)
        sequence = cpd.variables[1::]
        card = []
        for x in sequence:
            s = len(dict(data[x].value_counts()))
            card.append(s)
        output = nodeName + '\t' + str(valueNum) + '\t' + str(CPT.tolist()) \
                 + '\t' + str(sequence) + '\t' + str(card)
        print(output)

    print('mutual')
    output1 = []
    for i in range(int(len(edges) / 2)):
        mut = mr.mutual_info_score(data[edges[2 * i]], data[edges[2 * i + 1]])
        output1.append(mut)
    output2 = {}
    for node1 in G.nodes():
        d = {}
        for node2 in G.nodes():
            if node1 == node2:
                continue
            mut = mr.mutual_info_score(data[node1], data[node2])
            d[node2] = mut
        output2[node1] = d
    print(output1)
    print(output2)
import matplotlib.pyplot as plt
import networkx as nx
from sklearn.preprocessing import KBinsDiscretizer

data = pd.read_csv("data/data_auto_mpg.csv")
# data = pd.DataFrame(np.random.randn(500, 5), columns=list('ABCDE'))
# data['F'] = data['A'] * data['B']

for col in data.columns:
    if data[col].dtype == np.float64 or data[col].dtype == np.float32:
        # bin_size = np.unique(data[col].values).shape[0]
        # kbins = KBinsDiscretizer(n_bins=bin_size, encode='ordinal', strategy='uniform').fit(data[col].values.reshape(-1, 1))
        # data[col] = kbins.transform(data[col].values.reshape(-1, 1)).astype(np.int64)
        data[col] = data[col].astype(np.int64)

data = data.iloc[:, :10]
print(data.dtypes)
print(data)

print("aq")
est = HillClimbSearch(data, scoring_method=K2Score(data))
print("aq")
model = est.estimate(max_indegree=5)
print("aq")
print(model.edges)

plt.figure()
nx.draw_networkx(model)
plt.show()
# feature_names.append("Person")
# print(feature_names)
# mydata = np.random.randint(low=0, high=2, size=(100, 6))
mydata = np.genfromtxt(r'E:\Lessons_tutorials\Behavioural user profile articles\Datasets\Aras\House A\CSV_Summery\Sequential\Day\occur\Whole_data.csv',
                       delimiter=",")
# alternative: pd.read_csv(r'E:\Lessons_tutorials\Behavioural user profile articles\Datasets\7 twor.2009\twor.2009\converted\pgmpy\data.csv')
# print(mydata)

feature_names = [str(i) for i in range(1, 41)]
feature_names.append("Person")
feature_names.append("activity")
print(feature_names)

data = pd.DataFrame(mydata, columns=feature_names)  # ['X', 'Y']
print(data)

list_of_scoring_methods = [
    # BicScore(data),
    # BdeuScore(data),
    K2Score(data),
]

for scoreMethod in list_of_scoring_methods:
    start_time = time.time()
    hc = HillClimbSearch(data, scoreMethod)
    best_model = hc.estimate()
    print(hc.scoring_method)
    print(best_model.edges())
    end_time = time.time()
    print("execution time in seconds:")
    print(end_time - start_time)

# casas7_model = BayesianModel()
# casas7_model.fit(data, estimator=BayesianEstimator)  # or MaximumLikelihoodEstimator
def test_score(self):
    self.assertAlmostEqual(K2Score(self.d1).score(self.m1), -10.73813429536977)
    self.assertEqual(K2Score(self.d1).score(BayesianModel()), 0)
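# Aside (illustrative, not part of the test): K2 decomposes over families, so
# the total score can be recomputed from per-node local scores. With a
# DataFrame `d1` and model `m1` like the fixtures above:
# k2 = K2Score(d1)
# total = sum(k2.local_score(v, list(m1.predecessors(v))) for v in m1.nodes())
# `total` should match k2.score(m1) up to K2Score's (zero) structure prior.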
from pgmpy.estimators import ExhaustiveSearch, K2Score

if __name__ == '__main__':
    # fp = os.path.join('data', 'MTurk_Harvey.csv')
    # df = pd.read_csv(fp)
    # data = np.genfromtxt(fp, delimiter=",", dtype=float, skip_header=1)
    # x = data[:, :-1]
    # y = data[:, -1]

    # data = pd.DataFrame(np.random.randint(0, 5, size=(2500, 3)), columns=list('XYZ'))
    # data['sum'] = data.sum(axis=1)
    # print(data)
    # est = ConstraintBasedEstimator(data)
    # skel, sep_sets = est.estimate_skeleton()
    # print(skel.edges())

    # s = ExhaustiveSearch(pd.DataFrame(data={'Temperature': [23, 19],
    #                                         'Weather': ['sunny', 'cloudy'],
    #                                         'Humidity': [65, 75]}))
    # print(len(list(s.all_dags())))
    # for dag in s.all_dags():
    #     print(dag.edges())

    data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
    data['C'] = data['B']

    searcher = ExhaustiveSearch(data, scoring_method=K2Score(data))
    for score, model in searcher.all_scores():
        print(score)
        print(model.edges())
models = [model1, model2]
[m.fit(data) for m in models]  # ML fit

STATE_NAMES = model1.cpds[0].state_names
# print(model2.cpds[3])
print('\nState names:')
for s in STATE_NAMES:
    print(s, STATE_NAMES[s])

# Information for the curious:
# Structure scores: http://pgmpy.org/estimators.html#structure-score
# K2 score: for instance http://www.lx.it.pt/~asmc/pub/talks/09-TA/ta_pres.pdf
# Additive smoothing and pseudocount: https://en.wikipedia.org/wiki/Additive_smoothing
# Scoring functions: https://www.cs.helsinki.fi/u/bmmalone/probabilistic-models-spring-2014/ScoringFunctions.pdf
k2 = K2Score(data)
print('Structure scores:', [k2.score(m) for m in models])
separator()

print('\n\nExhaustive structure search based on structure scores:')
from pgmpy.estimators import ExhaustiveSearch, HillClimbSearch, BicScore

# Warning: Doing exhaustive search on a PGM with all 5 variables takes more
# time than you should have to wait. Hence, re-fit the models to data where
# some variable(s) have been removed for this assignment.
raw_data2 = {
    'age': data['age'],
    'avg_cs': data['avg_cs'],
# %% codecell
from pgmpy.estimators import BDeuScore, K2Score, BicScore

# Create random data sample with 3 variables, where Z is dependent on X, Y:
data: DataFrame = DataFrame(data=np.random.randint(low=0, high=4, size=(5000, 2)),
                            columns=list('XY'))
# Making Z dependent (in some arbitrary relation like addition) on X and Y
data['Z'] = data['X'] + data['Y']

# %% codecell
# Creating the scoring objects from this data:
bdeu: BDeuScore = BDeuScore(data, equivalent_sample_size=5)
k2: K2Score = K2Score(data=data)
bic: BicScore = BicScore(data=data)

# %% codecell
commonEvidenceModel: BayesianModel = BayesianModel([('X', 'Z'), ('Y', 'Z')])
drawGraph(commonEvidenceModel)

# %% codecell
commonCauseModel: BayesianModel = BayesianModel([('X', 'Z'), ('X', 'Y')])
drawGraph(commonCauseModel)

# %% codecell
bdeu.score(commonEvidenceModel)
# %% codecell
k2.score(commonEvidenceModel)
# %% codecell
bic.score(commonEvidenceModel)
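# %% codecell
# For comparison (not in the original cells): score the common-cause model too.
# Since Z was generated from X and Y, the common-evidence structure should
# score higher under K2:
print(k2.score(commonEvidenceModel), k2.score(commonCauseModel))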
            PRED[X_j] = NEW_PRED[i, Xj]
            X_mat = X_mat.difference(S)
            X_pred = X_pred.intersection(S)
            break


def pi(G, Xi):
    # Parents of Xi: all nodes p with an edge (p, Xi) in G
    return set([p for p, c in G.edges if c == Xi])


def beta(G, xi):
    pass


data = pd.read_csv("../data/asia.csv")
newData = data.copy()
for col in newData.columns:
    if newData[col].dtype == np.float64 or newData[col].dtype == np.float32:
        newData[col] = newData[col].astype(np.int64)
newData = newData.iloc[:, :7]

e_t = [1, 1, 1, 1, 1, 1]
G = HillClimbSearch(newData, scoring_method=K2Score(newData)).estimate(max_indegree=5)
MaxIndependentSet(data, e_t, G, pi)
import pandas as pd
from pgmpy.estimators import HillClimbSearch, ExhaustiveSearch
from pgmpy.estimators import BDeuScore, BicScore, K2Score

## Structure learning
data = pd.read_csv('data.csv', encoding='gb18030')
df = pd.DataFrame(data)
bic = BicScore(df)
k2 = K2Score(df)
hc = HillClimbSearch(df, scoring_method=bic)
# hc = ExhaustiveSearch(df, k2)
model = hc.estimate()
for ee in model.edges():
    print(ee)

## Parameter learning
from pgmpy.models import BayesianModel

mod = BayesianModel(model.edges())
mod.fit(df)
for cpd in mod.get_cpds():
    print(cpd)
# print(mod.local_independencies('HA'))

## Model inference
from pgmpy.inference import VariableElimination, BeliefPropagation

cancer_infer = VariableElimination(mod)
q = cancer_infer.query(variables=['HA'])
print(q)