Python BicScore 예제들, pgmpy.estimators.BicScore Python 예제들

예제 #1

0

파일 보기

 def test_score_titanic(self):
     """Check the BIC score of a fixed Titanic structure and a score ordering.

     The structure with ``Sex`` and ``Pclass`` as parents of ``Survived``
     must score -1896.7250012840179, and a structure whose only edge is
     ``Pclass -> Sex`` must score strictly lower.
     """
     bic = BicScore(self.titanic_data2)

     # Both Sex and Pclass point at Survived.
     informed_edges = [("Sex", "Survived"), ("Pclass", "Survived")]
     informed = BayesianModel(informed_edges)
     self.assertAlmostEqual(bic.score(informed), -1896.7250012840179)

     # Only Pclass -> Sex; Survived is added as a node without any edge.
     weaker = BayesianModel([("Pclass", "Sex")])
     weaker.add_nodes_from(["Sex", "Survived", "Pclass"])
     self.assertLess(bic.score(weaker), bic.score(informed))

예제 #2

0

파일 보기

파일: pgm_explainer.py 프로젝트: jxzhangjhu/PGMExplainer

    def pgm_generate(self, target, data, pgm_stats, subnodes, child=None):
        """Learn and fit a Bayesian-network explanation over ``subnodes``.

        Args:
            target: Identifier of the target node; normalised with
                ``str(int(target))``.
            data: DataFrame with one column per node. NOTE: its column labels
                are converted to ``str`` in place, so the caller's frame is
                modified.
            pgm_stats: Not used by this method.
            subnodes: Node identifiers to include; each is normalised with
                ``str(int(node))``.
            child: When ``None`` the structure is learned over the
                non-target nodes and every node returned by
                ``self.search_MK`` is then connected to the target. For any
                other value the structure is learned over all ``subnodes``
                (target included) from the generalised data.

        Returns:
            A ``BayesianModel`` with the learned nodes and edges, fitted on
            the generalised data.
        """
        subnodes = [str(int(node)) for node in subnodes]
        target = str(int(target))
        subnodes_no_target = [node for node in subnodes if node != target]
        data.columns = data.columns.astype(str)

        # Called in both modes, although only the ``child is None`` path
        # reads the result.
        MK_blanket = self.search_MK(data, target, subnodes_no_target.copy())

        def generalized_frame():
            # Copy of the sub-node columns with the target passed through
            # generalize_target and every other node through
            # generalize_others.
            frame = data[subnodes].copy()
            frame[target] = data[target].apply(self.generalize_target)
            for node in subnodes_no_target:
                frame[node] = data[node].apply(self.generalize_others)
            return frame

        # ``is None`` instead of ``== None``: an identity test cannot be
        # hijacked by an overloaded ``__eq__`` (e.g. array-like arguments).
        if child is None:
            # NOTE(review): the search runs over the non-target columns while
            # the scorer is built on the full frame -- kept as in the
            # original; confirm this is intended.
            est = HillClimbSearch(data[subnodes_no_target],
                                  scoring_method=BicScore(data))
            learned = est.estimate()
            for node in MK_blanket:
                if node != target:
                    learned.add_edge(node, target)
            data_ex = generalized_frame()
        else:
            data_ex = generalized_frame()
            est = HillClimbSearch(data_ex, scoring_method=BicScore(data_ex))
            learned = est.estimate()

        # Create the pgm: copy the learned structure into a fresh model.
        pgm_explanation = BayesianModel()
        for node in learned.nodes():
            pgm_explanation.add_node(node)
        for edge in learned.edges():
            pgm_explanation.add_edge(edge[0], edge[1])

        # Fit the pgm on the generalised data.
        pgm_explanation.fit(data_ex)

        return pgm_explanation

예제 #3

0

파일 보기

def model_change(dag, data):
    """Return the BIC score of the network described by ``dag`` on ``data``.

    ``dag`` is indexed as ``dag[i][j]``; every non-zero entry becomes an edge
    from ``str(i)`` to ``str(j)``. ``data`` is wrapped in a DataFrame (and
    printed) before scoring.
    """
    data = pd.DataFrame(data)
    print(data)

    n_columns = np.size(data, 1)
    edge_list = [(str(row), str(col))
                 for row in range(len(dag))
                 for col in range(n_columns)
                 if dag[row][col] != 0]

    network = BayesianModel(edge_list)
    scorer = BicScore(data)
    return scorer.score(network)

예제 #4

0

파일 보기

파일: tools.py 프로젝트: afcarl/collaborative_filter_website_example

def train_joke_type_selection():
    """Learn a network structure from the aggregated joke-type preferences.

    Every row returned by ``Jokes.query.all()`` carries one counter per joke
    type. The counters are summed per type and each type label is repeated
    that many times in a single ``joke_preference`` column, which is then
    handed to an exhaustive, BIC-scored structure search.

    Returns:
        The model returned by ``ExhaustiveSearch.estimate()``.
    """
    # one table
    jokes = Jokes.query.all()

    # (label written to the data, attribute summed over all rows)
    joke_types = (
        ("nerd joke", "nerd_joke"),
        ("weird joke", "weird_joke"),
        ("cat meme", "cat_meme"),
        ("dog meme", "dog_meme"),
        ("dad joke", "dad_joke"),
    )
    joke_preferences = []
    for label, attribute in joke_types:
        count = sum(getattr(joke, attribute) for joke in jokes)
        joke_preferences.extend([label] * count)

    # Build the frame in one step: appending row by row is quadratic and
    # DataFrame.append no longer exists in pandas 2.x.
    data = pd.DataFrame({"joke_preference": joke_preferences})

    # The original dropped into an interactive console here
    # (code.interact), which blocked every call; that debugging hook has
    # been removed.
    bic = BicScore(data)
    es = ExhaustiveSearch(data, scoring_method=bic)
    return es.estimate()

예제 #5

0

파일 보기

    def pgm_generate(self, target, data, stats, subnodes):
        """Learn and fit a Bayesian-network explanation for ``target``.

        Nodes of ``subnodes`` whose entry in ``stats`` is below 0.05 are
        connected to the target after a BIC-scored hill-climbing search over
        the non-target nodes. The resulting structure is copied into a new
        ``BayesianModel`` and fitted on the generalised data.
        """
        p_values = pd.Series(stats, name='p-values')
        significant = p_values[p_values < 0.05]
        blanket = [name for name in significant.index if name in subnodes]
        other_nodes = [name for name in subnodes if name != target]

        # Structure over the non-target nodes only.
        searcher = HillClimbSearch(data[other_nodes],
                                   scoring_method=BicScore(data))
        learned = searcher.estimate()
        for parent in blanket:
            if parent == target:
                continue
            learned.add_edge(parent, target)

        # Create the pgm
        pgm_explanation = BayesianModel()
        for name in learned.nodes():
            pgm_explanation.add_node(name)
        for source, sink in learned.edges():
            pgm_explanation.add_edge(source, sink)

        # Fit the pgm
        data_ex = data[subnodes].copy()
        data_ex[target] = data[target].apply(self.generalize_target)
        for name in other_nodes:
            data_ex[name] = data[name].apply(self.generalize_others)
        pgm_explanation.fit(data_ex)

        return pgm_explanation

예제 #6

0

파일 보기

def mutacao(x, fitness_aux, prob, max_v, min_v):
    """Mutate the individuals in ``x`` in place; returns ``None``.

    When fewer than one mutation is expected overall
    (``len(x) * len(x[0]) * prob < 1``) every gene is mutated independently
    with probability ``prob`` and ``fitness_aux`` is left untouched.
    Otherwise ``round(len(x) * len(x[0]) * prob)`` mutations that produce a
    network accepted by ``vetor_Rede`` are applied, and the fitness of each
    mutated individual is stored as ``abs(BicScore(data).score(G))``.

    Args:
        x: Population, a list of individuals (each a list of gene values).
        fitness_aux: Fitness list indexed like ``x``; updated in place.
        prob: Per-gene mutation probability.
        max_v: Not used by this function.
        min_v: Offset added to every freshly drawn gene value.

    Relies on names defined outside this function: ``min_valor``,
    ``max_valor``, ``nao_dag``, ``nodes``, ``data`` and ``vetor_Rede``.
    """
    # NOTE(review): new values are drawn from the outer min_valor/max_valor
    # and then shifted by min_v, while max_v is ignored -- confirm that this
    # is the intended range.
    if len(x) * len(x[0]) * prob < 1:
        print("entando")
        for i in range(len(x)):
            for j in range(len(x[i])):
                r = random.random()
                if r <= prob:
                    valor_mut = x[i][j]
                    # Redraw until the gene actually changes.
                    while valor_mut == x[i][j]:
                        valor_mut = min_v + random.randint(
                            min_valor, max_valor)
                    x[i][j] = valor_mut
        return

    numero_mutacao = round(len(x) * len(x[0]) * prob)
    while numero_mutacao > 0:
        ind_escolhido = round(random.random() * (len(x) - 1))
        val = round(random.random() * (len(x[ind_escolhido]) - 1))
        valor_mut_antigo = deepcopy(x[ind_escolhido][val])
        valor_mut = valor_mut_antigo
        while valor_mut == x[ind_escolhido][val]:
            valor_mut = min_v + random.randint(min_valor, max_valor)
        x[ind_escolhido][val] = valor_mut

        # nao_dag stores whole individuals, so the mutated *individual* is
        # looked up (the original tested the single gene value, which can
        # never equal a stored row).
        if x[ind_escolhido] in nao_dag:
            x[ind_escolhido][val] = valor_mut_antigo
            continue

        G = vetor_Rede(x[ind_escolhido], nodes)
        if G:
            fitness_aux[ind_escolhido] = abs(BicScore(data).score(G))
            numero_mutacao = numero_mutacao - 1
        else:
            # Remember a copy of the rejected individual: the row itself is
            # restored on the next line, so storing it by reference would
            # record the restored individual instead.
            nao_dag.append(deepcopy(x[ind_escolhido]))
            x[ind_escolhido][val] = valor_mut_antigo

예제 #7

0

파일 보기

파일: model_learning.py 프로젝트: bhavikngala/comparision_of_PGM_machine_learning_and_deep_learning_approaches_to_handwriting_matching_with_images

def learnedStructureModel():
    """Learn a structure from the writer-pair features, fit it and evaluate.

    The training and testing frames are built from the input/output arrays
    returned by ``gtd.formSameWriterDiffWriterInputOutputFeaturePairs``, with
    eighteen feature columns (``f1``-``f9`` and ``f11``-``f19``) followed by
    the hypothesis column ``h``. The structure comes from a BIC-scored hill
    climb, the CPDs from a Bayesian estimator with a BDeu prior, and the
    fitted model is passed to ``evaluateModel``.
    """
    trainingInputs, trainingOutputs, testingInputs, testingOutputs = (
        gtd.formSameWriterDiffWriterInputOutputFeaturePairs(5, True))

    evidenceNodes = ['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9',
                     'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18',
                     'f19']
    all_columns = evidenceNodes + ['h']

    trainingData = pd.DataFrame(
        data=np.concatenate((trainingInputs, trainingOutputs), axis=1),
        columns=all_columns)
    testingData = pd.DataFrame(
        data=np.concatenate((testingInputs, testingOutputs), axis=1),
        columns=all_columns)

    searcher = HillClimbSearch(trainingData,
                               scoring_method=BicScore(trainingData))
    model = searcher.estimate(max_indegree=20)

    # Number of states per feature; the same nine counts apply to f1-f9 and
    # again to f11-f19. Every variable's states are 0 .. count-1.
    state_counts = [4, 5, 3, 5, 4, 4, 4, 5, 3] * 2
    state_names = {}
    for feature, count in zip(evidenceNodes, state_counts):
        state_names[feature] = list(range(count))
    state_names['h'] = [0, 1]

    # fit model and data, compute CPDs
    model.fit(trainingData,
              estimator=BayesianEstimator,
              prior_type='BDeu',
              state_names=state_names)

    print(model.edges())

    # probability of the hypothesis 'h' given the evidence features
    evaluateModel(model, testingData, 'h', evidenceNodes)

예제 #8

0

파일 보기

파일: bayesian_network.py 프로젝트: hinanmu/LEAD

def build_structure(data):
    """Learn a DAG from ``data`` and return it as an adjacency matrix.

    A BIC-scored hill climb is run on ``pd.DataFrame(data)``. The result is
    a square ``int64`` matrix of side ``data.shape[1]`` holding 1 at
    ``[parent, child]`` for every learned edge. The matrix is also saved to
    'dataset/DAG.npy'.
    """
    frame = pd.DataFrame(data)
    learned = HillClimbSearch(frame, scoring_method=BicScore(frame)).estimate()

    n_vars = data.shape[1]
    DAG = np.zeros((n_vars, n_vars), np.int64)
    for parent, child in learned.edges():
        DAG[parent, child] = 1

    np.save('dataset/DAG.npy', DAG)
    return DAG

예제 #9

0

파일 보기

파일: structure_learning.py 프로젝트: ms440/bnlearn

def _SetScoringType(df, scoretype, verbose=3):
    """Return the scoring object selected by ``scoretype`` for ``df``.

    Args:
        df: Data handed to the scoring class.
        scoretype: One of ``'bic'``, ``'k2'`` or ``'bdeu'``.
        verbose: A message is printed when this is 3 or higher.

    Returns:
        ``BicScore(df)``, ``K2Score(df)`` or
        ``BDeuScore(df, equivalent_sample_size=5)``.

    Raises:
        ValueError: If ``scoretype`` is not one of the supported names
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if verbose >= 3:
        print('[bnlearn] >Set scoring type at [%s]' % (scoretype))

    if scoretype == 'bic':
        return BicScore(df)
    if scoretype == 'k2':
        return K2Score(df)
    if scoretype == 'bdeu':
        return BDeuScore(df, equivalent_sample_size=5)

    raise ValueError(
        "scoretype must be 'bic', 'k2' or 'bdeu', got %r" % (scoretype,))

예제 #10

0

파일 보기

def main():
    """Learn and print the initial and the transition network of a DBN.

    The data comes from ``readData()`` and is split by ``getData`` into the
    frame for the initial network (``bData``) and the frame for the
    transition network (``tData``). For each of them a hill-climbing search
    is run, the edges are printed, and the CPDs fitted with
    ``BayesianEstimator`` are printed.

    Every ``print`` uses the single-argument call form, which is valid on
    both Python 2 and Python 3 (the original mixed in Python 2 ``print``
    statements and therefore did not parse on Python 3).
    """
    data, string = readData()
    # Every column except the first is treated as a gene.
    genes = np.array(data.columns[1:])
    labels = np.array(data.columns)

    bayesianModel = BayesianModel()
    transitionModel = DBN()

    bayesianModel.add_nodes_from(genes)
    transitionModel.add_nodes_from(genes)

    bData, tData = getData(data, labels)

    # The original emitted these two lines with a Python 2 trailing-comma
    # print, which joins them with a single space; one string reproduces
    # that output exactly.
    print("\nDynamic Bayesian Network inference \nB_0 network relations:  ")

    # NOTE(review): the extra positional argument to HillClimbSearch, the
    # BicScore keywords (bk1, weight) and estimate_dynamic are not part of
    # stock pgmpy -- presumably a modified fork; confirm.
    hcb = HillClimbSearch(bData, genes, scoring_method=BicScore(bData, labels, bk1=string, weight=4))
    best_model_b = hcb.estimate(start=bayesianModel, tabu_length=15, max_indegree=2)
    print(best_model_b.edges())

    printOutputB(best_model_b)

    print("\nLocal Probability Model: ")
    best_model_b.fit(bData, BayesianEstimator)
    for cpd in best_model_b.get_cpds():
        print(cpd)

    print("\nB_transition network relations: ")

    hct = HillClimbSearch(tData, genes, scoring_method=BicScore(tData, labels, bk1=string, weight=4))
    best_model_t = hct.estimate_dynamic(start=transitionModel, tabu_length=15, max_indegree=2)
    print(best_model_t.edges())

    printOutputT(best_model_t)

    print("\nLocal Probability Model: ")
    best_model_t.fit(tData, BayesianEstimator)
    for cpd in best_model_t.get_cpds():
        print(cpd)

예제 #11

0

파일 보기

def SetScoringType(df, scoretype, verbose=3):
    """Return the scoring object selected by ``scoretype`` for ``df``.

    Args:
        df: Data handed to the scoring class.
        scoretype: One of ``'bic'``, ``'k2'`` or ``'bdeu'``.
        verbose: A message is printed when this is 3 or higher.

    Returns:
        ``BicScore(df)``, ``K2Score(df)`` or
        ``BdeuScore(df, equivalent_sample_size=5)``.

    Raises:
        ValueError: If ``scoretype`` is not one of the supported names
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if verbose >= 3:
        print('[BNLEARN][STRUCTURE LEARNING] Set scoring type at [%s]' %
              (scoretype))

    if scoretype == 'bic':
        return BicScore(df)
    if scoretype == 'k2':
        return K2Score(df)
    if scoretype == 'bdeu':
        return BdeuScore(df, equivalent_sample_size=5)

    raise ValueError(
        "scoretype must be 'bic', 'k2' or 'bdeu', got %r" % (scoretype,))

예제 #12

0

파일 보기

def bei_ye_si():
    """Train a Bayesian network on the Titanic file and print its accuracy.

    The whitespace-delimited table is read from '泰坦尼克号.txt'; the first
    800 rows are used for training and the remainder for testing. The
    structure is learned with a BIC-scored hill climb, the parameters with a
    Bayesian estimator using a BDeu prior, and the share of correctly
    predicted ``Survived`` values on the test rows is printed.

    Note: all warnings are silenced process-wide by this function.
    """
    warnings.filterwarnings("ignore")
    print('现在进行的算法是贝叶斯网络')

    # Context manager so the file handle is closed once the table is loaded
    # (the original left it open).
    with open('泰坦尼克号.txt') as f:
        dataset = pd.read_table(f, delim_whitespace=True)

    train = dataset[:800]
    test = dataset[800:]

    hc = HillClimbSearch(train, scoring_method=BicScore(train))
    best_model = hc.estimate()
    best_model.fit(train, estimator=BayesianEstimator,
                   prior_type="BDeu")  # default equivalent_sample_size=5

    # `columns=` already names the axis, so the redundant axis=1 is dropped.
    predict_data = test.drop(columns=['Survived'])
    y_pred = best_model.predict(predict_data)
    print(
        (y_pred['Survived'] == test['Survived']).sum() / len(test))  # accuracy on the test set

예제 #13

0

파일 보기

파일: test_ExhaustiveSearch.py 프로젝트: studosi-fer/STRUCE

    def setUp(self):
        """Create the random and Titanic fixtures used by the search tests.

        ``rand_data`` has two random integer columns ``A`` and ``B`` (values
        0-4, 5000 rows) plus ``C``, an exact copy of ``B``. Three exhaustive
        searches are built on it: default scoring, BDeu and BIC. The Titanic
        fixture keeps only the ``Survived``, ``Sex`` and ``Pclass`` columns.
        """
        samples = np.random.randint(0, 5, size=(5000, 2))
        self.rand_data = pd.DataFrame(samples, columns=list('AB'))
        self.rand_data['C'] = self.rand_data['B']

        self.est_rand = ExhaustiveSearch(self.rand_data)
        bdeu = BdeuScore(self.rand_data)
        self.est_rand_bdeu = ExhaustiveSearch(self.rand_data,
                                              scoring_method=bdeu)
        bic = BicScore(self.rand_data)
        self.est_rand_bic = ExhaustiveSearch(self.rand_data,
                                             scoring_method=bic)

        # link to dataset: "https://www.kaggle.com/c/titanic/download/train.csv"
        titanic_csv = 'pgmpy/tests/test_estimators/testdata/titanic_train.csv'
        self.titanic_data = pd.read_csv(titanic_csv)
        self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
        self.est_titanic = ExhaustiveSearch(self.titanic_data2)

예제 #14

0

파일 보기

def scoreStructureLearn(data,
                        search='HillClimbSearch',
                        scoring_method='BicScore'):
    """Score-and-search structure learning.

    Args:
        data: Data handed to the scoring class and to the search.
        search: ``'HillClimbSearch'`` selects hill climbing; any other value
            selects ``ExhaustiveSearch``.
        scoring_method: ``'BicScore'``, ``'K2Score'`` or ``'BdeuScore'``.

    Returns:
        The model returned by the search's ``estimate()``.

    Raises:
        ValueError: If ``scoring_method`` is not one of the supported names
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if scoring_method == 'BicScore':
        scorer = BicScore(data)
    elif scoring_method == 'K2Score':
        scorer = K2Score(data)
    elif scoring_method == 'BdeuScore':
        scorer = BdeuScore(data, equivalent_sample_size=5)
    else:
        raise ValueError(
            "scoring_method must be 'BicScore', 'K2Score' or 'BdeuScore', "
            "got %r" % (scoring_method,))

    if search == 'HillClimbSearch':
        es = HillClimbSearch(data, scoring_method=scorer)
    else:
        es = ExhaustiveSearch(data, scoring_method=scorer)
    return es.estimate()

예제 #15

0

파일 보기

    def learn_structure(self, method, scoring_method, log=True):
        ''' (4)
        Method that builds the structure of the data
        -----------------
        Parameters:
        method          : The technique used to search for the structure
            -> scoring_approx     - To use an approximated search with scoring method
            -> scoring_exhaustive - To use an exhaustive search with scoring method
            -> constraint         - To use the constraint based technique
        scoring_method : K2, bic, bdeu (not needed when method is "constraint")
        log             - "True" if you want to print debug information in the console

        Raises:
        ValueError - if method is unknown, or if a scoring search is requested
                     with an unknown scoring_method (both used to surface as
                     an UnboundLocalError)
        '''
        if method not in ("scoring_approx", "scoring_exhaustive", "constraint"):
            raise ValueError("Unknown structure learning method: %r" % (method,))

        # Select the scoring method for the local search of the structure
        scores = None
        if scoring_method == "K2":
            scores = K2Score(self.data)
        elif scoring_method == "bic":
            scores = BicScore(self.data)
        elif scoring_method == "bdeu":
            scores = BdeuScore(self.data)

        # Select the actual method
        if method == "constraint":
            # The constraint-based estimator does not use a score.
            est = ConstraintBasedEstimator(self.data)
        else:
            if scores is None:
                raise ValueError(
                    "Unknown scoring method: %r" % (scoring_method,))
            if method == "scoring_approx":
                est = HillClimbSearch(self.data, scores)
            else:
                est = ExhaustiveSearch(self.data, scores)

        self.best_model = est.estimate()
        # REMOVE all nodes not connected to anything else
        self.eliminate_isolated_nodes()

        for edge in self.best_model.edges_iter():
            self.file_writer.write_txt(str(edge))

        self.log("Method used for structural learning: " + method, log)
        self.log("Search terminated", log)

예제 #16

0

파일 보기

파일: likelihhodRatioDiffRare.py 프로젝트: bhavikngala/comparision_of_PGM_machine_learning_and_deep_learning_approaches_to_handwriting_matching_with_images

def scoreModels(h0Diff, h0Rarity):
	"""Print the K2 and BIC scores of three fixed structures per data set.

	Three hand-written edge lists over the ``d*`` variables are scored on
	``h0Diff`` and three over the ``r*`` variables on ``h0Rarity``. Nothing
	is returned.
	"""
	diffModel0 = [('d5', 'd9'), ('d5', 'd3'), ('d3', 'd4'), ('d3', 'd8'),
				  ('d9', 'd6'), ('d9', 'd1'), ('d9', 'd7'), ('d9', 'd8')]
	diffModel1 = [('d2', 'd5'), ('d5', 'd9'), ('d5', 'd3'), ('d3', 'd4'),
				  ('d3', 'd8'), ('d9', 'd6'), ('d9', 'd1'), ('d9', 'd7'),
				  ('d9', 'd8')]
	diffModel2 = [('d1', 'd2'), ('d5', 'd9'), ('d5', 'd3'), ('d3', 'd4'),
				  ('d3', 'd8'), ('d9', 'd6'), ('d9', 'd1'), ('d9', 'd7'),
				  ('d9', 'd8')]

	rarityModel0 = [('r5', 'r9'), ('r5', 'r3'), ('r9', 'r1'), ('r8', 'r3'),
					('r6', 'r9'), ('r6', 'r3')]
	rarityModel1 = [('r6', 'r9'), ('r7', 'r9'), ('r3', 'r4'), ('r3', 'r5'),
					('r3', 'r9'), ('r2', 'r9'), ('r5', 'r9'), ('r9', 'r8'),
					('r9', 'r1')]
	rarityModel2 = [('r7', 'r9'), ('r4', 'r3'), ('r4', 'r9'), ('r1', 'r2'),
					('r1', 'r9'), ('r2', 'r9'), ('r5', 'r9'), ('r9', 'r8'),
					('r9', 'r6')]

	# One (output template, edge list) pair per printed line.
	diff_reports = (
		('k2score model0: {0}		BicScore model0: {1}', diffModel0),
		('k2score model1: {0}		BicScore model1: {1}', diffModel1),
		('k2score model2: {0}		BicScore model2: {1}', diffModel2),
	)
	rarity_reports = (
		('k2score model0: {0}		BicScore model0: {1}', rarityModel0),
		('k2score model1: {0}		BicScore model1: {1}', rarityModel1),
		('k2score model2: {0}		BicScore model2: {1}', rarityModel2),
	)

	print(' \nestimating K2/BIC score of difference structures\n')
	for template, edge_list in diff_reports:
		k2_value = K2Score(h0Diff).score(BayesianModel(edge_list))
		bic_value = BicScore(h0Diff).score(BayesianModel(edge_list))
		print(template.format(k2_value, bic_value))

	print(' \nestimating K2/BIC score of rarity structures\n')
	for template, edge_list in rarity_reports:
		k2_value = K2Score(h0Rarity).score(BayesianModel(edge_list))
		bic_value = BicScore(h0Rarity).score(BayesianModel(edge_list))
		print(template.format(k2_value, bic_value))

예제 #17

0

파일 보기

파일: bayesianNetwork.py 프로젝트: JeremyKer/Reseaux-Bayesiens

        'Cancer': cancer,
        'Age': age,
        'Tuberculose': tuberculosis,
        'TbOuCa': tbOrCa,
        'VisiteAsie': visitAsia,
        'Radiographie': xray,
        'Bronchite': bronchitis,
        'Dyspnea': dyspnea,
        'Geographie': geographical
    })
# Show the assembled data set before learning from it.
print(data)

# Structure learning: BIC-scored hill climbing over `data`.
from pgmpy.estimators import HillClimbSearch, BicScore

bic = BicScore(data)
hc = HillClimbSearch(data, scoring_method=bic)
best_model = hc.estimate()
print(best_model.edges())
# Re-reading the learned structure shows that the program finds the links but
# not their direction.
# The model with the correct orientation would therefore be:
bon_model = BayesianModel([('Cancer', 'TbOuCa'), ('TbOuCa', 'Dyspnea'),
                           ('TbOuCa', 'Bronchite'), ('TbOuCa', 'Radiographie'),
                           ('Fumeur', 'Bronchite'),
                           ('Radiographie', 'Dyspnea'),
                           ('Tuberculose', 'TbOuCa'),
                           ('Bronchite', 'Dyspnea')])

# Parameter learning
#print("estimation des cpds :")
from pgmpy.estimators import BayesianEstimator

예제 #18

0

파일 보기

파일: pgm_tutorial.py 프로젝트: mpierrau/DD2420_Probabilistical_Graphical_Models

data2 = pd.DataFrame(data=raw_data2)

import time

t0 = time.time()
# Exhaustive search over data2 scored with K2; the elapsed wall-clock time is
# printed below. Comment these lines out to skip the exhaustive search.
searcher = ExhaustiveSearch(data2, scoring_method=K2Score(data2))
search = searcher.all_scores()
print('time:', time.time() - t0)

# Uncomment for printout:
#for score, model in search:
#    print("{0}        {1}".format(score, model.edges()))

separator()

# Hill climbing on each data set with each score. Every search is scored on
# the same frame it searches over (the first one previously searched data2
# while scoring on data, which duplicated the data2 run below with a
# mismatched scorer).
hcs = HillClimbSearch(data, scoring_method=K2Score(data))
model = hcs.estimate()

hcs2 = HillClimbSearch(data2, scoring_method=K2Score(data2))
model2 = hcs2.estimate()

hcs_bic = HillClimbSearch(data, scoring_method=BicScore(data))
model_bic = hcs_bic.estimate()

hcs_bic2 = HillClimbSearch(data2, scoring_method=BicScore(data2))
model_bic2 = hcs_bic2.estimate()

# End of Task 6

예제 #19

0

파일 보기

# 时间:2020/12/21  15:38
import pandas as pd
import networkx as nx
from matplotlib import pyplot as plt
from pgmpy.models import BayesianModel
from pgmpy.estimators import HillClimbSearch
from pgmpy.estimators import BicScore

# Load the training table (path is specific to the original author's machine).
data = pd.read_csv(
    r'C:\Users\haomiaowu\Desktop\BN-Cheminformatics\Train-clear.csv')
bic = BicScore(data)

# Reuse the scorer built above instead of constructing a second, identical
# BicScore for the search.
hs = HillClimbSearch(data, scoring_method=bic)
best_model = hs.estimate()
print(best_model.edges())

# Draw the learned network with labelled nodes.
nx.draw(
    best_model,
    with_labels=True,
    node_size=1000,
    font_weight='bold',
    node_color='y',
)

plt.show()

예제 #20

0

파일 보기

def annealing(maxsteps=1000, debug=True):
    """Optimize the black-box function 'cost_function' with the simulated annealing algorithm.

    The data is read from 'Asia.csv' (header row plus at most 50000 records,
    every value kept as a string). A random state accepted by ``vetor_Rede``
    is drawn as the starting point; each step then proposes a mutated state
    via ``mutacao`` and accepts it when ``acceptance_probability`` exceeds a
    uniform random draw.

    Args:
        maxsteps: Number of annealing steps.
        debug: Not used by this function.

    Returns:
        ``(state, cost, states, costs)``: the final state, its cost as
        recomputed by ``cost_function``, and the lists of accepted states
        and their costs (both starting with the initial state).
    """
    from itertools import islice

    # Read the header plus at most 50000 data rows. The column count is
    # taken from the file rather than being fixed at 8.
    with open('Asia.csv') as csv_file:
        rows = list(islice(csv.reader(csv_file, delimiter=','), 50001))
    header, records = rows[0], rows[1:]
    data = pd.DataFrame(records, columns=header)
    print("Data: ")
    print(data)  # data read from the file

    prob = 0.5
    min_valor = 0
    max_valor = 2
    nao_dag = []
    nodes = ['asia', 'tub', 'smoke', 'lung', 'bronc', 'either', 'xray', 'dysp']
    # One gene per unordered pair of nodes.
    ind_size = round((len(nodes) * len(nodes) - len(nodes)) / 2)

    # Draw random candidates until one yields a network; rejected candidates
    # are remembered in nao_dag so they are not evaluated twice.
    while True:
        aux = [random.randint(min_valor, max_valor) for _ in range(ind_size)]
        if aux in nao_dag:
            continue
        if vetor_Rede(aux, nodes):
            state = deep_copy(aux)
            break
        nao_dag.append(aux)

    print('state')
    print(state)
    bic_score = BicScore(data)
    print(vetor_Rede(state, nodes))
    cost = cost_function(state, bic_score, nodes)
    states, costs = [state], [cost]

    for step in range(maxsteps):
        print(step)
        fraction = step / float(maxsteps)
        T = temperature(fraction)
        [new_state,
         new_cost] = mutacao(deep_copy(state), deep_copy(cost), prob,
                             max_valor, min_valor, bic_score, nodes, nao_dag)

        if acceptance_probability(cost, new_cost, T) > random.random():
            # Accept the proposal and record it in the history.
            state1 = new_state.copy()
            cost = deep_copy(new_cost)

            states.append(state1)
            costs.append(cost)
            state = deep_copy(state1)

    return state, cost_function(state, bic_score, nodes), states, costs

예제 #21

0

파일 보기

# Run the annealing search and draw the network encoded by the final state.
state, c, states, costs = annealing(maxsteps=3000, debug=True)
nodes = ['asia', 'tub', 'smoke', 'lung', 'bronc', 'either', 'xray', 'dysp']
G = vetor_Rede(state, nodes)
nx.draw(G, with_labels=True)
print(state)
print(c)
# Re-read 'Asia.csv' column by column: data1[i] collects the i-th field of
# every row (header included), stopping after 50001 rows.
with open('Asia.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    aux = 0
    data = []
    data1 = [[] for i in range(8)]
    for row in csv_reader:
        data.append(row)
        for i in range(len(row)):
            data1[i].append(row[i])
        aux = aux + 1
        if aux == 50001:
            break
#22376.39851240954
# Turn the columns into a DataFrame: the first entry of each column is used as
# its name, the remaining entries as its values. The row list built above is
# discarded here.
data = {}
for i in range(len(data1)):
    data[data1[i][0]] = [data1[i][j] for j in range(1, len(data1[i]))]
data = pd.DataFrame(data)
print("Data: ")
print(data)  # data read from the file
reader = BIFReader('asia.bif')  # best Asia network, as published on bnlearn.com
asia_model = reader.get_model()  # read that model
# Absolute BIC score of the reference network on the data, for comparison
# with the cost `c` printed above.
print("Score BIC")
print(abs(BicScore(data).score(asia_model)))
#see_annealing(states, costs)

예제 #22

0

파일 보기

파일: structure_learning_bag_of_activities.py 프로젝트: aslesani/pgmpy_fork

#print(dataPreparation.get_work_lists())
# Column names: the work list from dataPreparation plus a final "Person"
# column.
# NOTE(review): append() mutates whatever list get_work_lists() returned --
# confirm it is not a shared object.
feature_names = dataPreparation.get_work_lists()
feature_names.append("Person")
print(feature_names)
#mydata = np.random.randint(low=0, high=2,size=(100, 6))
# Load the comma-separated activity table (path is specific to the original
# author's machine).
mydata = np.genfromtxt(
    r'E:\Lessons_tutorials\Behavioural user profile articles\Datasets\7 twor.2009\twor.2009\converted\pgmpy\activities+time_ordered_withoutdatetime.csv',
    delimiter=",")
#pd.read_csv(r'E:\Lessons_tutorials\Behavioural user profile articles\Datasets\7 twor.2009\twor.2009\converted\pgmpy\data.csv')
#print(mydata)
data = pd.DataFrame(mydata, columns=feature_names)  #['X', 'Y'])
print(data)

# Scoring methods to compare; only BIC is currently enabled.
list_of_scoring_methods = [
    BicScore(data),
    #BdeuScore(data),
    #K2Score(data)
]

# Run one hill-climbing search per scoring method and report its edges and
# wall-clock time.
for scoreMethod in list_of_scoring_methods:
    start_time = time.time()
    hc = HillClimbSearch(data, scoreMethod)
    best_model = hc.estimate()
    print(hc.scoring_method)
    print(best_model.edges())
    end_time = time.time()
    print("execution time in seconds:")
    print(end_time - start_time)

# Uses the model from the last loop iteration.
estimator = BayesianEstimator(best_model, data)

예제 #23

0

파일 보기

파일: network.py 프로젝트: ronikobrosly/surgo_bayesian_network

    def learn_structure(self,
                        file_path,
                        algorithm="hc",
                        significance_level=0.05):
        """
        Learn the Bayesian Network structure of the dataset with `pgmpy` and
        save the learned edges as a two-column CSV file.

        Arguments:
            file_path: str, the absolute path to save the file to (e.g. "~/Desktop/BN_structure.csv")
            algorithm: str, optional (default = 'hc')
                'hc' runs hill climbing scored with BIC and then keeps only the
                nodes connected to the target variable. 'pc' runs the
                constraint-based estimator; the original author reports a bug
                in pgmpy's implementation and advises against using 'pc'.
                Any other value learns nothing and writes no file.
            significance_level: float, optional (default = 0.05)
                Statistical significance cutoff passed to the estimator when
                using the 'pc' algorithm. Per the original author, lower
                values produce sparser networks.

        Returns:
            None
        """
        self.structure_algorithm = algorithm

        if self.verbose:
            print(
                "Depending on the number of variables in your dataset, this might take some time..."
            )

        # Learn structure, using one of the algorithms
        np.random.seed(self.random_seed)

        def announce_saving():
            # Progress message shown right after a structure has been learned.
            if self.verbose:
                print(
                    f"Structure learned! Saving structure to the following CSV: {file_path}"
                )

        def write_edges():
            # Dump the current edge list as (from_variable, to_variable) rows.
            edge_table = pd.DataFrame(
                list(self.structure_model.edges),
                columns=["from_variable", "to_variable"],
            )
            edge_table.to_csv(file_path, index=False)

        if algorithm == "hc":
            # Filter out columns with zero correlation with target variable
            self.filtered_df = self._initial_filter()

            # Run HC algorithm
            searcher = HillClimbSearch(
                self.filtered_df,
                scoring_method=BicScore(self.filtered_df))
            self.structure_model = searcher.estimate()

            announce_saving()

            # Eliminate isolated subgraphs: keep only the component that
            # contains the target variable.
            undirected = self.structure_model.to_undirected()
            connected_nodes = list(
                nx.algorithms.components.node_connected_component(
                    undirected, self.target_variable))
            unreachable = set(list(self.structure_model.nodes)) - set(
                connected_nodes)

            for node in list(unreachable):
                self.structure_model.remove_node(node)
                self.filtered_df.drop([node], axis=1, inplace=True)

            write_edges()

        elif algorithm == "pc":
            self.filtered_df = self.df
            estimator = ConstraintBasedEstimator(self.filtered_df)
            self.structure_model = estimator.estimate(
                significance_level=significance_level)

            announce_saving()
            write_edges()

예제 #24

0

파일 보기

파일: BayesianGraph.py 프로젝트: farzanefakhrian/MSNBC-Clustering

from pgmpy.estimators import HillClimbSearch, BicScore, BayesianEstimator
from pgmpy.models import BayesianModel
from pgmpy.readwrite.BIF import BIFWriter
import pandas as pd
import numpy as np
from time import time
import graphviz as gv
import os

# Load the data, keep only rows whose values sum to less than 200, and clamp
# every value above 1 down to 1.
train = pd.read_csv('../msnbcWithHeader.csv', sep=',')
train = train[train.sum(axis=1) < 200]
train[train > 1] = 1

# Structure learning (BIC-scored hill climbing) followed by parameter fitting
# with a BDeu prior; both are covered by the timing below.
train_start = time()
bic = BicScore(train)
hc = HillClimbSearch(train, scoring_method=bic)
best_model = hc.estimate(prog_bar=True)
edges = best_model.edges()
# NOTE(review): the model is rebuilt from the edge list alone, so a variable
# without any learned edge would not appear in `model` -- confirm that this is
# intended.
model = BayesianModel(edges)
model.fit(train, estimator=BayesianEstimator, prior_type="BDeu")
variables = model.nodes()

print(model.edges())
# Despite its name, train_end holds the elapsed training time.
train_end = time() - train_start
print("train time " + str(train_end))

# Mirror the learned network into a graphviz digraph.
my_graph = gv.Digraph(format='png')
for node in variables:
    my_graph.node(node)
for edge in edges:
    my_graph.edge(edge[0], edge[1])

예제 #25

0

파일 보기

    def learn(self, file1, file2):
        """Merge prior edges with a learned structure and print each node's CPD.

        Args:
            file1: Path of a UTF-8 text file; its first line is parsed by
                ``self.getegdes`` into a flat sequence in which consecutive
                pairs are (source, target) edges.
            file2: Path of the CSV file holding the training data.

        The prior edges form the initial graph. Edges learned by a BIC-scored
        hill climb on the data are then added, except those that would close
        a cycle. For every node of the merged graph a CPD is estimated with a
        K2 prior and one tab-separated line is printed: node name, number of
        observed values, transposed CPD table, parent variables and the
        number of observed values of each parent.

        Returns:
            None. All results are printed.
        """
        # Context manager so the file handle is closed (the original left it
        # open).
        with open(file1, encoding="utf8") as f1:
            lines = f1.readlines()
        edges = self.getegdes(lines[0])
        data = pd.read_csv(file2)

        # Prior graph: consecutive entries of `edges` form one edge each.
        G = nx.DiGraph()
        for i in range(len(edges) // 2):
            G.add_edge(edges[2 * i], edges[2 * i + 1])

        est = HillClimbSearch(data, scoring_method=BicScore(data))
        model = est.estimate()
        G_ = nx.DiGraph()
        G_.add_edges_from(model.edges())

        # Add a learned edge i -> j when an endpoint is new, or when there is
        # no path j -> i yet (so the new edge cannot close a cycle).
        for i, j in G_.edges():
            if (i not in G.nodes() or j not in G.nodes()
                    or not nx.has_path(G, j, i)):
                G.add_edge(i, j)

        new_model = BayesianModel()
        new_model.add_edges_from(G.edges)
        G = new_model.copy()

        estimator = BayesianEstimator(G, data)

        edges = [str(edge) for edge in G.edges]
        print(edges)

        for nodeName in G.nodes:
            cpd = estimator.estimate_cpd(nodeName, prior_type="K2")
            # Number of distinct values observed for this node in the data.
            valueNum = len(data[nodeName].value_counts())
            CPT = np.transpose(cpd.values)
            # Everything after the first variable of the CPD is a parent.
            sequence = cpd.variables[1::]
            card = [len(data[parent].value_counts()) for parent in sequence]
            output = '\t'.join([nodeName, str(valueNum), str(CPT.tolist()),
                                str(sequence), str(card)])
            print(output)

예제 #26

0

파일 보기

import pandas as pd
from pgmpy.estimators import HillClimbSearch, ExhaustiveSearch
from pgmpy.estimators import BDeuScore, BicScore, K2Score
## Structure learning
# Load the CSV (GB18030-encoded) and learn a network structure with
# hill climbing scored by BIC.
data = pd.read_csv('data.csv', encoding='gb18030')
df = pd.DataFrame(data)
bic = BicScore(df)
k2 = K2Score(df)
hc = HillClimbSearch(df, scoring_method=bic)
# Alternative search, kept for reference:
# hc = ExhaustiveSearch(df, k2)
model = hc.estimate()
for learned_edge in model.edges():
    print(learned_edge)



## Parameter learning
# Build a model from the learned edges and fit its CPDs on the same frame.
from pgmpy.models import BayesianModel
mod = BayesianModel(model.edges())
mod.fit(df)
for fitted_cpd in mod.get_cpds():
    print(fitted_cpd)

# print(mod.local_independencies('HA'))

## Model inference
# Query the distribution of the 'HA' variable via variable elimination.
from pgmpy.inference import VariableElimination, BeliefPropagation
cancer_infer = VariableElimination(mod)
q = cancer_infer.query(variables=['HA'])
print(q)

예제 #27

0

파일 보기

파일: bayes_net.py 프로젝트: epickens/Bayesian-Networks

# The header row of names.csv supplies the column names for the data file.
col_names = pd.read_csv('data/names.csv')  # 'data/names.csv'
data = pd.read_csv('data/breast-cancer-wisconsin.data',
                   names=col_names.columns)
# Drop every row whose "bare_nuclei" entry is the literal '?'.
data = data[data["bare_nuclei"] != '?']
data.set_index('id', inplace=True)  # stop the model from using id as a node

train, test = train_test_split(data, test_size=0.2, random_state=0)
Y_test = test['class']
test = test.drop(['class'], axis=1)

# Convert labels to something that can be handled by sklearn's eval functions
labelencoder = LabelEncoder()
Y_test = labelencoder.fit_transform(Y_test.values.ravel())

### Greedy Structure Learning with Hill Climbing
# Build the search on the training split, the same frame the BIC scorer
# uses, so the held-out rows play no part in structure learning.
hc = HillClimbSearch(train, scoring_method=BicScore(train))
hc_model = hc.estimate()

### Parameter Learning with Bayesian Estimation
hc_model.fit(train, estimator=BayesianEstimator, prior_type="BDeu")
### If the following for loop is un-commented the terminal will be flooded with CPDs
# for cpd in hc_model.get_cpds():
#     print(cpd)

print()

### Another Method (it will throw errors about sample size - but it still runs and shouldn't be too messed up)
### Constraint Based Structure Learning
est = ConstraintBasedEstimator(train)

예제 #28

0

파일 보기

 def __init__(self, dataframe):
     """Wrap ``dataframe`` in a ``BicScore`` and start with an empty
     ``hashed_local_scores`` dictionary."""
     self.estimator = BicScore(dataframe)
     self.hashed_local_scores = dict()

예제 #29

0

파일 보기

# Hand every column name of the data to the model as a node.
model.add_nodes_from(data.columns.values)

# Learn temporal relations from data
model.learn_temporal_relationships(data)

# Delete columns with temporal information
data.fillna(0, inplace=True)
# Iterate over a snapshot of the column names because columns are dropped
# from the frame while looping.
for column_name in list(data.columns.values):
    is_start_column = column_name.endswith(ITBN.start_time_marker)
    if is_start_column or column_name.endswith(ITBN.end_time_marker):
        data.drop(column_name, axis=1, inplace=True)
        continue
    if column_name.startswith(ITBN.temporal_node_marker):
        continue
    # Remaining columns: recode 1 as 'Y' and -1 as 'N'.
    data[column_name] = data[column_name].map({1: 'Y', -1: 'N'})

# Learn model structure from data and temporal relations
hc = HillClimbSearchITBN(data, scoring_method=BicScore(data))
model = hc.estimate(start=model)
# model.add_edge('response', 'command')
# model.add_edge('response', 'tm_response_command')
# model.add_edge('command', 'tm_response_command')

# Learn model parameters
model.fit(data)

# Add observation nodes and cpds
obs_edges = []
obs_cpds = []
state_names = {
    'command': ['N', 'Y'],
    'prompt': ['N', 'Y'],
    'reward': ['N', 'Y'],

예제 #30

0

파일 보기

class MDL_Scorer:
    """Score networks as the sum of absolute BIC local scores.

    Local scores come from a ``BicScore`` built on the data frame passed to
    the constructor and are memoised in ``hashed_local_scores``.
    Lower totals are treated as better by the selection helpers.
    """

    def __init__(self, dataframe):
        """Create the BIC scorer for ``dataframe`` and an empty score cache."""
        self.estimator = BicScore(dataframe)
        self.hashed_local_scores = {}

    def local_score(self, node_name, parent_names):
        """Return the absolute local score of ``node_name`` given its parents.

        The result is cached under ``node_name + str(parent_names)``, so
        ``node_name`` must be a string.
        """
        key = node_name + str(parent_names)
        if key not in self.hashed_local_scores:
            score = abs(self.estimator.local_score(node_name, parent_names))
            self.hashed_local_scores[key] = score

        return self.hashed_local_scores[key]

    def score(self, network, verbose=0):
        """Return the sum of the local scores of every node in ``network``.

        ``network`` must provide ``num_nodes()``, ``get_parents(i)`` and
        ``node_names(...)``. Progress messages are printed when ``verbose``
        is greater than 2 (start/end) or greater than 3 (per node).
        """
        total = 0
        if verbose > 2:
            print("starting scoring")
        for i in range(network.num_nodes()):
            if verbose > 3:
                print("node", i)

            parents = network.get_parents(i)
            node_name = network.node_names(i)
            parent_names = network.node_names(parents)

            if verbose > 3:
                print("starting local score")
            total += self.local_score(node_name, parent_names)
            if verbose > 3:
                print("ended local score")
        if verbose > 2:
            print("ended scoring")

        return total

    # this performance can be improved
    def n_lowest_score(self,
                       n,
                       networks,
                       score_history_list,
                       network_history_list,
                       verbose=False):
        """Return the ``n`` networks with the lowest total score.

        All scores and networks, ordered by ascending score, are appended in
        place to ``score_history_list`` and ``network_history_list``.
        An empty ``networks`` yields an empty list and leaves both history
        lists untouched.
        """
        scores = [self.score(network, verbose=verbose) for network in networks]

        # Stable sort on the score only: networks are never compared with
        # each other, and ties keep their input order.
        ranked = sorted(zip(scores, networks), key=lambda pair: pair[0])

        score_history_list += [score_ for score_, _ in ranked]
        network_history_list += [network for _, network in ranked]

        return [network for _, network in ranked[:n]]

    def lowest_score(self, networks, verbose=False):
        """Return a dict describing the lowest-scoring network.

        The dict has the keys ``'best_index'``, ``'best_score'`` and
        ``'best_network'``. For an empty ``networks`` these stay at ``-1``,
        ``inf`` and ``None``. On ties the first network wins.
        """
        result = {
            'best_index': -1,
            'best_score': float('inf'),
            'best_network': None
        }

        for i, network in enumerate(networks):
            # Forward verbose so the parameter actually takes effect.
            local = self.score(network, verbose=verbose)

            if local < result['best_score']:
                result['best_index'] = i
                result['best_score'] = local
                result['best_network'] = network

        return result