def createBayesianNetwork():
    """Learn a Bayesian network from the whole-log CSV with the K2 algorithm.

    Returns the learned ``gum.BayesNet`` and displays it via ``gnb``.
    """
    learner = gum.BNLearner("logs/Log/WholeLog.csv")
    # K2 requires a node ordering; use the 13 columns in file order.
    learner.useK2(list(range(13)))
    learned_net = learner.learnBN()
    print("Learned in {0}s".format(learner.currentTime()))
    gnb.showBN(learned_net)
    return learned_net
# Example #2 (scraped separator; original score: 0)
def createBayesianNetwork():
    """Learn a Bayesian network from logs/Log/WholeLog.csv using K2.

    The column indices 0..12 give K2 its required topological ordering.
    Shows the result in the notebook and returns it.
    """
    csv_learner = gum.BNLearner("logs/Log/WholeLog.csv")
    ordering = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    csv_learner.useK2(ordering)
    result = csv_learner.learnBN()
    elapsed = csv_learner.currentTime()
    print("Learned in {0}s".format(elapsed))
    gnb.showBN(result)
    return result
# Example #3 (scraped separator; original score: 0)
# NOTE(review): os.path.join discards earlier components when a later one is
# absolute, so 'res'/'titanic' are ignored and the Google-Drive CSV is loaded.
# The result is bound to `df`, but the code below uses `train_df`, which is not
# defined in this chunk — TODO confirm it is defined in an earlier cell.
df = pandas.read_csv(os.path.join('res', 'titanic', '/content/gdrive/My Drive/train_data2.csv'))

# Print the number of distinct values per column of the training frame.
for k in train_df.keys():
    print('{0}: {1}'.format(k, len(train_df[k].unique())))

# Build an empty BN whose variables and label order serve as a template for the
# learner. Despite the 'titanic' path, this schema matches the Adult/census
# income dataset — presumably the paths were copy-pasted; verify.
template=gum.BayesNet()
template.add(gum.LabelizedVariable("target", "target", ['<=50K', '>50K']))
template.add(gum.LabelizedVariable("sex", "sex",['Male','Female']))
template.add(gum.LabelizedVariable("age_range", "age_range",['0-20','21-30','31-65','66-90']))
template.add(gum.LabelizedVariable("race", "race",['White', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo', 'Other', 'Black']))
template.add(gum.LabelizedVariable("workclass", "workclass",['Private', 'Self-emp-not-inc', 'Self-emp-inc', 'Federal-gov', 'Local-gov', 'State-gov', 'Without-pay', 'Never-worked']))
template.add(gum.LabelizedVariable("relationship", "relationship", ['Wife', 'Own-child', 'Husband', 'Not-in-family', 'Other-relative', 'Unmarried']))
template.add(gum.LabelizedVariable("marital_status", "marital_status", ['Married-civ-spouse', 'Divorced', 'Never-married', 'Separated', 'Widowed', 'Married-spouse-absent', 'Married-AF-spouse'])) 
template.add(gum.LabelizedVariable("occupation", "occupation",['Tech-support', 'Craft-repair', 'Other-service', 'Sales', 'Exec-managerial', 'Prof-specialty', 'Handlers-cleaners', 'Machine-op-inspct', 'Adm-clerical', 'Farming-fishing', 'Transport-moving', 'Priv-house-serv', 'Protective-serv', 'Armed-Forces']))            
gnb.showBN(template)

# Persist the training frame and learn a BN from it, using `template` to fix
# variable names and label order for the learner.
train_df.to_csv(os.path.join('/content/gdrive/My Drive/train_data2.csv'), index=False)
file = os.path.join('res', 'titanic', '/content/gdrive/My Drive/train_data2.csv')

learner = gum.BNLearner(file, template)
bn = learner.learnBN()
bn  # notebook cell: renders the learned network

gnb.showInformation(bn,{},size="20")  # entropy/information view of the BN

gnb.showInference(bn)  # posterior marginals with no evidence

# P(target | sex=Male, age_range=21-30)
gnb.showPosterior(bn,evs={"sex": "Male", "age_range": '21-30'},target='target')

gnb.sideBySide(bn, gum.MarkovBlanket(bn, 'target'), captions=["Learned Bayesian Network", "Markov blanket of 'target'"])



# %% markdown
# ## Parameter Learning from the database
# We give the `asiaBN` bayesian network as a parameter for the learner in order to have the variables and order of labels for each variable.
# %% codecell
# using the BN as template for variables and labels
# NOTE(review): `outPath` and `asiaBN` are defined in an earlier cell — not visible here.
learner = gum.BNLearner(outPath, asiaBN)
# Fixing the DAG means only the CPTs are estimated from the data.
learner.setInitialDAG(g = asiaBN.dag())

# Learn the parameters when structure is known:
asiaBN_learnedParams: BayesNet = learner.learnParameters()

gnb.showBN(asiaBN_learnedParams)
# gnb.showBN(asiaBN) # same thing

# %% codecell
# This is the bad example: learning without the initial template gets the nodes and structure wrong
learnerNoTemplate = gum.BNLearner(outPath)
learnerNoTemplate.setInitialDAG(g = asiaBN.dag())
asiaBNNoTemplate: BayesNet = learnerNoTemplate.learnParameters()

gnb.showBN(asiaBNNoTemplate)
# %% codecell
# This is what the DAG looks like
asiaBN.dag()
# %% codecell
asiaBNNoTemplate.dag() # same
# %% codecell
# Example #5 (scraped separator; original score: 0)
def trainmodel(filename):
    """Learn a two-slice BN over OHLCV data and evaluate it on a test split.

    Builds a network with day-0 and day-1 copies of Open/High/Low/Close/Volume,
    forbids arcs that point backwards in time (day-1 -> day-0), learns the
    structure with tabu-list local search, re-learns the parameters with
    Laplace smoothing, then counts how often a confident prediction
    (P(first Close1 label) < 0.6) matches the test label.

    Parameters
    ----------
    filename : str
        Base name; reads ``<filename>_train.csv`` and ``<filename>_test.csv``.

    Returns
    -------
    (acc, N) : (int, float)
        Correct confident predictions and total confident predictions.
    """
    # gentt() (defined elsewhere in this file) returns the per-variable
    # templates (discretizations) for Open/High/Low/Close/Volume.
    ob, hb, lb, cb, vb = gentt(filename)

    bn = gum.BayesNet(filename)
    names = ['Open', 'High', 'Low', 'Close', 'Volume']
    templates = {'Open': ob, 'High': hb, 'Low': lb, 'Close': cb, 'Volume': vb}
    # Add day-0 then day-1 copies in the same order as before so node ids are
    # unchanged; only the Close1 id is needed later.
    node_ids = {}
    for day in ('0', '1'):
        for name in names:
            node_ids[name + day] = bn.add(name + day, templates[name])
    Close1 = node_ids['Close1']

    learner = gum.BNLearner(filename + "_train.csv", bn)
    # Time must flow forward: forbid every arc from a day-1 node to a day-0
    # node (replaces the 25 hand-written addForbiddenArc calls).
    for src in names:
        for dst in names:
            learner.addForbiddenArc(src + '1', dst + '0')
    #learner.addMandatoryArc('Close0','Close1')
    learner.useLocalSearchWithTabuList()
    bn = learner.learnBN()
    gnb.showBN(bn)

    # Re-learn the parameters on the learned structure with Laplace smoothing.
    learner = gum.BNLearner(filename + "_train.csv", bn)
    learner.setInitialDAG(bn.dag())
    learner.useAprioriSmoothing(1)
    bn = learner.learnParameters()

    # Inference: N counts confident predictions, acc counts correct ones.
    ie = gum.LazyPropagation(bn)
    ie.makeInference()
    N = 0.0
    acc = 0
    with open(filename + '_test.csv', 'r', encoding="utf-8") as csvfile:
        reader = csv.reader(csvfile)
        for line in list(reader)[1:]:
            # Test columns: Close0, Open0, High0, Low0, Volume0, target.
            c, o, h, l, v, t = line[:6]
            ie.eraseAllEvidence()
            ie.setEvidence({
                'Close0': c,
                'Open0': o,
                'High0': h,
                'Low0': l,
                'Volume0': v
            })
            ie.makeInference()
            # NOTE(review): Close1 is the node id from the template network;
            # assumes the learner preserves node ids — confirm against pyAgrum.
            prob = ie.posterior(Close1).tolist()
            if prob[0] < 0.6:
                N = N + 1
                if t == '1':
                    acc = acc + 1
    return acc, N
# Example #6 (scraped separator; original score: 0)
def main():
    """Learn two BN structures for the 'protein' data, fit their parameters,
    and compare their prediction accuracy for 'nuc' on the held-out test set.

    Relies on names defined elsewhere in this file: ``gum``, ``gnb``, ``csv``
    and ``partition()``.
    """
    bn = gum.BayesNet('nuc_inf')
    # Target variable: two labels plus an explicit '-1' label at index 2.
    va = gum.LabelizedVariable('nuc', 'a labelized variable', 2)
    va.addLabel('-1')
    nuc = bn.add(va)
    # Evidence variables, same insertion order as before so node ids match.
    for name, size in (('A', 6), ('R', 7), ('N', 7), ('D', 2), ('Q', 2)):
        bn.add(name, size)
    partition("protein")  # writes protein_train.csv / protein_test.csv (defined elsewhere)

    learner = gum.BNLearner("protein_train.csv", bn)
    # Optional structural priors — uncomment to force arcs into the target:
    #learner.addMandatoryArc('A','nuc')
    #learner.addMandatoryArc('R','nuc')
    #learner.addMandatoryArc('Q','nuc')
    #learner.addMandatoryArc('N','nuc')
    #learner.addMandatoryArc('D','nuc')
    learner.useLocalSearchWithTabuList()
    bn0 = learner.learnBN()
    gnb.showBN(bn0)
    learner.useGreedyHillClimbing()
    bn1 = learner.learnBN()  # shown only; not used for parameter learning below
    gnb.showBN(bn1)
    learner.useK2([5, 4, 3, 2, 1, 0])
    bn2 = learner.learnBN()
    gnb.showBN(bn2)

    def _fit_parameters(dag):
        # Estimate CPTs for a fixed structure from the training CSV (Laplace).
        plearner = gum.BNLearner("protein_train.csv", bn)
        plearner.setInitialDAG(dag)
        plearner.useAprioriSmoothing(1)
        return plearner.learnParameters()

    # Parameter learning on the two retained structures (tabu-list and K2).
    bn01 = _fit_parameters(bn0.dag())  # first
    gnb.showBN(bn01)
    bn11 = _fit_parameters(bn2.dag())  # second
    gnb.showBN(bn11)

    #first
    ie1 = gum.LazyPropagation(bn01)
    ie1.makeInference()
    gnb.showInference(bn01, evs={})
    #second
    ie2 = gum.LazyPropagation(bn11)
    ie2.makeInference()
    gnb.showInference(bn11, evs={})

    def _score(ie, vnuc, acc, count):
        # Update (acc, count) with one prediction; skip non-unique argmax.
        amax = ie.posterior(nuc).argmax()
        if len(amax) != 1:
            return acc, count
        predicted = amax[0]['nuc']
        if predicted == 2 and vnuc == -1:  # label index 2 encodes '-1'
            acc += 1
        if predicted == vnuc:
            acc += 1
        return acc, count + 1

    with open('protein_test.csv', 'r', encoding="utf-8") as csvfile:
        reader = csv.reader(csvfile)
        # FIX: counters previously started at 1, which inflated the denominator
        # and underestimated accuracy; start at 0 and guard the division.
        count1 = count2 = 0
        acc1 = acc2 = 0
        for line in list(reader)[1:]:
            vnuc, vA, vR, vN, vD, vQ = (int(x) for x in line[:6])
            evidence = {'A': vA, 'R': vR, 'N': vN, 'D': vD, 'Q': vQ}
            for engine in (ie1, ie2):
                engine.eraseAllEvidence()
                engine.setEvidence(evidence)
                engine.makeInference()
                engine.addTarget(nuc)
            acc2, count2 = _score(ie2, vnuc, acc2, count2)
            acc1, count1 = _score(ie1, vnuc, acc1, count1)
        acc2 = acc2 / count2 if count2 else 0.0
        acc1 = acc1 / count1 if count1 else 0.0
    print(acc2, acc1)