Esempio n. 1
0
 def testMarkovBlanketMultiLevel(self):
     """Check the size of the Markov blanket of "C" for several levels.

     Levels 1, 2 and 3 are expected to give blankets of size 5, 11 and 13.
     A level of 0 is expected to raise ``gum.InvalidArgument``.
     """
     bn = gum.fastBN("Z<-A->B->C->D->E<-Y;X->G<-F<-C<-I<-H->W")

     # (level, expected blanket size) pairs, checked in increasing level order.
     expected_sizes = ((1, 5), (2, 11), (3, 13))
     for level, expected_size in expected_sizes:
         self.assertEqual(
             gum.MarkovBlanket(bn, "C", level).size(), expected_size)

     with self.assertRaises(gum.InvalidArgument):
         # Only the raised exception matters, so the result is not bound.
         gum.MarkovBlanket(bn, "C", 0)
Esempio n. 2
0
    def testMarkovBlanketStructure(self):
        """Compare the Markov blanket of each node with an expected structure.

        The blanket of "a" must differ from the reversed arc ``b->a``; every
        other check expects ``hasSameStructure`` to hold.
        """
        bn = gum.fastBN("a->b->c->d->e;f->d->g;h->i->g")

        # Negative check first: b->a is not the structure of the blanket of "a".
        blanket_of_a = gum.MarkovBlanket(bn, "a")
        self.assertFalse(blanket_of_a.hasSameStructure(gum.fastBN("b->a")))

        # (node, fastBN description of the expected blanket structure)
        expected_structures = (
            ("a", "a->b"),
            ("b", "a->b->c"),
            ("c", "b->c->d;f->d"),
            ("d", "c->d->e;f->d->g;i->g"),
            ("e", "d->e"),
            ("f", "c->d;f->d;"),
            ("g", "d->g;i->g;"),
            ("h", "h->i;"),
            ("i", "d->g;h->i->g;;"),
        )
        for node, structure in expected_structures:
            blanket = gum.MarkovBlanket(bn, node)
            self.assertTrue(blanket.hasSameStructure(gum.fastBN(structure)))
Esempio n. 3
0
 def _get_markov_blanket(self):
     """Return the features in the Markov blanket of the target.

     The node ids of ``gum.MarkovBlanket(self.bn, self.target)`` are
     translated through the lookup returned by
     ``self._get_list_names_order()``, then ``self.target`` is removed
     from the translated entries.

     Returns
     -------
     list
         The translated blanket entries, without ``self.target``.

     Raises
     ------
     ValueError
         If ``self.target`` is not among the translated entries
         (raised by ``list.remove``).
     """
     blanket = gum.MarkovBlanket(self.bn, self.target)
     # The lookup is fetched once instead of once per node: the call takes no
     # per-node argument (assumes it returns the same mapping on every call).
     convert = self._get_list_names_order()
     feats_markov_blanket = [convert[i] for i in blanket.nodes()]
     feats_markov_blanket.remove(self.target)
     return feats_markov_blanket
Esempio n. 4
0
def _independenceListForPairs(bn, target=None):
    """
    Return the conditional independences found for pairs of variables of `bn` that share no arc.

    For each pair `(i,j)` with no arc in either direction, candidate
    conditioning sets are enumerated by increasing size and the first set `k`
    for which `bn.isIndependent(i,j,k)` holds is recorded; pairs for which no
    such set exists are not reported.

    Parameters
    ----------
    bn: gum.BayesNet
      the Bayesian Network

    target: (optional) str or int
      the name or id of the target variable. If a target is given, only the
      pairs `(target,j)` are tested, and the conditioning sets are subsets of
      the nodes of the Markov blanket of the target.

    Returns
    -------
    List[(str,str,Tuple[str])]
      A list of triples `(i,j,k)` : `i` is independent of `j` given the
      variables named in the tuple `k`.
    """
    def powerset(iterable):
        items = list(iterable)
        # lazily yields the subsets, smallest first (the empty one included)
        return itertools.chain.from_iterable(
            itertools.combinations(items, size)
            for size in range(len(items) + 1))

    independences = []
    remaining = sorted(bn.names())
    if target is None:
        first_names = list(remaining)
        # coerced to a set so that the set difference below works whatever
        # container `bn.names()` returns
        candidates = set(bn.names())
    else:
        candidates = {
            bn.variable(node).name()
            for node in gum.MarkovBlanket(bn, target).nodes()
        }
        if isinstance(target, str):
            first_names = [target]
        else:
            first_names = [bn.variable(target).name()]

    # testing every d-separation
    for first in first_names:
        # removing `first` before pairing avoids testing (i,i) and, when every
        # variable is a first name, testing both (i,j) and (j,i)
        remaining.remove(first)
        for second in remaining:
            if bn.existsArc(first, second) or bn.existsArc(second, first):
                continue
            for given in powerset(sorted(candidates - {first, second})):
                if bn.isIndependent(first, second, given):
                    independences.append((first, second, tuple(given)))
                    # only the first (smallest) separating set is kept
                    break
    return independences
Esempio n. 5
0
# Add the "occupation" variable, with its explicit list of labels, to the template.
template.add(gum.LabelizedVariable("occupation", "occupation",['Tech-support', 'Craft-repair', 'Other-service', 'Sales', 'Exec-managerial', 'Prof-specialty', 'Handlers-cleaners', 'Machine-op-inspct', 'Adm-clerical', 'Farming-fishing', 'Transport-moving', 'Priv-house-serv', 'Protective-serv', 'Armed-Forces']))
gnb.showBN(template)

# Single definition of the CSV path: it is written here and read back by the
# learner (and by showROC) below. The previous
# os.path.join('res', 'titanic', <absolute path>) resolved to this same path,
# because os.path.join discards every component before an absolute one.
file = '/content/gdrive/My Drive/train_data2.csv'
train_df.to_csv(file, index=False)

# Learn a Bayesian network from the CSV, using the template's variables.
learner = gum.BNLearner(file, template)
bn = learner.learnBN()
bn

gnb.showInformation(bn,{},size="20")

gnb.showInference(bn)

gnb.showPosterior(bn,evs={"sex": "Male", "age_range": '21-30'},target='target')

gnb.sideBySide(bn, gum.MarkovBlanket(bn, 'target'), captions=["Learned Bayesian Network", "Markov blanket of 'target'"])



# Evaluate the network row by row on testdf.
# NOTE(review): 0.157935 is handed to is_well_predicted - presumably a decision
# threshold; confirm against that helper.
ie=gum.LazyPropagation(bn)
init_belief(ie)
ie.addTarget('target')
result = testdf.apply(lambda x: is_well_predicted(ie, bn, 0.157935, x), axis=1)
# relative frequencies of each outcome (normalize spelled out instead of a bare True)
result.value_counts(normalize=True)

# Outcomes whose text starts with "True" are counted as good predictions.
positives = sum(result.map(lambda x: 1 if x.startswith("True") else 0 ))
total = result.count()
print("{0:.2f}% good predictions".format(positives/total*100))

showROC(bn,file, 'target', "True", True, True)
Esempio n. 6
0
def generate_BN_explanationsMB(instance,
                               label_lst,
                               feature_names,
                               class_var,
                               encoder,
                               scaler,
                               model,
                               path,
                               dataset_name,
                               variance=0.25,
                               algorithm="Hill Climbing"):
    """Learn a Bayesian network around one instance and display its explanation.

    Pipeline: generate permutations of the instance, discretize them (4 bins),
    save them as CSV under ``path``, normalise that file, learn a Bayesian
    network from the ``_norm.csv`` file, then display the network, its
    inference and the Markov blanket of ``class_var`` side by side. The
    network is also exported (dot rendering and ``.net`` file) under
    ``path + "explanations/"``.

    Parameters
    ----------
    instance : mapping
        Must provide the keys ``'index'`` and ``'prediction_type'``; both are
        used to build the output file names.
    label_lst, feature_names, encoder, scaler, model
        Forwarded unchanged to ``generate_permutations``.
    class_var
        Class variable, used for discretization and as the Markov blanket
        target.
    path : str
        Root output directory; it is concatenated as-is, so it is expected to
        end with a path separator.
    dataset_name : str
        Dataset file name; every ``".csv"`` occurrence is stripped to form the
        sub-directory name.
    variance : float, optional
        Forwarded to ``generate_permutations``.
    algorithm : str, optional
        Forwarded to ``learnBN``.

    Returns
    -------
    list
        ``[bn, inference, infoBN, markov_blanket]``.
    """
    # Seed NumPy's and core Python's random generators so that each call
    # starts from the same random state.
    np.random.seed(515)
    rn.seed(515)

    instance_id = str(instance['index'])
    # e.g. "True Positive" -> "true_positives"
    prediction_dir = (instance['prediction_type'].lower() + "s").replace(" ", "_")

    # Permute the instance, then discretize the resulting dataframe.
    permutations = generate_permutations(instance,
                                         label_lst,
                                         feature_names,
                                         class_var,
                                         encoder,
                                         scaler,
                                         model,
                                         variance=variance)
    discretized = discretize_dataframe(permutations, class_var, num_bins=4)

    # Keep the discretized dataframe on disk (debugging / reproducibility).
    dataset_dir = dataset_name.replace(".csv", "")
    permutations_csv = (path + "feature_permutations/" + dataset_dir + "/" +
                        prediction_dir + "/" + instance_id + ".csv")
    discretized.to_csv(permutations_csv, index=False)

    # Normalise the saved file; the network is then learnt from the
    # "_norm.csv" counterpart of that file.
    normalise_dataframe(permutations_csv)
    normalised_csv = permutations_csv.replace(".csv", "_norm.csv")
    bn, infoBN, essencGraph = learnBN(normalised_csv, algorithm=algorithm)

    # Inference over every column of the discretized data, without evidence.
    inference = gnb.getInference(bn,
                                 evs={},
                                 targets=discretized.columns.to_list(),
                                 size='12')

    markov_blanket = gum.MarkovBlanket(bn, class_var)

    # Display the three views next to each other.
    gnb.sideBySide(
        bn,
        inference,
        markov_blanket,
        captions=["Bayesian Network", "Inference", "Markov Blanket"])

    # Export the learnt network.
    explanation_prefix = (path + "explanations/" +
                          dataset_name.replace(".csv", "") + "/BN/" +
                          prediction_dir + "/" + instance_id)
    gum.lib.bn2graph.dotize(bn, explanation_prefix + "_BN")
    gum.saveBN(bn, explanation_prefix + "_BN.net")

    return [bn, inference, infoBN, markov_blanket]
Esempio n. 7
0
 def testMarkovBlanketSpecialArcs(self):
     """Check the Markov blanket of "dd" against an explicitly built structure."""
     network = gum.fastBN(
         "aa->bb->cc->dd->ee;ff->dd->gg;hh->ii->gg;ff->ii;ff->gg")
     # Structure the blanket of "dd" is expected to have.
     expected = gum.fastBN("cc->dd->ee;ff->dd->gg;ff->gg;ff->ii->gg")
     blanket = gum.MarkovBlanket(network, "dd")
     self.assertTrue(blanket.hasSameStructure(expected))
Esempio n. 8
0
 def testChain(self):
     """Build Markov blankets on the chain a->b->c, once per way of designating a node."""
     bn = gum.fastBN("a->b->c")
     # node designated by its name
     eg = gum.MarkovBlanket(bn, "a")
     # node designated by an integer (presumably its node id - confirm);
     # the lines visible here make no assertion on either result
     eg = gum.MarkovBlanket(bn, 1)