コード例 #1
0
    def testLocalSearchWithTabu(self):
        """Opening a BNLearner on a non-existent CSV must raise gum.IOError."""
        # assertRaises alone fails the test when no IOError is raised; the
        # former try/except/else re-check of the same call (with a no-op
        # assertTrue(True) and dead `pass`) was redundant and is removed.
        with self.assertRaises(gum.IOError):
            gum.BNLearner("shouldNotExist.csv")
コード例 #2
0
    def testPseudoCount(self):
        filename = self.agrumSrcDir('dataW.csv')
        with open(filename, "w") as src:
            src.write("""X,Y,Z
0,1,2
0,1,0
0,0,2
1,0,2
0,1,1
1,1,1
0,1,1
""")
        learner = gum.BNLearner(filename)
        self.assertEqual(learner.nbRows(), 7)
        self.assertEqual(learner.nbCols(), 3)

        self.assertEqual(learner.rawPseudoCount(["X"]), (5, 2))
        self.assertEqual(learner.rawPseudoCount(["X", "Z"]),
                         (1, 0, 2, 1, 2, 1))
        self.assertEqual(learner.rawPseudoCount(["Y", "Z"]),
                         (0, 1, 0, 3, 2, 1))

        learner.useSmoothingPrior(0.1)

        self.assertEqual(learner.rawPseudoCount(["X"]), (5.1, 2.1))
        self.assertEqual(learner.rawPseudoCount(["X", "Z"]),
                         (1.1, 0.1, 2.1, 1.1, 2.1, 1.1))
        self.assertEqual(learner.rawPseudoCount(["Y", "Z"]),
                         (0.1, 1.1, 0.1, 3.1, 2.1, 1.1))

        learner = gum.BNLearner(filename)
        self.assertEqual(learner.pseudoCount(["X"]).tolist(), [5, 2])
        self.assertEqual(
            learner.pseudoCount(["X", "Z"]).tolist(), [[1, 0], [2, 1], [2, 1]])
        self.assertEqual(
            learner.pseudoCount(["Y", "Z"]).tolist(), [[0, 1], [0, 3], [2, 1]])
        self.assertEqual(
            learner.pseudoCount(["Z", "Y"]).tolist(), [[0, 0, 2], [1, 3, 1]])

        learner.useSmoothingPrior(0.1)

        self.assertEqual(learner.pseudoCount(["X"]).tolist(), [5.1, 2.1])
        self.assertEqual(
            learner.pseudoCount(["X", "Z"]).tolist(),
            [[1.1, 0.1], [2.1, 1.1], [2.1, 1.1]])
        self.assertEqual(
            learner.pseudoCount(["Y", "Z"]).tolist(),
            [[0.1, 1.1], [0.1, 3.1], [2.1, 1.1]])
        self.assertEqual(
            learner.pseudoCount(["Z", "Y"]).tolist(),
            [[0.1, 0.1, 2.1], [1.1, 3.1, 1.1]])
def GSMN(mn, fileName, threshold=0.05):
    """
    Learn the structure of a Markov network from a database, using the GSMN
    algorithm.

    Examples
    --------
    >>> mn=mnl.GSMN(template,"./samples/sampleMN.csv",0.0001)

    Parameters
    ----------
    mn : pyAgrum.MarkovNet
            the template of the Markov network (provides the variables)
    fileName : str
            path of the CSV database to learn from
    threshold : float
            default value : 0.05, hyperparameter used for the statistical test

    Returns
    -------
    pyAgrum.MarkovNet
            the learned Markov network
    """
    V = mn.names()
    # map each variable name to its pyAgrum variable from the template
    mnVariables = dict()
    for name in V:
        mnVariables[name] = mn.variableFromName(name)
    MB = dict()
    learner = gum.BNLearner(fileName)
    # estimate a Markov blanket for every variable via the GS helper
    for variable in V:
        MB[variable] = GS(variable, V, learner, threshold)
    # presumably reconciles inconsistent blankets between variable pairs
    # (see correctError) — TODO confirm against its definition
    correctError(MB)
    mn = dictToMarkovNetwork(MB, mnVariables)
    return mn
コード例 #4
0
    def test3off2(self):
        """3off2 + NML with structural constraints: arcs, mixed graph, latents."""
        learner = gum.BNLearner(self.agrumSrcDir('asia.csv'))
        learner.use3off2()
        learner.useNMLCorrection()
        learner.addForbiddenArc(4, 1)
        learner.addMandatoryArc(7, 5)

        # empty initial DAG over the 8 database columns
        d = gum.DAG()
        for i in range(8):
            d.addNodeWithId(i)
        learner.setInitialDAG(d)

        self.assertNotEqual(len(learner.names()), 0)

        # Bug fix: bare `except:` also swallowed KeyboardInterrupt/SystemExit
        # and hid the actual error; catch Exception and report it instead.
        try:
            bn = learner.learnBN()
        except Exception as e:
            self.fail("Exception has been raised and should not: %s" % e)
        self.assertEqual(len(bn.arcs()), 9)
        # structural constraints must be respected
        self.assertFalse(bn.dag().existsArc(4, 1))
        self.assertTrue(bn.dag().existsArc(7, 5))

        try:
            mg = learner.learnMixedStructure()
        except Exception as e:
            self.fail("Exception has been raised and should not: %s" % e)
        self.assertEqual(mg.sizeArcs(), 8)
        self.assertEqual(mg.sizeEdges(), 1)
        self.assertFalse(bn.dag().existsArc(4, 1))
        self.assertTrue(bn.dag().existsArc(7, 5))
        self.assertEqual(len(learner.latentVariables()), 2)
コード例 #5
0
ファイル: BN_unsup.py プロジェクト: RRMauritz/PDBQ
def run_bn_unsup(train_corr, test_corr, structure):
    """
    Learn a BN from train_corr, then propagate each row of test_corr as
    evidence through it and build a new data set from the posteriors.

    (Fixed the malformed ``\"\"\"\"`` docstring opener of the original.)

    :param train_corr: training data, not in one-hot encoding form!
    :param test_corr: test data that is being updated, in one-hot encoding form
    :param structure: structure of the data (how many categories each attribute has)
    :return: array of the same shape as test_corr, filled with posteriors
    """
    # prefix offsets: structure_0[:k+1] sums give the column span of node k
    structure_0 = [0] + structure

    # Learn the BN based on train_corr
    learner = gum.BNLearner(train_corr)
    learner.useScoreBDeu()
    bn = learner.learnBN()

    # One inference engine is enough: evidence is erased between rows
    # (the original rebuilt gum.LazyPropagation(bn) on every iteration).
    ie = gum.LazyPropagation(bn)

    # Placeholder for the new data
    new_data = np.zeros(test_corr.shape)
    for i in range(test_corr.shape[0]):
        dp = test_corr[i, :]  # fix an observation
        # Slice the one-hot row into the per-node evidence dictionary
        evs = {}
        for k, node in enumerate(bn.nodes()):
            evs[node] = dp[sum(structure_0[:k + 1]):sum(structure_0[:k + 2])]
        ie.setEvidence(evs)  # set the evidence
        # Extract the posteriors and store them in new_data
        pst = [ie.posterior(node).toarray() for node in bn.nodes()]
        new_data[i, :] = list(itertools.chain.from_iterable(pst))
        ie.eraseAllEvidence()
    return new_data
コード例 #6
0
    def testHillClimbing(self):
        """Hill climbing on asia.csv yields an 8-node BN; bad paths raise IOError."""
        learner = gum.BNLearner(self.agrumSrcDir('asia.csv'))
        learner.useGreedyHillClimbing()
        bn = learner.learnBN()
        self.assertEqual(bn.size(), 8)

        # assertRaises alone covers the missing-file case; the former manual
        # try/except/else duplicate of this exact check was redundant.
        with self.assertRaises(gum.IOError):
            gum.BNLearner("shouldNotExist.csv")
コード例 #7
0
def independenceListForPairs(bn,
                             filename,
                             target=None,
                             plot=True,
                             alphabetic=False):
    """
    Compute the chi2-test p-value of a (as simple as possible) independence
    proposition for every non-arc of the BN.

    Parameters
    ----------
    bn : gum.BayesNet
      the Bayesian network

    filename : str
      the name of the csv database

    alphabetic : bool
      if True, the list is alphabetically sorted else it is sorted by the p-value

    target: (optional) str or int
      the name or id of the target variable

    plot : bool
      if True, plot the result

    Returns
    -------
      the list
    """
    learner = gum.BNLearner(filename, bn)
    pvalues = {indep: learner.chi2(*indep)[1]
               for indep in _independenceListForPairs(bn, target)}

    if plot:
        # build a LaTeX label for each proposition
        labelled = dict()
        for x, y, knowing in pvalues:
            label = "$" + x + " \\perp " + y
            if len(knowing) > 0:
                label += " \\mid " + ",".join(knowing)
            label += "$"
            labelled[label] = pvalues[(x, y, knowing)]

        if alphabetic:
            ordered = list(labelled.keys())
        else:
            ordered = sorted(labelled, key=labelled.__getitem__, reverse=False)

        fig = pylab.figure(figsize=(10, 1 + 0.25 * len(labelled)))
        ax = fig.add_subplot(1, 1, 1)
        ax.plot([labelled[lab] for lab in ordered], ordered, "o")
        ax.grid(True)
        # classic 5% significance line plus a highlighted rejection band
        ax.vlines(x=0.05, ymin=-0.5, ymax=len(pvalues) - 0.5, colors='purple')
        ax.add_patch(
            mpl.patches.Rectangle((0, -0.5), 0.05, len(pvalues), color="yellow"))

    return pvalues
コード例 #8
0
    def testHybridLearning(self):
        """A MIIC skeleton constrains a second learner; check the learned structure."""
        # first pass: MIIC gives an essential graph, keep only its skeleton
        miic = gum.BNLearner(self.agrumSrcDir('data1.csv'))
        miic.useMIIC()
        skeleton = miic.learnEssentialGraph().skeleton()

        # second pass: learn a BN constrained to that skeleton
        constrained = gum.BNLearner(self.agrumSrcDir('data1.csv'))
        constrained.setPossibleSkeleton(skeleton)
        bn = constrained.learnBN()

        self.assertEqual(bn.sizeArcs(), 4)
        idOf = bn.idFromName
        self.assertEqual(bn.parents(idOf("V")), {idOf("A")})
        self.assertEqual(bn.parents(idOf("Y")), {idOf("X"), idOf("V")})
        self.assertEqual(bn.parents(idOf("Z")), {idOf("Y")})
コード例 #9
0
def learnBN(file_path: str,
            algorithm: BN_Algorithm = BN_Algorithm.HillClimbing):
    """Learn a Bayesian network from a CSV file with the requested algorithm.

    Parameters
    ----------
    file_path : str
        path of the CSV database to learn from
    algorithm : BN_Algorithm
        which structure-learning algorithm to configure (default HillClimbing)

    Returns
    -------
    the learned gum.BayesNet

    Raises
    ------
    Exception
        if the algorithm is not one of the supported values
    """
    learner = gum.BNLearner(file_path)

    if algorithm == BN_Algorithm.HillClimbing:
        print_big("Selecting Greedy Hill Climbing Algorithm")
        learner.useGreedyHillClimbing()

    elif algorithm == BN_Algorithm.LocalSearch:
        print_big("Selecting Local Search Algorithm")
        # Bug fix: the use* methods only configure the learner; the original
        # pointlessly bound their (None) result to `bn`.
        learner.useLocalSearchWithTabuList()

    elif algorithm == BN_Algorithm.ThreeOffTwo:
        print_big("Selecting 3Off2 Algorithm")
        learner.use3off2()

    elif algorithm == BN_Algorithm.MIIC:
        print_big("Selecting MIIC Algorithm")
        learner.useMIIC()

    else:
        raise Exception('Not supported algorithm')

    return learner.learnBN()
コード例 #10
0
def createBayesianNetwork():
    """Learn a BN from the whole-log CSV with K2 and display it.

    Returns the learned gum.BayesNet.
    """
    learner = gum.BNLearner("logs/Log/WholeLog.csv")
    # K2 needs an ordering of the 13 database columns (here: natural order)
    learner.useK2(list(range(13)))
    network = learner.learnBN()
    print("Learned in {0}s".format(learner.currentTime()))
    gnb.showBN(network)
    return network
コード例 #11
0
def learn_bn(filepath, learn_algo):
    """Learn a Bayesian network from *filepath* with the method *learn_algo*.

    learn_algo is one of 'hard-coded', 'hillclimbing', 'tabu', 'k2'; any other
    value aborts the program. The score is then selected through the
    sim_param flags (BDEU by default), each paired with a smoothing prior.

    Returns the learned gum.BayesNet.
    """
    try:
        learner = agrum.BNLearner(filepath)
    except IOError as e:
        print("learn_and_create_bn - I/O error({0}): {1}".format(
            e.errno, e.strerror))
        # Bug fix: the original evaluated the builtin `exit` without calling
        # it, so execution continued with `learner` undefined.
        sys.exit(1)

    # Select learning method
    if learn_algo == 'hard-coded':
        learner.addMandatoryArc(0, 3)
        learner.addMandatoryArc(1, 3)
        learner.addMandatoryArc(2, 3)
        if sim_param.inclination_param:
            learner.addMandatoryArc(0, 4)
            learner.addMandatoryArc(1, 4)
            learner.addMandatoryArc(2, 4)
            learner.addMandatoryArc(3, 4)

    elif learn_algo == 'hillclimbing':
        learner.useGreedyHillClimbing()
    elif learn_algo == 'tabu':
        learner.useLocalSearchWithTabuList()
    elif learn_algo == 'k2':
        # K2 needs an ordering; it depends on whether inclination is modelled
        if sim_param.inclination_param:
            learner.useK2([4, 3, 2, 1, 0])
        else:
            learner.useK2([3, 2, 1, 0])

    else:
        print(
            'ERROR - learn_bn : there was a problem while selecting the learner'
        )
        sys.exit()

    # Select score (BDEU by default)
    if sim_param.score_likelihood:
        learner.useScoreLog2Likelihood()
        learner.useAprioriSmoothing()

    if sim_param.score_bic:
        learner.useScoreBIC()
        learner.useAprioriSmoothing()

    if sim_param.score_aic:
        learner.useScoreAIC()
        learner.useAprioriSmoothing()

    if sim_param.score_k2:
        learner.useScoreK2()

    bn = learner.learnBN()
    if sim_param.debug:
        print("BN learned.\n", bn)
    return bn
コード例 #12
0
    def testLocalSearchWithTabuAccurate(self):
        """The tabu-search BN must stay KL-close to the asia2 reference net."""
        learner = gum.BNLearner(self.agrumSrcDir('asia.csv'))
        learner.useLocalSearchWithTabuList()
        learned = learner.learnBN()

        reference = gum.loadBN(self.agrumSrcDir('asia2.bif'), verbose=False)

        # exact KL divergence between learned and reference networks
        distance = gum.ExactBNdistance(learned, reference).compute()
        self.assertAlmostEqual(distance['klPQ'], 0.5, delta=0.5)
コード例 #13
0
    def test_setSliceOrder_with_names(self):
        """setSliceOrder accepts names, ids or a mix, and rejects bad input."""
        # names only
        learner = gum.BNLearner(self.agrumSrcDir('asia3.csv'))
        learner.setSliceOrder(
            [["smoking?", "lung_cancer?"],
             ["bronchitis?", "visit_to_Asia?"],
             ["tuberculosis?"]])

        # a mix of column ids and names is accepted too
        learner = gum.BNLearner(self.agrumSrcDir('asia3.csv'))
        learner.setSliceOrder(
            [[0, "lung_cancer?"],
             [2, "visit_to_Asia?"],
             ["tuberculosis?"]])

        learner = gum.BNLearner(self.agrumSrcDir('asia3.csv'))

        # id 0 presumably denotes a column already placed by name — duplicate
        with self.assertRaises(gum.DuplicateElement):
            learner.setSliceOrder(
                [["smoking?", "lung_cancer?"],
                 [0, "visit_to_Asia?"],
                 ["tuberculosis?"]])

        # "CRUCRU?" is not a column of the database
        with self.assertRaises(gum.MissingVariableInDatabase):
            learner.setSliceOrder(
                [["smoking?", "lung_cancer?"],
                 ["bronchitis?", "CRUCRU?"],
                 ["tuberculosis?"]])
コード例 #14
0
    def test_chi2(self):
        """chi2 statistics and p-values on two reference databases."""
        learner = gum.BNLearner(self.agrumSrcDir('asia3.csv'))

        # (x, y, knowing, expected stat, stat delta, expected p, p delta)
        for x, y, knowing, stat_exp, d_stat, p_exp, d_p in (
            ("smoking?", "lung_cancer?", None, 36.2256, 1e-4, 0, 1e-4),
            ("smoking?", "visit_to_Asia?", None, 1.1257, 1e-4, 0.2886, 1e-4),
            ("lung_cancer?", "tuberculosis?", None, 0.6297, 1e-4, 0.4274, 1e-4),
            ("lung_cancer?", "tuberculosis?", ["tuberculos_or_cancer?"],
             58.0, 1e-4, 0.0, 1e-4),
        ):
            if knowing is None:
                stat, pvalue = learner.chi2(x, y)
            else:
                stat, pvalue = learner.chi2(x, y, knowing)
            self.assertAlmostEqual(stat, stat_exp, delta=d_stat)
            self.assertAlmostEqual(pvalue, p_exp, delta=d_p)

        learner2 = gum.BNLearner(self.agrumSrcDir('chi2.csv'))

        for x, y, knowing, stat_exp, d_stat, p_exp, d_p in (
            ("A", "C", None, 0.0007, 1e-3, 0.978, 1e-3),
            ("A", "B", None, 21.4348, 1e-3, 3.6e-6, 1e-5),
            ("B", "A", None, 21.4348, 1e-3, 3.6e-6, 1e-5),
            ("B", "D", None, 0.903, 1e-3, 0.341, 1e-3),
            ("A", "C", ["B"], 15.2205, 1e-3, 0.0005, 1e-4),
        ):
            if knowing is None:
                stat, pvalue = learner2.chi2(x, y)
            else:
                stat, pvalue = learner2.chi2(x, y, knowing)
            self.assertAlmostEqual(stat, stat_exp, delta=d_stat)
            self.assertAlmostEqual(pvalue, p_exp, delta=d_p)
コード例 #15
0
    def test_EM(self):
        """EM parameter learning on a database containing missing values."""
        # with "#" as the missing symbol, nothing is missing in EM.csv
        learner = gum.BNLearner(self.agrumSrcDir('EM.csv'), ["#"])
        self.assertFalse(learner.hasMissingValues())

        # with "?" it does contain missing values
        learner = gum.BNLearner(self.agrumSrcDir('EM.csv'), ["?"])
        self.assertTrue(learner.hasMissingValues())

        # hand-built chain 3 -> 2 -> 1 -> 0 over all database columns
        dag = gum.DAG()
        for node in range(len(learner.names())):
            dag.addNodeWithId(node)
        for head, tail in ((1, 0), (2, 1), (3, 2)):
            dag.addArc(head, tail)

        # without EM, missing values make parameter learning fail
        with self.assertRaises(gum.MissingValueInDatabase):
            learner.learnParameters(dag)

        # with EM (and smoothing) it succeeds
        learner.useEM(1e-3)
        learner.useSmoothingPrior()
        learner.learnParameters(dag, False)
コード例 #16
0
    def testParameterLearning(self):
        """Parameter learning keeps the variable labels of the template BN."""
        template = gum.loadBN(self.agrumSrcDir('asia_bool.bif'), verbose=False)

        learner = gum.BNLearner(self.agrumSrcDir('asia3.csv'), template)
        learner.setInitialDAG(template.dag())
        learner.useScoreLog2Likelihood()
        learner.useSmoothingPrior(1.0)

        learned = learner.learnParameters()
        # every learned variable must carry the same label set as the template
        for i in range(template.size()):
            name = learned.variable(i).name()
            self.assertEqual(
                set(learned.variable(i).labels()),
                set(template.variable(template.idFromName(name)).labels()))

        template = gum.loadBN(self.agrumSrcDir('asia_bool.bif'), verbose=False)
        # there is a beurk modality in asia3-faulty.csv
        with self.assertRaises(gum.UnknownLabelInDatabase):
            gum.BNLearner(self.agrumSrcDir('asia3-faulty.csv'), template)
コード例 #17
0
    def testDBNTonda(self):
        """Parameter learning gives identical CPTs with and without a template BN."""
        dbn = gum.BayesNet()
        # each variable exists at slice 0 and slice t with the same domain size
        domain_sizes = [("bf_0", 4), ("bf_t", 4), ("c_0", 5), ("c_t", 5),
                        ("h_0", 5), ("h_t", 5), ("tf_0", 5), ("tf_t", 5),
                        ("wl_0", 4), ("wl_t", 4)]
        for name, size in domain_sizes:
            dbn.add(gum.LabelizedVariable(name, name, size))
        for node in ["c_t", "h_t", "wl_t"]:
            dbn.addArc(dbn.idFromName("tf_0"), dbn.idFromName(node))
            dbn.addArc(dbn.idFromName("bf_0"), dbn.idFromName(node))
        dbn.addArc(dbn.idFromName("c_0"), dbn.idFromName("c_t"))
        dbn.addArc(dbn.idFromName("h_0"), dbn.idFromName("h_t"))
        dbn.addArc(dbn.idFromName("wl_0"), dbn.idFromName("wl_t"))

        csvfile = self.agrumSrcDir('DBN_Tonda.csv')

        def fit(learner):
            # identical configuration for both learners
            learner.setInitialDAG(dbn.dag())
            learner.useScoreLog2Likelihood()
            learner.useSmoothingPrior()
            return learner.learnParameters()

        bn1 = fit(gum.BNLearner(csvfile))
        bn2 = fit(gum.BNLearner(csvfile, dbn))

        # the two learned CPTs of c_0 must match cell by cell
        p1 = bn1.cpt(bn1.idFromName("c_0"))
        p2 = bn2.cpt(bn2.idFromName("c_0"))
        I1 = gum.Instantiation(p1)
        I2 = gum.Instantiation(p2)
        I1.setFirst()
        I2.setFirst()
        while not I1.end():
            self.assertEqual(p1.get(I1), p2.get(I2))
            I1.inc()
            I2.inc()
コード例 #18
0
ファイル: tme5.py プロジェクト: jeremySHI1022/MAPSI
def learn_parameters(bn_struct, ficname):
    """Build the DAG described by *bn_struct* and learn its parameters.

    bn_struct[i] lists the parent indices of node i; ficname is the path of
    the CSV database used for the estimation.
    """
    # build the DAG matching bn_struct
    dag = gum.DAG()
    node_ids = [dag.addNode() for _ in range(bn_struct.shape[0])]
    for child in range(bn_struct.shape[0]):
        for parent in bn_struct[child]:
            dag.addArc(node_ids[parent], node_ids[child])

    # let BNLearner estimate the parameters on that fixed structure
    learner = gum.BNLearner(ficname)
    learner.useScoreLog2Likelihood()
    learner.useAprioriSmoothing()
    return learner.learnParameters(dag)
コード例 #19
0
    def predict(self, dataset: DatasetInterface) -> List[Relation]:
        """Learn a BN from the dataset's file and convert its arcs to relations."""
        # Load from file as can't be used directly from a DataFrame.
        learner = gum.BNLearner(str(dataset.get_filepath()))

        if self.algorithm == self.LEARNER_GES:
            # greedy search
            learner.useGreedyHillClimbing()
        else:
            # tabu search
            learner.useLocalSearchWithTabuList()

        network = learner.learnBN()
        return self.__build_relations(network)
コード例 #20
0
 def run(df, pc=None):
     """
     Run the algorithm against the dataframe to return a dot string.

     Returns the learned BN's dot representation, or None on failure.
     """
     try:
         # Context manager guarantees the temporary CSV is closed and removed
         # even when learning fails (the original leaked it until GC).
         with tempfile.NamedTemporaryFile(suffix='.csv') as fp:
             df.to_csv(fp.name, encoding='utf-8', index=False)
             learner = gum.BNLearner(fp.name)
             learner.useGreedyHillClimbing()
             bn = learner.learnBN()
             return bn.toDot()
     except Exception as e:
         _logger.error(str(e))
         print(str(e))
     return None
コード例 #21
0
ファイル: learning.py プロジェクト: mArtukhov/LINDA_DSS
def learnBN(file_path, algorithm="Hill Climbing"):
    """Learn a Bayesian network from the CSV database *file_path*.

    (The original docstring described an unrelated permutation helper and has
    been replaced.)

    Parameters
    ----------
    file_path : str
        path of the CSV database to learn from
    algorithm : str, optional
        one of "Hill Climbing" (default), "Local Search", "3off2", "miic"

    Returns
    -------
    list
        [bn, infoBN, essencGraph]: the learned BN, its information view and
        its essential graph
    """
    learner = gum.BNLearner(file_path)

    # elif chain: exactly one algorithm is configured
    if algorithm == "Hill Climbing":
        print("Selecting Greedy Hill Climbing Algorithm")
        learner.useGreedyHillClimbing()

    elif algorithm == "Local Search":
        print("Selecting Local Search Algorithm")
        # use* only configures the learner; no point binding its result
        learner.useLocalSearchWithTabuList()

    elif algorithm == "3off2":
        print("Selecting 3Off2 Algorithm")
        learner.use3off2()

    elif algorithm == "miic":
        print("Selecting MIIC Algorithm")
        learner.useMIIC()

    # Bug fix: learnBN() was previously called twice in a row, learning the
    # whole network twice and discarding the first result.
    bn = learner.learnBN()
    essencGraph = gum.EssentialGraph(bn)
    infoBN = gnb.getInformation(bn)

    return [bn, infoBN, essencGraph]
コード例 #22
0
 def run(df, pc=None):
     """
     Run the algorithm against the dataframe and gets a list of unobserved latent edges.

     Returns the list of (name, name) latent-edge pairs, or None on failure.
     """
     try:
         # Context manager guarantees the temporary CSV is closed and removed
         # even when learning fails (the original left that to GC).
         with tempfile.NamedTemporaryFile(suffix='.csv') as fp:
             df.to_csv(fp.name, encoding='utf-8', index=False)
             learner = gum.BNLearner(fp.name)
             learner.useMIIC()
             bn = learner.learnBN()
             return [(bn.variable(i).name(), bn.variable(j).name())
                     for (i, j) in learner.latentVariables()]
     except Exception as e:
         _logger.error(str(e))
         print(str(e))
     return None
コード例 #23
0
    def test_dbWithGuil(self):
        filename = self.agrumSrcDir('csv_quoted.csv')
        with open(filename, "w") as src:
            src.write("""X,Y,Z
0,1,2
0,1",0
0,0,2
1,"0,2
0,"1",1
1,1,1
0,1,1
""")

        with self.assertRaises(SyntaxError):
            learner = gum.BNLearner(filename)
            learner.useScoreBIC()
            learner.useGreedyHillClimbing()
            bn = learner.learnBN()
コード例 #24
0
    def testHillClimbingAccurate(self):
        """Column names match the witness list and the learned BN is close to asia2."""
        learner = gum.BNLearner(self.agrumSrcDir('asia.csv'))

        witness = [
            'smoking?', 'lung_cancer?', 'bronchitis?', 'visit_to_Asia?',
            'tuberculosis?', 'tuberculos_or_cancer?', 'dyspnoea?',
            'positive_XraY?'
        ]
        # same names on both sides, checked element-wise in both directions
        for name in witness:
            self.assertTrue(name in learner.names())
        for name in learner.names():
            self.assertTrue(name in witness)

        learner.useGreedyHillClimbing()
        learned = learner.learnBN()

        reference = gum.loadBN(self.agrumSrcDir('asia2.bif'), verbose=False)

        # exact KL divergence between learned and reference networks
        distance = gum.ExactBNdistance(learned, reference).compute()
        self.assertAlmostEqual(distance['klPQ'], 0.5, delta=0.5)
コード例 #25
0
def learn_bn(filepath, learn_algo):
    """Learn a Bayesian network from *filepath* with the method *learn_algo*.

    learn_algo is one of 'hand-coded', 'hillclimbing', 'tabu', 'k2'; any other
    value aborts the program. The score is then selected through the
    sim_param flags (BDEU by default).

    Returns the learned gum.BayesNet.
    """
    try:
        learner = agrum.BNLearner(filepath)
    except IOError as e:
        print("learn_and_create_bn - I/O error({0}): {1}".format(
            e.errno, e.strerror))
        # Bug fix: the original evaluated the builtin `exit` without calling
        # it, so execution continued with `learner` undefined.
        sys.exit(1)

    # Select learning method
    if learn_algo == 'hand-coded':
        learner.addMandatoryArc(0, 2)
        learner.addMandatoryArc(1, 2)
        if sim_param.distance_param:
            learner.addMandatoryArc(3, 2)
    elif learn_algo == 'hillclimbing':
        learner.useGreedyHillClimbing()
    elif learn_algo == 'tabu':
        learner.useLocalSearchWithTabuList()
    elif learn_algo == 'k2':
        learner.useK2([3, 2, 1, 0])

    else:
        print(
            'ERROR - learn_bn : there was a problem while selecting the learner'
        )
        sys.exit()

    # Select score (BDEU by default)
    if sim_param.score_likelihood:
        learner.useScoreLog2Likelihood()
        learner.setMaxIndegree(2)

    if sim_param.score_bic:
        # Bug fix: the method was referenced without being called (a no-op).
        learner.useScoreBIC()

    if sim_param.score_aic:
        # Bug fix: the method was referenced without being called (a no-op).
        learner.useScoreAIC()

    bn = learner.learnBN()
    print("BN learned.\n", bn)
    return bn
コード例 #26
0
    def test_dirichlet(self):
        """Learn with a Dirichlet prior read from a generated database."""
        # ground-truth net: used both to sample the prior database and as
        # the variable template for the learner below
        bn = gum.fastBN("A->B<-C->D->E<-B")
        gum.generateSample(bn,
                           2000,
                           self.agrumSrcDir("dirichlet.csv"),
                           with_labels=True)

        # a differently-structured net samples the learning database
        bn2 = gum.fastBN("A->B->C->D->E")
        gum.generateSample(bn2,
                           2000,
                           self.agrumSrcDir("database.csv"),
                           with_labels=True)

        # bn is used to give the variables and their domains
        learner = gum.BNLearner(self.agrumSrcDir("database.csv"), bn)
        learner.useDirichletPrior(self.agrumSrcDir("dirichlet.csv"), 10)
        learner.useScoreAIC(
        )  # or another score with no included prior such as BDeu

        bn3 = learner.learnBN()

        # NOTE(review): bn3 is never inspected; the assertion checks the
        # template `bn`, whose size is fixed at construction. Possibly
        # `bn3.size()` was intended — confirm before changing.
        self.assertEqual(bn.size(), 5)
コード例 #27
0
def learnDAG(sample, dis_method='quantile', nbins=5, threshold=25):
    """Discretize *sample*, learn a DAG with MIIC and wrap it as a NamedDAG.

    Returns (ndag, start, end), where start/end time the learning phase.
    """
    # data = pd.read_csv(file_name, nrows=size)

    names = list(sample.getDescription())

    # dump the sample to a temporary CSV for the learner
    tmp = tf.NamedTemporaryFile(delete=False)
    csvfilename = tmp.name + '.csv'
    tmp.close()
    sample.exportToCSVFile(csvfilename, ',')

    start = time.time()
    discretizer = skbn.BNDiscretizer(defaultDiscretizationMethod=dis_method,
                                     defaultNumberOfBins=nbins,
                                     discretizationThreshold=threshold)

    # template BN providing the discretized variables and their domains
    bn = gum.BayesNet()
    for name in names:
        bn.add(discretizer.createVariable(name, sample.getMarginal([name])))

    learner = gum.BNLearner(csvfilename, bn)
    learner.useMIIC()
    learner.useNMLCorrection()

    dag = learner.learnDAG()
    ndag = otagr.NamedDAG(dag, names)

    end = time.time()

    os.remove(csvfilename)

    return ndag, start, end
コード例 #28
0
    def test_loglikelihood(self):
        """Log-likelihoods on chi2.csv, normalized by -N (LL = -N.H)."""
        learner = gum.BNLearner(self.agrumSrcDir('chi2.csv'))
        self.assertEqual(learner.nbRows(), 500)
        self.assertEqual(learner.nbCols(), 4)

        siz = -1.0 * learner.nbRows()

        # (variables, knowing, expected LL/siz) — conditional cases use the
        # chain rule: H(X|Y) = H(X,Y) - H(Y)
        cases = [
            (["A"], None, 0.99943499),
            (["B"], None, 0.9986032),
            (["A", "B"], None, 1.9668973),
            (["A"], ["B"], 1.9668973 - 0.9986032),
            (["C"], None, 0.99860302),
            (["D"], None, 0.40217919),
            (["C", "D"], None, 1.40077995),
            (["C"], ["D"], 1.40077995 - 0.40217919),
        ]
        for variables, knowing, expected in cases:
            if knowing is None:
                stat = learner.logLikelihood(variables) / siz
            else:
                stat = learner.logLikelihood(variables, knowing) / siz
            self.assertAlmostEqual(stat, expected, delta=1e-5)
コード例 #29
0
def predicted_val(dataframe, dbn, predicted_var_0, unique_time_series, var_ts,
                  valid_file, training_file, variable_0, read_var_0,
                  global_pred_rate, averages):
    """Leave-one-series-out prediction with a 2-TBN, written to a CSV.

    For each value of *var_ts* in *unique_time_series*: hold that series out,
    re-learn the 2-TBN parameters on the remaining rows, unroll the net over
    3 steps, set the held-out observations as evidence and write the
    de-probabilized predictions to Predicted_Values_Optimized.csv.

    NOTE(review): `global_pred_rate[index]` is initialised to an empty dict
    but never filled here, and `predicted_values` is first built as a dict of
    lists then clobbered by a list inside the prediction loop (the dict form
    is never used). Confirm intent before relying on either.
    """
    with open("Predicted_Values_Optimized.csv", "w", newline='') as fp:
        # header: id columns plus one column per predicted variable and step
        header = ["parcelle", "sequence"]
        for variable in predicted_var_0:
            for i in range(1, 3):
                variable_t = variable + "_" + str(i)
                header.append(variable_t)
        w = csv.writer(fp)
        w.writerow(header)

        for index, ts in enumerate(unique_time_series):
            # split into held-out (valid) and training rows for this series
            valid_df = dataframe[dataframe[var_ts] == ts].copy()
            valid_df = valid_df.reset_index()
            training_df = dataframe[dataframe[var_ts] != ts].copy()
            training_df = training_df.reset_index()

            valid_df.to_csv(valid_file, index=False)
            training_df.to_csv(training_file, index=False)

            # re-learn only the parameters on the fixed dbn structure
            learner = gum.BNLearner(training_file, dbn)
            learner.setInitialDAG(dbn.dag())
            learner.useScoreLog2Likelihood()
            learner.useAprioriSmoothing(0.01)

            dbn_2 = learner.learnParameters(dbn.dag())

            steps = 3

            # unroll the 2-TBN over `steps` time slices
            bn = gdyn.unroll2TBN(dbn_2, steps)
            #            bn_to_pdf(bn, "DBN_unrolled_2.pdf")

            # slice-0 evidence from the first held-out row
            dictionary = dict()
            for variable in variable_0:
                tempdf = valid_df.loc[0, :]
                dictionary[variable] = int(tempdf[variable])

            original_values = dict()
            for variable in predicted_var_0:
                original_values[variable] = []

            global_pred_rate[index] = dict()
            pred_row = list(valid_df[["parcelle", "sequence"]].loc[0, :])

            # evidence for the later slices; also record the ground truth
            for recorder in range(0, steps - 1):
                for variable in read_var_0:
                    row = valid_df.loc[recorder, :]
                    variable_name = variable + "_" + str(recorder + 1)
                    var_name_in_df = variable + "_t"
                    dictionary[variable_name] = int(row[var_name_in_df])
                for variable in predicted_var_0:
                    var_name_in_df = variable + "_t"
                    original_values[variable].append(int(row[var_name_in_df]))

            inference = gum.LazyPropagation(bn)
            inference.setEvidence(dictionary)
            inference.makeInference()

            pred_values = dict()
            predicted_values = dict()

            for variable in predicted_var_0:
                predicted_values[variable] = []
            for variable in predicted_var_0:
                pred_values[variable] = dict()
                for i in range(1, steps):
                    variable_t = variable + "_" + str(i)
                    # NOTE(review): this rebinds `predicted_values` from the
                    # dict above to a plain posterior list — confirm intended
                    predicted_values = []
                    predicted_values = inference.posterior(
                        bn.idFromName(variable_t))[:]
                    index_used = original_values[variable][i - 1]
                    t_list = [0] * len(predicted_values)
                    t_list[index_used] = 1
                    # turn the posterior back into a point value via averages
                    pred_values[variable][variable_t] = deprobabilize(
                        averages[variable], predicted_values)
                    pred_row.append(pred_values[variable][variable_t])

            w.writerow(pred_row)


#    gnb.showPotential(bn.cpt("s_2"))
    return
コード例 #30
0
# Template BN: declares each (labelized) variable and its admissible labels
# for the adult-census-style database; no arcs — structure is learned below.
template=gum.BayesNet()
template.add(gum.LabelizedVariable("target", "target", ['<=50K', '>50K']))
template.add(gum.LabelizedVariable("sex", "sex",['Male','Female']))
template.add(gum.LabelizedVariable("age_range", "age_range",['0-20','21-30','31-65','66-90']))
template.add(gum.LabelizedVariable("race", "race",['White', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo', 'Other', 'Black']))
template.add(gum.LabelizedVariable("workclass", "workclass",['Private', 'Self-emp-not-inc', 'Self-emp-inc', 'Federal-gov', 'Local-gov', 'State-gov', 'Without-pay', 'Never-worked']))
template.add(gum.LabelizedVariable("relationship", "relationship", ['Wife', 'Own-child', 'Husband', 'Not-in-family', 'Other-relative', 'Unmarried']))
template.add(gum.LabelizedVariable("marital_status", "marital_status", ['Married-civ-spouse', 'Divorced', 'Never-married', 'Separated', 'Widowed', 'Married-spouse-absent', 'Married-AF-spouse'])) 
template.add(gum.LabelizedVariable("occupation", "occupation",['Tech-support', 'Craft-repair', 'Other-service', 'Sales', 'Exec-managerial', 'Prof-specialty', 'Handlers-cleaners', 'Machine-op-inspct', 'Adm-clerical', 'Farming-fishing', 'Transport-moving', 'Priv-house-serv', 'Protective-serv', 'Armed-Forces']))            
gnb.showBN(template)

# Dump the training DataFrame to Google Drive, then build the learner path.
# NOTE(review): os.path.join discards earlier components when a later one is
# absolute, so `file` is just the gdrive path — the 'res'/'titanic' parts are
# dead; confirm whether that was intended.
train_df.to_csv(os.path.join('/content/gdrive/My Drive/train_data2.csv'), index=False)
file = os.path.join('res', 'titanic', '/content/gdrive/My Drive/train_data2.csv')

# Learn structure and parameters, constrained by the template's variables.
learner = gum.BNLearner(file, template)
bn = learner.learnBN()
bn  # notebook-style display of the learned network

gnb.showInformation(bn,{},size="20")

gnb.showInference(bn)

gnb.showPosterior(bn,evs={"sex": "Male", "age_range": '21-30'},target='target')

gnb.sideBySide(bn, gum.MarkovBlanket(bn, 'target'), captions=["Learned Bayesian Network", "Markov blanket of 'target'"])



# Inference engine handed to init_belief (defined elsewhere in the project).
ie=gum.LazyPropagation(bn)
init_belief(ie)