Example #1
0
def timer(inputfile, trials, datalength):
    """Time maximum-likelihood parameter learning on sampled data.

    Loads a NodeData and a GraphSkeleton from *inputfile*, builds a
    DiscreteBayesianNetwork from them and then, *trials* times, draws
    *datalength* samples from the network and times one call to
    ``PGMLearner.discrete_mle_estimateparams`` on those samples. Only the
    learning call is timed; sampling is excluded.

    The ``Vdata`` of the network learned in the last trial is printed as
    JSON before returning.

    Arguments:
        inputfile -- path handed to both ``NodeData.load`` and
            ``GraphSkeleton.load``.
        trials -- number of timed repetitions; must be at least 1.
        datalength -- number of samples passed to ``bn.randomsample``
            in each trial.

    Returns the mean elapsed time per trial, as measured by the clock
    selected below.

    Raises ValueError if *trials* is smaller than 1.
    """
    # Fail fast: without at least one trial there is no result to print
    # and the average below would divide by zero.
    if trials < 1:
        raise ValueError("trials must be at least 1, got %r" % (trials,))

    # time.clock was removed in Python 3.8; prefer perf_counter where it
    # exists and fall back to clock on interpreters that lack it.
    clock = getattr(time, "perf_counter", None) or time.clock

    # load nodedata and graphskeleton
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load(inputfile)
    skel.load(inputfile)

    # topologically order graphskeleton
    skel.toporder()

    # load bayesian network
    bn = DiscreteBayesianNetwork(skel, nd)

    # instantiate pgm learner
    learner = PGMLearner()

    # drop our own reference to the node data; it is not used below
    del nd

    # TIME
    totaltime = 0
    ret = None
    for _ in range(trials):
        data = bn.randomsample(datalength)
        start = clock()
        ret = learner.discrete_mle_estimateparams(skel, data)
        totaltime += clock() - start
    totaltime /= trials

    # A single parenthesised argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(json.dumps(ret.Vdata, indent=1))
    return totaltime
Example #2
0
def timer(inputfile, trials, datalength=100):
    """Time random sampling from a discrete Bayesian network.

    Loads a NodeData and a GraphSkeleton from *inputfile*, builds a
    DiscreteBayesianNetwork from them and times *trials* calls to
    ``bn.randomsample(datalength)``.

    Arguments:
        inputfile -- path handed to both ``NodeData.load`` and
            ``GraphSkeleton.load``.
        trials -- number of timed repetitions; must be at least 1.
        datalength -- (Optional, default is 100) number of samples
            requested from ``bn.randomsample`` in each trial.

    Returns the mean elapsed time per trial, as measured by the clock
    selected below.

    Raises ValueError if *trials* is smaller than 1.
    """
    # Fail fast: the average below would divide by zero for 0 trials and
    # be meaningless for a negative count.
    if trials < 1:
        raise ValueError("trials must be at least 1, got %r" % (trials,))

    # time.clock was removed in Python 3.8; prefer perf_counter where it
    # exists and fall back to clock on interpreters that lack it.
    clock = getattr(time, "perf_counter", None) or time.clock

    # load nodedata and graphskeleton
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load(inputfile)
    skel.load(inputfile)

    # topologically order graphskeleton
    skel.toporder()

    # load bayesian network
    bn = DiscreteBayesianNetwork(skel, nd)

    # TIME
    totaltime = 0
    for _ in range(trials):
        start = clock()
        bn.randomsample(datalength)  # the samples themselves are discarded
        totaltime += clock() - start
    totaltime /= trials

    return totaltime
Example #3
0
    def discrete_constraint_estimatestruct(self, data, pvalparam=0.05, indegree=1):
        '''
        Learn a Bayesian network structure from discrete data given by *data*, using constraint-based approaches. This function first calculates all the independencies and conditional independencies present between variables in the data. To calculate dependencies, it uses the *discrete_condind* method on each pair of variables, conditioned on other sets of variables of size *indegree* or smaller, to generate a chi-squared result and a p-value. If this p-value is less than *pvalparam*, the pair of variables are considered dependent conditioned on the variable set. Once all true dependencies -- pairs of variables that are dependent no matter what they are conditioned by -- are found, the algorithm uses these dependencies to construct a directed acyclic graph. It returns this DAG in the form of a :doc:`GraphSkeleton <graphskeleton>` class.

        Arguments:
            1. *data* -- An array of dicts containing samples from the network in {vertex: value} format. Example::

                    [
                        {
                            'Grade': 'B',
                            'SAT': 'lowscore',
                            ...
                        },
                        ...
                    ]

            2. *pvalparam* -- (Optional, default is 0.05) The p-value below which to consider something significantly unlikely. A common number used is 0.05. A pair is treated as independent given a witness set when the p-value returned by *discrete_condind* is greater than this threshold.
            3. *indegree* -- (Optional, default is 1) The upper bound on the size of a witness set (see Koller et al. 85). If this is larger than 1, a huge amount of samples in *data* are required to avoid a divide-by-zero error.

        Returns:
            A GraphSkeleton whose *V* is the list of keys of the first sample and whose *E* is the list of two-element edges found; *toporder* is called on it before it is returned. Edges that are still undirected when the orientation rules converge are returned in the orientation in which they happen to be stored.

        Raises:
            AssertionError if *data* is not a non-empty list whose first element is a dict.

        Usage example: this would learn structure from a set of 8000 discrete samples::

            import json

            from libpgm.nodedata import NodeData
            from libpgm.graphskeleton import GraphSkeleton
            from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
            from libpgm.pgmlearner import PGMLearner

            # generate some data to use
            nd = NodeData()
            nd.load("../tests/unittestdict.txt")    # an input file
            skel = GraphSkeleton()
            skel.load("../tests/unittestdict.txt")
            skel.toporder()
            bn = DiscreteBayesianNetwork(skel, nd)
            data = bn.randomsample(8000)

            # instantiate my learner
            learner = PGMLearner()

            # estimate structure
            result = learner.discrete_constraint_estimatestruct(data)

            # output
            print(json.dumps(result.E, indent=2))

        '''
        assert (isinstance(data, list) and data and isinstance(data[0], dict)), "Arg must be a list of dicts."

        # list() keeps this working on interpreters where dict.keys()
        # returns a view instead of a list
        variables = list(data[0].keys())

        # every unordered pair of variables starts out as a potential
        # dependency, stored once as [earlier, later] in *variables* order
        dependencies = [[x, y] for x, y in itertools.combinations(variables, 2)]

        # all candidate witness sets of size 0..indegree; they do not
        # depend on the pair under test, so build them once
        candidate_sets = []
        for size in range(indegree + 1):
            candidate_sets.extend(itertools.combinations(variables, size))

        witnesses = []

        # for each pair of variables X, Y:
        for X, Y in itertools.combinations(variables, 2):

            # consider all witness sets that contain neither X nor Y
            for candidate in candidate_sets:
                if X in candidate or Y in candidate:
                    continue

                # hand discrete_condind a fresh list on every call
                U = list(candidate)

                # determine conditional independence
                chi, pv, witness = self.discrete_condind(data, X, Y, U)
                if pv > pvalparam:
                    # X and Y are independent given U: drop the edge
                    # (whichever way round it is stored) and remember
                    # the witness set for the immorality step below
                    for pair in ([X, Y], [Y, X]):
                        if pair in dependencies:
                            dependencies.remove(pair)
                    witnesses.append([X, Y, witness])
                    break

        # now that we have found our dependencies, run build PDAG (cf. Koller p. 89)
        # with the stored set of independencies:

        # assemble undirected graph skeleton
        pdag = GraphSkeleton()
        pdag.E = dependencies
        pdag.V = variables

        # working edge list: a three-element edge [a, b, 'u'] is
        # undirected, a two-element edge [a, b] is directed a -> b
        dedges = [x[:] for x in pdag.E]
        for edge in dedges:
            edge.append('u')

        # define helper method "exists_undirected_edge"
        def exists_undirected_edge(one_end, the_other_end):
            for edge in dedges:
                if len(edge) == 3:
                    if (edge[0] == one_end and edge[1] == the_other_end):
                        return True
                    elif (edge[1] == one_end and edge[0] == the_other_end):
                        return True
            return False

        # define helper method "exists_edge"
        def exists_edge(one_end, the_other_end):
            if exists_undirected_edge(one_end, the_other_end):
                return True
            elif [one_end, the_other_end] in dedges:
                return True
            elif [the_other_end, one_end] in dedges:
                return True
            return False

        # adjust for immoralities (cf. Koller 86): for two edges that
        # share a node while their other ends are not adjacent, and the
        # shared node is not recorded as the single witness for those
        # ends, point both edges at the shared node.
        # NOTE: this pass mutates dedges while iterating over it and is
        # kept statement-for-statement; its result depends on that order.
        for edge1 in reversed(dedges):
            for edge2 in reversed(dedges):
                if (edge1 in dedges) and (edge2 in dedges):
                    if edge1[0] == edge2[1] and not exists_edge(edge1[1], edge2[0]):
                        if (([edge1[1], edge2[0], [edge1[0]]] not in witnesses) and ([edge2[0], edge1[1], [edge1[0]]] not in witnesses)):
                            dedges.append([edge1[1], edge1[0]])
                            dedges.append([edge2[0], edge2[1]])
                            dedges.remove(edge1)
                            dedges.remove(edge2)
                    elif edge1[1] == edge2[0] and not exists_edge(edge1[0], edge2[1]):
                        if (([edge1[0], edge2[1], [edge1[1]]] not in witnesses) and ([edge2[1], edge1[0], [edge1[1]]] not in witnesses)):
                            dedges.append([edge1[0], edge1[1]])
                            dedges.append([edge2[1], edge2[0]])
                            dedges.remove(edge1)
                            dedges.remove(edge2)
                    elif edge1[1] == edge2[1] and edge1[0] != edge2[0] and not exists_edge(edge1[0], edge2[0]):
                        if (([edge1[0], edge2[0], [edge1[1]]] not in witnesses) and ([edge2[0], edge1[0], [edge1[1]]] not in witnesses)):
                            dedges.append([edge1[0], edge1[1]])
                            dedges.append([edge2[0], edge2[1]])
                            dedges.remove(edge1)
                            dedges.remove(edge2)
                    elif edge1[0] == edge2[0] and edge1[1] != edge2[1] and not exists_edge(edge1[1], edge2[1]):
                        if (([edge1[1], edge2[1], [edge1[0]]] not in witnesses) and ([edge2[1], edge1[1], [edge1[0]]] not in witnesses)):
                            dedges.append([edge1[1], edge1[0]])
                            dedges.append([edge2[1], edge2[0]])
                            dedges.remove(edge1)
                            dedges.remove(edge2)

        # use right hand rules to improve graph until convergence (Koller 89)
        # Every rule below replaces one undirected edge by one directed
        # edge, so the length of dedges never changes inside the loops.
        olddedges = []
        while (olddedges != dedges):
            olddedges = [x[:] for x in dedges]
            for edge1 in reversed(dedges):
                for edge2 in reversed(dedges):

                    # rule 1: a -> b, b - c, a and c not adjacent  =>  b -> c
                    # edge1 is a -> b; edge2 is the undirected edge, which
                    # may be stored as [b, c, 'u'] or as [c, b, 'u']
                    if len(edge1) == 2 and len(edge2) == 3 and edge1[1] in edge2[:2]:
                        far = edge2[1] if edge1[1] == edge2[0] else edge2[0]
                        if far != edge1[0] and not exists_edge(edge1[0], far):
                            dedges.append([edge1[1], far])
                            dedges.remove(edge2)

                    # rule 2: a -> b -> c, a - c  =>  a -> c
                    if len(edge1) == 2 and len(edge2) == 2 and edge1[1] == edge2[0]:
                        tail, head = edge1[0], edge2[1]
                        for edge3 in dedges:
                            if len(edge3) == 3 and (
                                    (edge3[0] == tail and edge3[1] == head) or
                                    (edge3[0] == head and edge3[1] == tail)):
                                # orient exactly the matched undirected edge;
                                # leaving the loop right away makes mutating
                                # the list here safe
                                dedges.append([tail, head])
                                dedges.remove(edge3)
                                break

                    # rule 3: v - y1 -> z, v - y2 -> z, v - z  =>  v -> z
                    # edge1 is y1 -> z and edge2 is y2 -> z
                    if (len(edge1) == 2 and len(edge2) == 2 and
                            edge1[1] == edge2[1] and edge1[0] != edge2[0]):
                        for v in variables:
                            if (exists_undirected_edge(v, edge1[0]) and
                                    exists_undirected_edge(v, edge1[1]) and
                                    exists_undirected_edge(v, edge2[0])):
                                dedges.append([v, edge1[1]])
                                # iterate over a copy so removing from
                                # dedges cannot skip elements
                                for edge3 in dedges[:]:
                                    if (len(edge3) == 3 and
                                            ((edge3[0] == v and edge3[1] == edge1[1]) or
                                             (edge3[1] == v and edge3[0] == edge1[1]))):
                                        dedges.remove(edge3)

        # return one possible graph skeleton from the pdag class found:
        # strip the 'u' marker so every edge is a plain [from, to] pair
        pdag.E = [edge[:2] for edge in dedges]
        pdag.toporder()
        return pdag