예제 #1
0
    def runTest(self):
        """Build a random Factor, mirror it in an IncrementalCompactFactor,
        apply a random perturbation plus an update, then verify that every
        marginal of the incremental factor matches the directly summed one.

        Cleanup (review): removed the unused inner helper ``invert_some`` and
        several runs of commented-out dead code; renamed ``swap_some``'s
        parameter so it no longer shadows the outer loop variable ``x``.
        """
        num_vars = 5
        num_vals = 10
        for x in xrange(num_runs):
            vs = dict([('V'+str(i),range(num_vals)) for i in xrange(num_vars)])
            # Random non-negative data; zeros will occur, which exercises the
            # sparse paths of the compact factor.
            f = Factor(variables = vs.keys()
                    ,data = [abs(randint(0,20)) for i in xrange(num_vals**num_vars)]
                    ,domain = Domain()
                    ,new_domain_variables=vs
                    ,check = True)
            records = []
            for inst in f.insts():
                records.append(inst + (f[inst],))
            rawdata = (vs.keys(), vs, vs.keys(), records)

            cf = IncrementalCompactFactor(rawdata, rmin=0)

            g = f.copy(copy_domain=True)
            def swap_some(v):
                # Randomly flip zero cells to non-zero and vice versa so the
                # update exercises both insertion and removal in the tree.
                r = random()
                if v == 0:
                    if r <= 0.5:
                        return randint(1,5)
                    return 0
                elif r <= 0.5:
                    return 0
                return v
            g.map(swap_some)
            f += g
            records = []
            for inst in g.insts():
                records.append(inst + (g[inst],))
            rawdata = (vs.keys(), vs, vs.keys(), records)
            cf.update(rawdata)

            # Every marginal of the incrementally maintained factor must agree
            # with the marginal of the directly accumulated Factor.
            for variables in powerset(vs.keys()):
                g = f.copy(copy_domain=True)
                g.marginalise_away(g.variables() - frozenset(variables))
                self.assert_(same_factor(g,cf.makeFactor(variables),verbose=True))
예제 #2
0
    def tryModel(self, model):
        """Draw repeated sample batches from C{model} and check that an
        L{IncrementalCompactFactor} updated with each batch always equals the
        sum of independent L{CompactFactor}s built from the same batches,
        family by family.
        """
        sampler = ForwardSampler(model)
        batch = sampler.samples(100)
        batch_factors = [CompactFactor(batch,domain=Domain())]
        icf = IncrementalCompactFactor(batch, domain=Domain())

        for _ in xrange(10):
            batch = sampler.samples(100)
            batch_factors.append(CompactFactor(batch,domain=Domain()))
            icf.update(batch)

            # For every family (child plus its parents) the incremental factor
            # must equal the sum over the per-batch factors.
            for child in model.variables():
                family = model.adg().parents(child) | set([child])
                incremental = icf.makeFactor(family)
                summed = batch_factors[0].makeFactor(family)
                for piece in batch_factors[1:]:
                    summed += piece.makeFactor(family)
                self.assert_(same_factor(incremental,summed))
예제 #3
0
    def runTest(self):
        """Build a random Factor and verify that an IncrementalCompactFactor
        constructed from its records reproduces every one of its marginals.

        Fix (review): the outer loop variable was ``i``, which under Python 2
        is clobbered by the leaked list-comprehension variable ``i`` in the
        ``vs`` construction below; renamed the outer variable to avoid the
        shadowing.
        """
        num_vars = 5
        num_vals = 10
        for run in xrange(num_runs):
            vs = dict([('V'+str(i),range(num_vals)) for i in xrange(num_vars)])
            # Random non-negative data; some cells will be zero.
            f = Factor(variables = vs.keys()
                    ,data = [abs(randint(0,5)) for i in xrange(num_vals**num_vars)]
                    ,domain = Domain()
                    ,new_domain_variables=vs
                    ,check = True)
            records = []
            for inst in f.insts():
                records.append(inst + (f[inst],))
            rawdata = (vs.keys(), vs, vs.keys(), records)

            cf = IncrementalCompactFactor(rawdata)
            # Each subset of the variables must marginalise to the same factor.
            for variables in powerset(vs.keys()):
                g = f.copy(copy_domain=True)
                g.marginalise_away(g.variables() - frozenset(variables))
                self.assert_(same_factor(g,cf.makeFactor(variables),verbose=True))
예제 #4
0
class CausalWorld(SubDomain):
    """A container for experimental data derived from a causal Bayesian network.

    Observational samples are accumulated in a single
    L{IncrementalCompactFactor} (C{_data}); interventional samples are kept in
    C{_inter_data}, keyed by the frozenset of intervened variables.
    """
    def __init__(self, bn, burnin=1000):
        """
        @param bn: A causal Bayesian network from which samples are drawn.
        @type bn: L{CBN}
        @param burnin: The burn in for the L{GibbsSampler} used for generating
        interventional data.
        @type burnin: int
        """
        super(CausalWorld,self).__init__(domain=Domain.copy(bn),variables=bn.variables())
        self._pure_model = bn.copy(copy_domain=True)
        self._pure_sampler = ForwardSampler(self._pure_model)
        self._burnin = burnin
        # Observational data; None until observe() is first called.
        self._data = None
        # frozenset(intervened vars) -> sampler / IncrementalCompactFactor.
        self._inter_sampler = {}
        self._inter_data = {}

    def copy(self):
        """Return a copy of this world. The data factors are copied; the
        samplers are NOT (they are set to None on the copy)."""
        cpy = copy(self)
        cpy._pure_model = self._pure_model.copy(copy_domain=True)
        cpy._pure_sampler = None
        cpy._inter_sampler = None
        cpy._inter_data = {}
        for k in self._inter_data:
            cpy._inter_data[k] = self._inter_data[k].copy()
        # TODO: merge _data into _inter_data!
        if self._data is not None:
            cpy._data = self._data.copy()
        return cpy

    def interventions(self):
        """
        @return: A list of interventional data sets available. A zero length
        element denotes observational data.
        """
        # list() keeps this correct whether keys() is a list (Py2) or a
        # view (Py3); the result is mutated by the append below.
        avail_data = list(self._inter_data.keys())
        if self._data is not None:
            avail_data.append(frozenset())
        return avail_data

    def intervention_data(self, inter):
        """Obtain the L{IncrementalCompactFactor} corresponding to
        the intervention C{inter}. C{inter} is typically an element of
        the return value of L{interventions}. A zero length C{inter}
        indicates observational data.
        """
        if len(inter) == 0:
            return self._data
        return self._inter_data[inter]

    def observe(self, num_samples, skip=0):
        """Draw C{num_samples} observational samples, separated by C{skip}
        steps of the sampler.  These samples are appended to any existing
        observational samples and then returned.
        @return: L{IncrementalCompactFactor}
        """
        samples = self._pure_sampler.samples(num_samples,skip)
        if self._data is None:
            self._data = IncrementalCompactFactor(samples,domain=Domain())
        else:
            self._data.update(samples)
        return self._data

    def query(self, intervention, num_samples, skip=0):
        """Draw C{num_samples} interventional samples, separated by C{skip}
        steps of the sampler.  These samples are appended to any existing
        interventional samples and then returned.  The intervention made is
        that of C{intervention}.
        @param intervention: A dictionary mapping variables in the L{CBN} to
        a single value in the domain.
        @return: L{IncrementalCompactFactor}
        """
        k = frozenset(intervention.keys())
        # Lazily build one Gibbs sampler per intervened-variable set.
        if k not in self._inter_sampler:
            do_model = CBN.from_bn(self._pure_model.copy(copy_domain=True))
            do_model.intervene(intervention)
            self._inter_sampler[k] = GibbsSampler(do_model, self._burnin)
        do_sampler = self._inter_sampler[k]
        samples = do_sampler.samples(num_samples,skip)
        if k not in self._inter_data:
            self._inter_data[k] = IncrementalCompactFactor(samples,domain=Domain())
        else:
            self._inter_data[k].update(samples)
        return self._inter_data[k]

    def makeFactor(self, variables):
        """Construct a factor from only observations (since cannot determine
        whether interventional evidence is admissible)."""
        return self._data.makeFactor(variables)

    def makeCPT(self, child, parents, force_cpt=False, prior=1.0, check=False):
        """Use all data applicable to C{child} to make its CPT
        @param prior: the Dirichlet prior parameter (the same parameter value
        is used for all instances!)  Note there may be some problems with
        this method: a B{different} prior is used by the BDeu score. However,
        in practice, for parameter estimation, this prior method seems to be ok.
        I was lazy and it was simple to implement (cb).  If prior is zero, then
        the parameters are the maximum likelihood estimation solutions.
        """
        # child can use all observable data, and all interventional data where
        # child was not intervened.
        variables = set(parents) | set([child])
        f_child = self._data.makeFactor(variables)
        for k in self._inter_data:
            if child in k:
                continue
            f = self._inter_data[k].makeFactor(variables)
            # fill in the missing zeros
            f.data_extend(dict([(var,f_child.values(var)) for var in k&f.variables()]))
            # domain monkeying
            for var in k:
                f.change_domain_variable(var, f_child.values(var))
            # Fix (review): add the patched factor f, not a freshly built one;
            # the original re-made the factor here, discarding the
            # data_extend/change_domain_variable adjustments above.
            f_child += f
        return CPT(f_child+prior, child, cpt_check=check, cpt_force=force_cpt)

    def family_score(self, child, parents):
        """Obtain the BDeu score of a particular family (consisting of a C{child} and
        its C{parents}) using all applicable experimental data."""
        return self.makeCPT(child, parents, prior=0.0, force_cpt=False, check=False).bdeu_score()