def __init__(self, nodes, name=None, n_samples=100):
     self.nodes = nodes
     self._inference_method = 'sumproduct'
     # We need to divine the domains for Factor nodes here...
     # First compile a mapping of factors to variables
     # from the arg spec...
     function_args = dict()
     arg_domains = dict()
     for node in self.nodes:
         if isinstance(node, VariableNode):
             #if not hasattr(node, 'domain'):
             #    node.domain = [True, False]
             arg_domains[node.name] = node.domain
         elif isinstance(node, FactorNode):
             function_args[node.func.__name__] = get_args(node.func)
     # Now if the domains for the
     # factor functions have not been explicitly
     # set we create them based on the values
     # the variables can take.
     for node in self.nodes:
         if isinstance(node, FactorNode):
             if hasattr(node.func, 'domains'):
                 continue
             domains = dict()
             for arg in get_args(node.func):
                 if not arg in arg_domains:
                     print 'WARNING: missing variable for arg:%s' % arg
                 else:
                     domains.update({arg: arg_domains[arg]})
             node.func.domains = domains
     self.name = name
     self.n_samples = n_samples
     # Now try to set the mode of inference..
     try:
         if self.has_cycles():
             # Currently only sampling
             # is supported for cyclic graphs
             self.inference_method = 'sample'
         else:
             # The sumproduct method will
             # give exact likelihoods but
             # only if the graph contains
             # no cycles.
             self.inference_method = 'sumproduct'
     except Exception:
         print 'Failed to determine if graph has cycles, ' \
             'setting inference to sample.'
         self.inference_method = 'sample'
     self.enforce_minimum_samples = False
def make_product_func(factors):
    '''
    Return a single callable from
    a list of factors which correctly
    applies the arguments to each
    individual factor.

    The challenge here is to return a function
    whose argument list we know and ensure that
    when this function is called, it's always
    called with the correct arguments.
    Since the correct argspec is attached
    to the built function it seems that
    it should be up to the caller to
    get the argument list correct.
    So we need to determine when and where it's called...

    '''
    args_map = {}
    all_args = []
    domains = {}
    for factor in factors:
        #if factor == 1:
        #    continue
        args_map[factor] = get_args(factor)
        all_args += args_map[factor]
        if hasattr(factor, 'domains'):
            domains.update(factor.domains)
    args = list(set(all_args))
    # Perhaps if we sort the


    def product_func(*product_func_args):
        #import pytest; pytest.set_trace()
        #arg_dict = dict([(a.name, a) for a in product_func_args])
        arg_dict = dict(zip(args, product_func_args))
        #import pytest; pytest.set_trace()
        result = 1
        for factor in factors:
            #domains.update(factor.domains)
            # We need to build the correct argument
            # list to call this factor with.
            factor_args = []
            for arg in get_args(factor):
                if arg in arg_dict:
                    factor_args.append(arg_dict[arg])
            if not factor_args:
                # Since we always require
                # at least one argument we
                # insert a dummy argument
                # so that the unity function works.
                factor_args.append('dummy')
            result *= factor(*factor_args)

        return result

    product_func.argspec = args
    product_func.factors = factors
    product_func.domains = domains
    return memoize(product_func)
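# A minimal standalone sketch of the same idea (an assumption-laden
# illustration, *not* this module's implementation: the (function, arg-names)
# pairs are supplied explicitly rather than discovered via get_args): combine
# several factors into one callable over the union of their argument names,
# routing each value to the factors that mention it.
def sketch_product_func(factors_with_args):
    # factors_with_args: list of (func, [arg names]) pairs
    all_args = []
    for _, names in factors_with_args:
        for n in names:
            if n not in all_args:
                all_args.append(n)

    def product(*values):
        bound = dict(zip(all_args, values))
        result = 1
        for func, names in factors_with_args:
            result *= func(*[bound[n] for n in names])
        return result

    product.argspec = all_args
    return product

# Example: p(a, b, c) = f(a, b) * g(b, c)
# p = sketch_product_func([(f, ['a', 'b']), (g, ['b', 'c'])])
# p(True, True, False) == f(True, True) * g(True, False)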
    def conditional_gaussianize(f):

        @wraps(f)
        def conditional_gaussianized(*args, **kwds):
            '''Evaluate the conditional multivariate Gaussian density.
            Note that mu, sigma and betas are supplied by the
            enclosing decorator factory's closure; they are not
            defined in this snippet, and the joint parameters are
            attached at graph construction time.
            '''
            # First we need to construct a vector
            # out of the args...
            x = zeros((len(args), 1))
            for i, a in enumerate(args):
                x[i, 0] = a
            sigma = conditional_gaussianized.covariance_matrix
            mu = conditional_gaussianized.joint_mu
            # Standard multivariate normal density; the normalizing
            # constant for a k-dimensional vector is
            # (2*pi)^(k/2) * |sigma|^(1/2).
            k = len(args)
            return 1 / ((2 * math.pi) ** (k / 2.0) * sigma.det() ** 0.5) \
                * math.exp(-0.5 * ((x - mu).T * sigma.I * (x - mu))[0, 0])

        conditional_gaussianized.mean = mu
        conditional_gaussianized.std_dev = sigma
        conditional_gaussianized.variance = sigma ** 2
        conditional_gaussianized.raw_betas = betas
        conditional_gaussianized.argspec = get_args(f)
        conditional_gaussianized.entropy = types.MethodType(
            lambda x: len(x.joint_mu) / 2 * \
            (1 + math.log(2 * math.pi)) + \
            0.5 * math.log(x.covariance_matrix.det()), conditional_gaussianized)

        # NOTE: the joint parameters are
        # added to this function at the time
        # of graph construction.

        return conditional_gaussianized
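# Sanity check of the entropy expression attached above. The standard
# differential entropy of a k-dimensional Gaussian is
# k/2 * (1 + ln(2*pi)) + 1/2 * ln|Sigma|; a plain-float version of that
# formula (an illustrative sketch, independent of this module's matrix
# classes):
import math

def gaussian_entropy(k, det_sigma):
    return k / 2.0 * (1 + math.log(2 * math.pi)) + 0.5 * math.log(det_sigma)

# gaussian_entropy(1, 1.0) is about 1.4189, i.e. 0.5 * ln(2*pi*e),
# the entropy of a one-dimensional standard normal.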
 def __init__(self, source, destination, factors, func):
     self.source = source
     self.destination = destination
     self.factors = factors
     self.func = func
     self.argspec = get_args(func)
     self.domains = func.domains
def build_graph(*args, **kwds):
    '''
    Automatically create all the
    variable and factor nodes
    using only function definitions.
    Since it's cumbersome to supply
    the domains for variable nodes
    via the factor domains, a separate
    domains dict can be passed as a keyword.
    '''
    # Lets start off identifying all the
    # variables by introspecting the
    # functions.
    variables = set()
    domains = kwds.get('domains', {})
    name = kwds.get('name')
    variable_nodes = dict()
    factor_nodes = []
    if isinstance(args[0], list):
        # Assume the functions were all
        # passed in a list in the first
        # argument. This makes it possible
        # to build very large graphs with
        # more than 255 functions.
        args = args[0]
    for factor in args:
        factor_args = get_args(factor)
        variables.update(factor_args)
        factor_node = FactorNode(factor.__name__, factor)
        #factor_node.func.domains = domains
        # Bit of a hack for now: we should actually exclude variables
        # that are not parameters of this function.
        factor_nodes.append(factor_node)
    for variable in variables:
        node = VariableNode(
            variable,
            domain=domains.get(variable, [True, False]))
        variable_nodes[variable] = node
    # Now we have to connect each factor node
    # to its variable nodes
    for factor_node in factor_nodes:
        factor_args = get_args(factor_node.func)
        connect(factor_node, [variable_nodes[x] for x in factor_args])
    graph = FactorGraph(variable_nodes.values() + factor_nodes, name=name)
    #print domains
    return graph
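# Hedged usage sketch for build_graph above (assumes build_graph and the node
# classes it uses are in scope; the factor functions and probabilities are made
# up for illustration). Each factor is a plain function over variable names;
# domains default to [True, False] unless overridden.
def f_rain(rain):
    return 0.2 if rain else 0.8

def f_grass_wet(rain, grass_wet):
    p = 0.9 if rain else 0.15
    return p if grass_wet else 1 - p

graph = build_graph(f_rain, f_grass_wet,
                    domains={'rain': [True, False],
                             'grass_wet': [True, False]})
# graph.verify() should then pass the checks shown further below.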
Example #6
def build_bbn(*args, **kwds):
    '''Builds a BBN Graph from
    a list of functions and domains'''
    variables = set()
    domains = kwds.get('domains', {})
    name = kwds.get('name')
    variable_nodes = dict()
    factor_nodes = dict()

    if isinstance(args[0], list):
        # Assume the functions were all
        # passed in a list in the first
        # argument. This makes it possible
        # to build very large graphs with
        # more than 255 functions, since
        # Python functions are limited to
        # 255 arguments.
        args = args[0]

    for factor in args:
        factor_args = get_args(factor)
        variables.update(factor_args)
        bbn_node = BBNNode(factor)
        factor_nodes[factor.__name__] = bbn_node

    # Now let's create the connections.
    # To do this we need to find the
    # factor node representing the variables
    # in a child factor's argument list and connect
    # it to the child node.

    # Note that calling original_factors
    # here can break build_bbn if the
    # factors do not correctly represent
    # a BBN.
    original_factors = get_original_factors(factor_nodes.values())
    for factor_node in factor_nodes.values():
        factor_args = get_args(factor_node)
        parents = [original_factors[arg] for arg in
                   factor_args if original_factors[arg] != factor_node]
        for parent in parents:
            connect(parent, factor_node)
    bbn = BBN(original_factors, name=name)
    bbn.domains = domains

    return bbn
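# Hedged usage sketch for build_bbn above (assumes build_bbn is in scope; the
# functions and numbers are made up for illustration). Each function's argument
# list names the node's parents plus its own variable, which is what
# get_original_factors (shown further below) relies on to discover which factor
# introduces which variable.
def f_rain(rain):
    return 0.2 if rain else 0.8

def f_sprinkler(rain, sprinkler):
    p = 0.01 if rain else 0.4
    return p if sprinkler else 1 - p

def f_grass_wet(rain, sprinkler, grass_wet):
    p = {(True, True): 0.99, (True, False): 0.9,
         (False, True): 0.8, (False, False): 0.0}[(rain, sprinkler)]
    return p if grass_wet else 1 - p

bbn = build_bbn(f_rain, f_sprinkler, f_grass_wet,
                domains={'rain': [True, False],
                         'sprinkler': [True, False],
                         'grass_wet': [True, False]})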
def make_factor_node_message(node, target_node):
    '''
    The rules for a factor node are:
    take the product of all the incoming
    messages (except for the destination
    node) and then take the sum over
    all the variables except for the
    destination variable.

    For example, if node.func is f(x1, x2, x3) and the
    target node is x2, the outgoing message is a function
    of x2 obtained by summing the product of the incoming
    messages and f over x1 and x3.
    '''

    if node.is_leaf():
        not_sum_func = make_not_sum_func(node.func, target_node.name)
        message = FactorMessage(node, target_node, [node.func], not_sum_func)
        return message

    args = set(get_args(node.func))

    # Compile list of factors for message
    factors = [node.func]

    # Now add the message that came from each
    # of the non-destination neighbours...
    neighbours = node.neighbours
    for neighbour in neighbours:
        if neighbour == target_node:
            continue
        # When we pass on a message, we unwrap
        # the original payload and wrap it
        # in new headers, this is purely
        # to verify the procedure is correct
        # according to usual nomenclature
        in_message = node.received_messages[neighbour.name]
        if in_message.destination != node:
            out_message = VariableMessage(
                neighbour, node, in_message.factors,
                in_message.func)
            out_message.argspec = in_message.argspec
        else:
            out_message = in_message
        factors.append(out_message)

    product_func = make_product_func(factors)
    not_sum_func = make_not_sum_func(product_func, target_node.name)
    message = FactorMessage(node, target_node, factors, not_sum_func)
    return message
Example #8
 def assign_clusters(self, bbn):
     assignments_by_family = dict()
     assignments_by_clique = defaultdict(list)
     assigned = set()
     for node in bbn.nodes:
         args = get_args(node.func)
         if len(args) == 1:
             # If the func has only 1 arg
             # it means that it does not
             # specify a conditional probability
             # This is where H&D is a bit vague
             # but it seems to imply that we
             # do not assign it to any
             # clique.
             # Revising this for now as I don't
             # think it's correct; I think
             # all CPTs need to be assigned
             # once and once only. The example
             # in H&D just happens to be a clique
             # that f_a could have been assigned
             # to but wasn't, presumably because
             # it got assigned somewhere else.
             pass
             #continue
         # Now we need to find a cluster that
         # is a superset of the Family(v)
         # Family(v) is defined by D&H to
         # be the union of v and parents(v)
         family = set(args)
         # At this point we need to know which *variable*
         # a BBN node represents. Up to now we have
         # not *explicitly* specified this, however
         # we have been following some conventions
         # so we could just use this convention for
         # now. Need to come back to this to
         # perhaps establish the variable at
         # build bbn time...
         containing_cliques = [clique_node for clique_node in
                               self.clique_nodes if
                               (set(clique_node.variable_names).
                                issuperset(family))]
         assert len(containing_cliques) >= 1
         for clique in containing_cliques:
             if node in assigned:
                 # Make sure we assign all original
                 # PMFs only once each
                 continue
             assignments_by_clique[clique].append(node)
             assigned.add(node)
         assignments_by_family[tuple(family)] = containing_cliques
     return assignments_by_clique
 def marginal(self, val_dict):
     # The Joint marginal of the
     # neighbour variables of a factor
     # node is given by the product
     # of the incoming messages and the factor
     product = 1
     neighbours = self.neighbours
     for neighbour in neighbours:
         message = self.received_messages[neighbour.name]
         call_args = []
         for arg in get_args(message):
             call_args.append(val_dict[arg])
         if not call_args:
             call_args.append('dummy')
         product *= message(*call_args)
     # Finally we also need to multiply
     # by the factor itself
     call_args = []
     for arg in get_args(self.func):
         call_args.append(val_dict[arg])
     if not call_args:
         call_args.append('dummy')
     product *= self.func(*call_args)
     return product
def add_evidence(node, value):
    '''
    Set a variable node to an observed value.
    Note that for now this is achieved
    by modifying the factor functions
    which this node is connected to.
    After updating the factor nodes
    we need to re-run the sum-product
    algorithm. We also need to normalize
    all marginal outcomes.
    '''
    node.value = value
    neighbours = node.neighbours
    for factor_node in neighbours:
        if node.name in get_args(factor_node.func):
            factor_node.add_evidence(node, value)
def get_sample(ordering, evidence={}):
    '''
    Given a valid ordering, sample the network.
    '''
    sample = []
    sample_dict = dict()
    for var, func in ordering:
        r = random.random()
        total = 0
        for val in var.domain:
            test_var = VariableNode(var.name)
            test_var.value = val
            # Now we need to build the
            # argument list out of any
            # variables already in the sample
            # and this new test value in
            # the order required by the function.
            args = []
            for arg in get_args(func):
                if arg == var.name:
                    #args.append(test_var)
                    args.append(val)
                else:
                    args.append(sample_dict[arg].value)

            total += func(*args)
            if total > r:
                # We only want to use this sample
                # if it corresponds to the evidence value...
                if var.name in evidence:
                    if test_var.value == evidence[var.name]:
                        sample.append(test_var)
                        sample_dict[var.name] = test_var
                else:
                    sample.append(test_var)
                    sample_dict[var.name] = test_var
                break
        if var.name not in sample_dict:
            print 'Iterated through all values for %s and %s but no go...' \
                % (var.name, func.__name__)
            # This seems to mean that we have never seen this combination
            # of variables before; we can either discard it as irrelevant or
            # use some type of +1 smoothing.
            # What if we just randomly select some value for var?
            # Let's try that as it seems the easiest...
            raise InvalidSampleException
    return sample
    def gaussianize(f):

        @wraps(f)
        def gaussianized(*args):
            x = args[0]
            return 1 / (sigma * (2 * math.pi) ** 0.5) * \
                math.exp((-(x - mu) ** 2) / (2 * sigma ** 2))

        gaussianized.mean = mu
        gaussianized.std_dev = sigma
        gaussianized.variance = sigma ** 2
        gaussianized.cdf = make_gaussian_cdf(mu, sigma)
        gaussianized.argspec = get_args(f)
        gaussianized.entropy = types.MethodType(
            lambda x: 0.5 * math.log(2 * math.pi * math.e * x.variance),
            gaussianized)

        return gaussianized
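# gaussianized above closes over mu and sigma, which are not defined in this
# snippet; in the full source they come from an enclosing decorator factory.
# A self-contained sketch of that pattern (the factory name here is an
# assumption, not the library's exact API, and the cdf attribute is omitted):
import math
from functools import wraps

def gaussian_params(mu, sigma):
    def gaussianize(f):
        @wraps(f)
        def gaussianized(x):
            return 1 / (sigma * (2 * math.pi) ** 0.5) * \
                math.exp(-((x - mu) ** 2) / (2 * sigma ** 2))
        gaussianized.mean = mu
        gaussianized.std_dev = sigma
        gaussianized.variance = sigma ** 2
        return gaussianized
    return gaussianize

# @gaussian_params(25, 5)
# def f_height(height): pass
# f_height(25) then evaluates the N(25, 5**2) density at its mean.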
def make_not_sum_func(product_func, keep_var):
    '''
    Given a function with some set of
    arguments, and a single argument to keep,
    construct a new function only of the
    keep_var, summarized over all the other
    variables.

    For this branch we are trying to
    get rid of the requirement to have
    to use .value on arguments...
    Looks like it's actually in
    eliminate_var...
    '''
    args = get_args(product_func)
    new_func = copy.deepcopy(product_func)
    for arg in args:
        if arg != keep_var:
            new_func = eliminate_var(new_func, arg)
            new_func = memoize(new_func)
    return new_func
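# Hedged usage sketch for make_not_sum_func above (assumes make_not_sum_func,
# get_args, eliminate_var and memoize from this module are in scope; the factor
# and its .domains dict are made up, mirroring what FactorGraph.__init__
# attaches).
def f_xy(x, y):
    table = {(True, True): 0.2, (True, False): 0.3,
             (False, True): 0.4, (False, False): 0.1}
    return table[(x, y)]

f_xy.domains = {'x': [True, False], 'y': [True, False]}

keep_x = make_not_sum_func(f_xy, 'x')  # sums y out of f_xy
# keep_x(True) -> f_xy(True, True) + f_xy(True, False) = 0.5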
Example #14
def get_original_factors(factors):
    """
    For a set of factors, we want to
    get a mapping of the variables to
    the factor which first introduces the
    variable to the set.
    To do this without enforcing a special
    naming convention such as 'f_' for factors,
    or a special ordering, such as the last
    argument is always the new variable,
    we will have to discover the 'original'
    factor that introduces the variable
    iteratively.
    """
    original_factors = dict()
    while len(original_factors) < len(factors):
        for factor in factors:
            args = get_args(factor)
            unaccounted_args = [a for a in args if a not in original_factors]
            if len(unaccounted_args) == 1:
                original_factors[unaccounted_args[0]] = factor
    return original_factors
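# Hedged usage sketch for get_original_factors above (assumes it and get_args
# are in scope). Each factor eventually introduces exactly one new variable,
# so the iterative sweep terminates with a variable -> factor mapping.
def f_a(a): return 0.5
def f_b(a, b): return 0.5
def f_c(a, b, c): return 0.5

original = get_original_factors([f_a, f_b, f_c])
# original == {'a': f_a, 'b': f_b, 'c': f_c}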
    def add_evidence(self, node, value):
        '''
        Here we modify the factor function
        to return 0 whenever it is called
        with the observed variable having
        a value other than the observed value.
        '''
        args = get_args(self.func)
        pos = args.index(node.name)
        # Save the old func so that we
        # can remove the evidence later
        old_func = self.func
        self.cached_functions.insert(0, old_func)

        def evidence_func(*args):
            if args[pos] != value:
                return 0
            return old_func(*args)

        evidence_func.argspec = args
        evidence_func.domains = old_func.domains
        self.func = evidence_func
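# Standalone sketch of the evidence-wrapping idea used above (a simplified,
# self-contained version, not the class method itself): wrap a factor so it
# returns 0 whenever the observed argument differs from the observed value.
def clamp_evidence(func, arg_names, observed_name, observed_value):
    pos = arg_names.index(observed_name)

    def clamped(*args):
        if args[pos] != observed_value:
            return 0
        return func(*args)

    clamped.argspec = arg_names
    return clamped

# p = clamp_evidence(f_grass_wet, ['rain', 'grass_wet'], 'grass_wet', True)
# p(True, False) -> 0, while p(True, True) -> f_grass_wet(True, True)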
def discover_sample_ordering(graph):
    '''
    Try to get the order of variable nodes
    for sampling. This would be easier in
    the underlying BBN but lets try on
    the factor graph.
    '''
    iterations = 0
    ordering = []
    pmf_ordering = []
    accounted_for = set()
    variable_nodes = [n for n in graph.nodes if isinstance(n, VariableNode)]
    factor_nodes = [n for n in graph.nodes if isinstance(n, FactorNode)]
    required = len([n for n in graph.nodes if isinstance(n, VariableNode)])
    # Firstly any leaf factor nodes will
    # by definition only have one variable
    # node connection, therefore these
    # variables can be set first.
    for node in graph.get_leaves():
        if isinstance(node, FactorNode):
            ordering.append(node.neighbours[0])
            accounted_for.add(node.neighbours[0].name)
            pmf_ordering.append(node.func)

    # Now, for each factor node where all but one
    # of its variables are already in the ordering,
    # we can add that remaining variable.
    while len(ordering) < required:
        for node in factor_nodes:
            args = set(get_args(node.func))
            new_args = args.difference(accounted_for)
            if len(new_args) == 1:
                arg_name = list(new_args)[0]
                var_node = node.get_neighbour_by_name(arg_name)
                ordering.append(var_node)
                accounted_for.add(var_node.name)
                pmf_ordering.append(node.func)
    return zip(ordering, pmf_ordering)
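# Hedged end-to-end sketch: assumes build_graph, discover_sample_ordering and
# get_sample (all shown in this listing) are in scope; the factors are made up.
def f_rain(rain):
    return 0.2 if rain else 0.8

def f_grass_wet(rain, grass_wet):
    p = 0.9 if rain else 0.15
    return p if grass_wet else 1 - p

graph = build_graph(f_rain, f_grass_wet)
ordering = list(discover_sample_ordering(graph))
# ordering pairs each VariableNode with the PMF used to sample it,
# e.g. [(rain, f_rain), (grass_wet, f_grass_wet)]
sample = get_sample(ordering)
# sample is a list of VariableNodes with .value set, in sampling order.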
Example #18
 def __init__(self, factor):
     super(BBNNode, self).__init__(factor.__name__)
     self.func = factor
     self.argspec = get_args(factor)
Example #19
    def initialize_potentials(self, assignments, bbn, evidence={}):
        # Step 1, assign 1 to each cluster and sepset
        for node in self.nodes:
            tt = dict()
            vals = []
            variables = node.variable_names
            # Let's sort the variables here so that
            # the variable names in the tt keys
            # are always in sorted order.
            variables.sort()
            for variable in variables:
                domain = bbn.domains.get(variable, [True, False])
                vals.append(list(product([variable], domain)))
            permutations = product(*vals)
            for permutation in permutations:
                tt[permutation] = 1
            node.potential_tt = tt

        # Step 2: Note that in H&D the assignments are
        # done as part of step 2; however, we have
        # separated the assignment algorithm out and
        # done these prior to step 1.
        # Now for each assignment we want to
        # generate a truth-table from the
        # values of the bbn truth-tables that are
        # assigned to the clusters...

        for clique, bbn_nodes in assignments.iteritems():

            tt = dict()
            vals = []
            variables = list(clique.variable_names)
            variables.sort()
            for variable in variables:
                domain = bbn.domains.get(variable, [True, False])
                vals.append(list(product([variable], domain)))
            permutations = product(*vals)
            for permutation in permutations:
                argvals = dict(permutation)
                potential = 1
                for bbn_node in bbn_nodes:
                    bbn_node.clique = clique
                    # We could handle evidence here
                    # by altering the potential_tt.
                    # This is slightly different to
                    # the way that H&D do it.

                    arg_list = []
                    for arg_name in get_args(bbn_node.func):
                        arg_list.append(argvals[arg_name])

                    potential *= bbn_node.func(*arg_list)
                tt[permutation] = potential
            clique.potential_tt = tt

        if not evidence:
            # We don't need to deal with likelihoods
            # if we don't have any evidence.
            return

        # Step 2b: Set each likelihood element ^V(v) to 1
        likelihoods = self.initial_likelihoods(assignments, bbn)
        for clique, bbn_nodes in assignments.iteritems():
            for node in bbn_nodes:
                if node.variable_name in evidence:
                    for k, v in clique.potential_tt.items():
                        # Encode the evidence in
                        # the clique potential...
                        for variable, value in k:
                            if (variable == node.variable_name):
                                if value != evidence[variable]:
                                    clique.potential_tt[k] = 0
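# Small sketch of how the truth-table keys above are built (assumes product is
# itertools.product, as used in this method): the outer product over
# (variable, value) pairs yields one key per joint assignment.
from itertools import product

vals = [list(product(['a'], [True, False])),
        list(product(['b'], [True, False]))]
keys = list(product(*vals))
# keys == [(('a', True), ('b', True)), (('a', True), ('b', False)),
#          (('a', False), ('b', True)), (('a', False), ('b', False))]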
Example #20
def build_gbn(*args, **kwds):
    '''Builds a Gaussian Bayesian Graph from
    a list of functions'''
    variables = set()
    name = kwds.get('name')
    variable_nodes = dict()
    factor_nodes = dict()

    if isinstance(args[0], list):
        # Assume the functions were all
        # passed in a list in the first
        # argument. This makes it possible
        # to build very large graphs with
        # more than 255 functions, since
        # Python functions are limited to
        # 255 arguments.
        args = args[0]

    for factor in args:
        factor_args = get_args(factor)
        variables.update(factor_args)
        node = GBNNode(factor)
        factor_nodes[factor.__name__] = node

    # Now let's create the connections.
    # To do this we need to find the
    # factor node representing the variables
    # in a child factor's argument list and connect
    # it to the child node.
    # Note that calling original_factors
    # here can break build_gbn if the
    # factors do not correctly represent
    # a valid network. This will be fixed
    # in the next release.
    original_factors = get_original_factors(factor_nodes.values())
    for var_name, factor in original_factors.items():
        factor.variable_name = var_name
    for factor_node in factor_nodes.values():
        factor_args = get_args(factor_node)
        parents = [
            original_factors[arg] for arg in factor_args
            if original_factors[arg] != factor_node
        ]
        for parent in parents:
            connect(parent, factor_node)
    # Now process the raw_betas to create a dict
    for factor_node in factor_nodes.values():
        # Now we want betas to always be a dict
        # but in the case that the node only
        # has one parent we will allow the user to specify
        # the single beta for that parent simply
        # as a number and not a dict.
        if hasattr(factor_node.func, 'raw_betas'):
            if isinstance(factor_node.func.raw_betas, Number):
                # Make sure that if they supply a number
                # there is only one parent
                assert len(get_args(factor_node)) == 2
                betas = dict()
                for arg in get_args(factor_node):
                    if arg != factor_node.variable_name:
                        betas[arg] = factor_node.func.raw_betas
                factor_node.func.betas = betas
            else:
                factor_node.func.betas = factor_node.func.raw_betas
    gbn = GaussianBayesianGraph(original_factors, name=name)
    # Now for any conditional gaussian nodes
    # we need to tell the node function what the
    # parent parameters are so that the pdf can
    # be computed.
    sorted = gbn.get_topological_sort()
    joint_mu, joint_sigma = gbn.get_joint_parameters()
    for node in sorted:
        if hasattr(node.func, 'betas'):
            # This means it's a multivariate Gaussian
            names = [n.variable_name
                     for n in node.parents] + [node.variable_name]
            node.func.joint_mu = MeansVector.zeros((len(names), 1),
                                                   names=names)
            for name in names:
                node.func.joint_mu[name] = joint_mu[name][0, 0]
            node.func.covariance_matrix = CovarianceMatrix.zeros(
                (len(names), len(names)), names)
            for row, col in xproduct(names, names):
                node.func.covariance_matrix[row, col] = joint_sigma[row, col]
    return gbn
    def verify(self):
        '''
        Check several properties of the Factor Graph
        that should hold.
        '''
        # Check that all nodes are either
        # instances of classes derived from
        # VariableNode or FactorNode.
        # It is a very common error to instantiate
        # the graph with the factor function
        # instead of the corresponding factor
        # node.
        for node in self.nodes:
            if not isinstance(node, VariableNode) and \
                    not isinstance(node, FactorNode):
                bases = node.__class__.__bases__
                if not VariableNode in bases and not FactorNode in bases:
                    print(('Factor Graph does not '
                           'support nodes of type: %s' % node.__class__))
                    raise InvalidGraphException
        # First check that for each node
        # only connects to nodes of the
        # other type.
        print('Checking neighbour node types...')
        for node in self.nodes:
            if not node.verify_neighbour_types():
                print('%s has invalid neighbour type.' % node)
                return False
        print('Checking that all factor functions have domains...')
        for node in self.nodes:
            if isinstance(node, FactorNode):
                if not hasattr(node.func, 'domains'):
                    print('%s has no domains.' % node)
                    raise InvalidGraphException
                elif not node.func.domains:
                    # Also check for an empty domain dict!
                    print('%s has empty domains.' % node)
                    raise InvalidGraphException
        print('Checking that all variables are accounted for' + \
            ' by at least one function...')
        variables = set(
            [vn.name for vn in self.nodes if isinstance(vn, VariableNode)])

        largs = [
            get_args(fn.func) for fn in self.nodes
            if isinstance(fn, FactorNode)
        ]

        args = set(reduce(lambda x, y: x + y, largs))

        if not variables.issubset(args):
            print('These variables are not used in any factor nodes: ')
            print(variables.difference(args))
            return False
        print('Checking that all arguments have matching variable nodes...')
        if not args.issubset(variables):
            print('These arguments have missing variables:')
            print(args.difference(variables))
            return False
        print('Checking that graph has at least one leaf node...')
        leaf_nodes = [x for x in self.nodes if x.is_leaf()]
        if not leaf_nodes:
            print('Graph has no leaf nodes.')
            raise InvalidGraphException
        return True
def build_gbn(*args, **kwds):
    '''Builds a Gaussian Bayesian Graph from
    a list of functions'''
    variables = set()
    name = kwds.get('name')
    variable_nodes = dict()
    factor_nodes = dict()

    if isinstance(args[0], list):
        # Assume the functions were all
        # passed in a list in the first
        # argument. This makes it possible
        # to build very large graphs with
        # more than 255 functions, since
        # Python functions are limited to
        # 255 arguments.
        args = args[0]

    for factor in args:
        factor_args = get_args(factor)
        variables.update(factor_args)
        node = GBNNode(factor)
        factor_nodes[factor.__name__] = node

    # Now let's create the connections.
    # To do this we need to find the
    # factor node representing the variables
    # in a child factor's argument list and connect
    # it to the child node.
    # Note that calling original_factors
    # here can break build_gbn if the
    # factors do not correctly represent
    # a valid network. This will be fixed
    # in the next release.
    original_factors = get_original_factors(list(factor_nodes.values()))
    for var_name, factor in list(original_factors.items()):
        factor.variable_name = var_name
    for factor_node in list(factor_nodes.values()):
        factor_args = get_args(factor_node)
        parents = [original_factors[arg] for arg in
                   factor_args if original_factors[arg] != factor_node]
        for parent in parents:
            connect(parent, factor_node)
    # Now process the raw_betas to create a dict
    for factor_node in list(factor_nodes.values()):
        # Now we want betas to always be a dict
        # but in the case that the node only
        # has one parent we will allow the user to specify
        # the single beta for that parent simply
        # as a number and not a dict.
        if hasattr(factor_node.func, 'raw_betas'):
            if isinstance(factor_node.func.raw_betas, Number):
                # Make sure that if they supply a number
                # there is only one parent
                assert len(get_args(factor_node)) == 2
                betas = dict()
                for arg in get_args(factor_node):
                    if arg != factor_node.variable_name:
                        betas[arg] = factor_node.func.raw_betas
                factor_node.func.betas = betas
            else:
                factor_node.func.betas = factor_node.func.raw_betas
    gbn = GaussianBayesianGraph(original_factors, name=name)
    # Now for any conditional gaussian nodes
    # we need to tell the node function what the
    # parent parameters are so that the pdf can
    # be computed.
    sorted = gbn.get_topological_sort()
    joint_mu, joint_sigma = gbn.get_joint_parameters()
    for node in sorted:
        if hasattr(node.func, 'betas'):
            # This means it's a multivariate Gaussian
            names = [n.variable_name for n in node.parents] + [node.variable_name]
            node.func.joint_mu = MeansVector.zeros((len(names), 1), names=names)
            for name in names:
                node.func.joint_mu[name] = joint_mu[name][0, 0]
            node.func.covariance_matrix = CovarianceMatrix.zeros(
                (len(names), len(names)), names)
            for row, col in xproduct(names, names):
                node.func.covariance_matrix[row, col] = joint_sigma[row, col]
    return gbn
 def __init__(self, source, destination, factors, func):
     self.source = source
     self.destination = destination
     self.factors = factors
     self.argspec = get_args(func)
     self.func = func
Example #24
 def __init__(self, factor):
     super(BBNNode, self).__init__(factor.__name__)
     self.func = factor
     self.argspec = get_args(factor)
 def __repr__(self):
     return '<FactorNode %s %s(%s)>' % \
         (self.name,
          self.func.__name__,
          get_args(self.func))
def eliminate_var(f, var):
    '''
    Given a function f return a new
    function which sums over the variable
    we want to eliminate

    This may be where we have the opportunity
    to remove the use of .value....

    '''
    arg_spec = get_args(f)
    pos = arg_spec.index(var)
    new_spec = arg_spec[:]
    new_spec.remove(var)
    # Let's say the original argspec is
    # ('a', 'b', 'c', 'd') and they
    # are all Booleans.
    # Now let's say we want to eliminate c.
    # This means we want to sum over
    # f(a, b, True, d) and f(a, b, False, d).
    # It seems like all we have to do is know
    # the position of c and that's it...
    # Ok, so it's not as simple as that;
    # this is because when the *call* is made
    # to the eliminated function, as opposed
    # to when it's built, it's only
    # called with ('a', 'b', 'd')
    eliminated_pos = arg_spec.index(var)

    def eliminated(*args):
        template = arg_spec[:]
        total = 0
        call_args = template[:]
        i = 0
        for arg in args:
            # To be able to remove .value we
            # first need to also be able to
            # remove .name; in fact .value is
            # just a side effect of having to
            # rely on .name. This means we
            # probably need to construct
            # a list containing the names
            # of the args based on the position
            # in which they are called.
            if i == eliminated_pos:
                # We need to increment i
                # once more to skip over
                # the variable being marginalized
                call_args[i] = 'marginalize me!'
                i += 1
            call_args[i] = arg
            i += 1

        for val in f.domains[var]:
            #v = VariableNode(name=var)
            #v.value = val
            #call_args[pos] = v
            call_args[pos] = val
            total += f(*call_args)
        return total

    eliminated.argspec = new_spec
    eliminated.domains = f.domains
    #eliminated.__name__ = f.__name__
    return eliminated
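# Hedged usage sketch for eliminate_var above (assumes eliminate_var and a
# get_args that reads plain function signatures are in scope; the factor is
# made up). Summing b out of g(a, b):
def g(a, b):
    return {(True, True): 0.1, (True, False): 0.2,
            (False, True): 0.3, (False, False): 0.4}[(a, b)]

g.domains = {'a': [True, False], 'b': [True, False]}

g_minus_b = eliminate_var(g, 'b')
# g_minus_b(True) -> 0.1 + 0.2 = 0.3; g_minus_b(False) -> 0.3 + 0.4 = 0.7
# g_minus_b.argspec == ['a'] and it inherits g.domains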
    def verify(self):
        '''
        Check several properties of the Factor Graph
        that should hold.
        '''
        # Check that all nodes are either
        # instances of classes derived from
        # VariableNode or FactorNode.
        # It is a very common error to instantiate
        # the graph with the factor function
        # instead of the corresponding factor
        # node.
        for node in self.nodes:
            if not isinstance(node, VariableNode) and \
                    not isinstance(node, FactorNode):
                bases = node.__class__.__bases__
                if not VariableNode in bases and not FactorNode in bases:
                    print ('Factor Graph does not '
                           'support nodes of type: %s' % node.__class__)
                    raise InvalidGraphException
        # First check that for each node
        # only connects to nodes of the
        # other type.
        print 'Checking neighbour node types...'
        for node in self.nodes:
            if not node.verify_neighbour_types():
                print '%s has invalid neighbour type.' % node
                return False
        print 'Checking that all factor functions have domains...'
        for node in self.nodes:
            if isinstance(node, FactorNode):
                if not hasattr(node.func, 'domains'):
                    print '%s has no domains.' % node
                    raise InvalidGraphException
                elif not node.func.domains:
                    # Also check for an empty domain dict!
                    print '%s has empty domains.' % node
                    raise InvalidGraphException
        print 'Checking that all variables are accounted for' + \
            ' by at least one function...'
        variables = set([vn.name for vn in self.nodes
                         if isinstance(vn, VariableNode)])

        largs = [get_args(fn.func) for fn in
                 self.nodes if isinstance(fn, FactorNode)]

        args = set(reduce(lambda x, y: x + y, largs))

        if not variables.issubset(args):
            print 'These variables are not used in any factor nodes: '
            print variables.difference(args)
            return False
        print 'Checking that all arguments have matching variable nodes...'
        if not args.issubset(variables):
            print 'These arguments have missing variables:'
            print args.difference(variables)
            return False
        print 'Checking that graph has at least one leaf node...'
        leaf_nodes = filter(
            lambda x: x.is_leaf(),
            self.nodes)
        if not leaf_nodes:
            print 'Graph has no leaf nodes.'
            raise InvalidGraphException
        return True