def __init__(self, nodes, name=None, n_samples=100):
    self.nodes = nodes
    self._inference_method = 'sumproduct'
    # We need to divine the domains for Factor nodes here...
    # First compile a mapping of factors to variables
    # from the arg spec...
    function_args = dict()
    arg_domains = dict()
    for node in self.nodes:
        if isinstance(node, VariableNode):
            #if not hasattr(node, 'domain'):
            #    node.domain = [True, False]
            arg_domains[node.name] = node.domain
        elif isinstance(node, FactorNode):
            function_args[node.func.__name__] = get_args(node.func)
    # Now, if the domains for the
    # factor functions have not been explicitly
    # set, we create them based on the variable
    # values they can take.
    for node in self.nodes:
        if isinstance(node, FactorNode):
            if hasattr(node.func, 'domains'):
                continue
            domains = dict()
            for arg in get_args(node.func):
                if arg not in arg_domains:
                    print 'WARNING: missing variable for arg:%s' % arg
                else:
                    domains.update({arg: arg_domains[arg]})
            node.func.domains = domains
    self.name = name
    self.n_samples = n_samples
    # Now try to set the mode of inference...
    try:
        if self.has_cycles():
            # Currently only sampling
            # is supported for cyclic graphs.
            self.inference_method = 'sample'
        else:
            # The sumproduct method will
            # give exact likelihoods, but
            # only if the graph contains
            # no cycles.
            self.inference_method = 'sumproduct'
    except Exception:
        print ('Failed to determine if graph has cycles, '
               'setting inference to sample.')
        self.inference_method = 'sample'
    self.enforce_minimum_samples = False
def make_product_func(factors):
    '''
    Return a single callable from a list of factors
    which correctly applies the arguments to each
    individual factor.

    The challenge here is to return a function
    whose argument list we know, and to ensure that
    when this function is called, it's always
    called with the correct arguments.
    Since the correct argspec is attached
    to the built function, it seems that
    it should be up to the caller to
    get the argument list correct.
    So we need to determine when and where it's called...
    '''
    args_map = {}
    all_args = []
    domains = {}
    for factor in factors:
        #if factor == 1:
        #    continue
        args_map[factor] = get_args(factor)
        all_args += args_map[factor]
        if hasattr(factor, 'domains'):
            domains.update(factor.domains)
    args = list(set(all_args))
    # Perhaps if we sort the args here the argument
    # order (and hence the memoized cache keys)
    # would be deterministic...

    def product_func(*product_func_args):
        #import pytest; pytest.set_trace()
        #arg_dict = dict([(a.name, a) for a in product_func_args])
        arg_dict = dict(zip(args, product_func_args))
        #import pytest; pytest.set_trace()
        result = 1
        for factor in factors:
            #domains.update(factor.domains)
            # We need to build the correct argument
            # list to call this factor with.
            factor_args = []
            for arg in get_args(factor):
                if arg in arg_dict:
                    factor_args.append(arg_dict[arg])
            if not factor_args:
                # Since we always require
                # at least one argument we
                # insert a dummy argument
                # so that the unity function works.
                factor_args.append('dummy')
            result *= factor(*factor_args)
        return result

    product_func.argspec = args
    product_func.factors = factors
    product_func.domains = domains
    return memoize(product_func)
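# A minimal usage sketch (hedged: it assumes make_product_func and get_args
# are importable from this module; the factor bodies below are invented
# purely for illustration).
def f_rain(rain):
    return 0.2 if rain else 0.8

def f_grass_wet(rain, grass_wet):
    table = {
        (True, True): 0.8, (True, False): 0.2,
        (False, True): 0.1, (False, False): 0.9,
    }
    return table[(rain, grass_wet)]

joint = make_product_func([f_rain, f_grass_wet])
# The combined argspec is the de-duplicated union of the factor args. Because
# it is built from a set, always check .argspec before calling positionally.
# (Reading .argspec off the returned callable relies on memoize preserving
# function attributes, e.g. via functools.wraps.)
print(joint.argspec)                              # e.g. ['rain', 'grass_wet']
vals = {'rain': True, 'grass_wet': True}
print(joint(*[vals[a] for a in joint.argspec]))   # 0.2 * 0.8 = 0.16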
def conditional_gaussianize(f):

    @wraps(f)
    def conditional_gaussianized(*args, **kwds):
        '''Since this function will never
        be called directly we don't need
        anything here.
        '''
        # First we need to construct a vector
        # out of the args...
        x = zeros((len(args), 1))
        for i, a in enumerate(args):
            x[i, 0] = a
        sigma = conditional_gaussianized.covariance_matrix
        mu = conditional_gaussianized.joint_mu
        # NOTE: this normalizing constant matches the univariate form;
        # the general k-dimensional constant would be
        # ((2 * math.pi) ** k * sigma.det()) ** 0.5.
        return 1 / (2 * math.pi * sigma.det()) ** 0.5 \
            * math.exp(-0.5 * ((x - mu).T * sigma.I * (x - mu))[0, 0])

    # mu, sigma and betas are free variables in this snippet; they are
    # expected to be bound in the enclosing decorator factory's scope.
    conditional_gaussianized.mean = mu
    conditional_gaussianized.std_dev = sigma
    conditional_gaussianized.variance = sigma ** 2
    conditional_gaussianized.raw_betas = betas
    conditional_gaussianized.argspec = get_args(f)
    conditional_gaussianized.entropy = types.MethodType(
        # Use float division so odd dimensions are handled correctly.
        lambda x: len(x.joint_mu) / 2.0 *
        (1 + math.log(2 * math.pi)) +
        0.5 * math.log(x.covariance_matrix.det()),
        conditional_gaussianized)

    # NOTE: the joint parameters are
    # added to this function at the time of
    # graph construction.

    return conditional_gaussianized
def __init__(self, source, destination, factors, func):
    self.source = source
    self.destination = destination
    self.factors = factors
    self.func = func
    self.argspec = get_args(func)
    self.domains = func.domains
def build_graph(*args, **kwds):
    '''
    Automatically create all the
    variable and factor nodes
    using only function definitions.
    Since it's cumbersome to supply
    the domains for variable nodes
    via the factor domains, perhaps
    we should allow a domains dict?
    '''
    # Let's start off identifying all the
    # variables by introspecting the
    # functions.
    variables = set()
    domains = kwds.get('domains', {})
    name = kwds.get('name')
    variable_nodes = dict()
    factor_nodes = []
    if isinstance(args[0], list):
        # Assume the functions were all
        # passed in a list in the first
        # argument. This makes it possible
        # to build very large graphs with
        # more than 255 functions.
        args = args[0]
    for factor in args:
        factor_args = get_args(factor)
        variables.update(factor_args)
        factor_node = FactorNode(factor.__name__, factor)
        #factor_node.func.domains = domains
        # Bit of a hack; for now we should actually exclude variables that
        # are not parameters of this function.
        factor_nodes.append(factor_node)
    for variable in variables:
        node = VariableNode(
            variable,
            domain=domains.get(variable, [True, False]))
        variable_nodes[variable] = node
    # Now we have to connect each factor node
    # to its variable nodes.
    for factor_node in factor_nodes:
        factor_args = get_args(factor_node.func)
        connect(factor_node, [variable_nodes[x] for x in factor_args])
    graph = FactorGraph(
        variable_nodes.values() + factor_nodes, name=name)
    #print domains
    return graph
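# A hedged usage sketch, reusing f_rain and f_grass_wet from the
# make_product_func example above (it assumes build_graph and the node
# classes are importable from this module).
graph = build_graph(
    f_rain, f_grass_wet,
    domains={'rain': [True, False], 'grass_wet': [True, False]})
# verify() is the Factor Graph sanity check defined later in this listing.
graph.verify()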
def build_bbn(*args, **kwds):
    '''Builds a BBN Graph from
    a list of functions and domains.'''
    variables = set()
    domains = kwds.get('domains', {})
    name = kwds.get('name')
    variable_nodes = dict()
    factor_nodes = dict()

    if isinstance(args[0], list):
        # Assume the functions were all
        # passed in a list in the first
        # argument. This makes it possible
        # to build very large graphs with
        # more than 255 functions, since
        # Python functions are limited to
        # 255 arguments.
        args = args[0]

    for factor in args:
        factor_args = get_args(factor)
        variables.update(factor_args)
        bbn_node = BBNNode(factor)
        factor_nodes[factor.__name__] = bbn_node

    # Now let's create the connections.
    # To do this we need to find the
    # factor node representing the variables
    # in a child factor's arguments and connect
    # it to the child node.
    # Note that calling original_factors
    # here can break build_bbn if the
    # factors do not correctly represent
    # a BBN.
    original_factors = get_original_factors(factor_nodes.values())
    for factor_node in factor_nodes.values():
        factor_args = get_args(factor_node)
        parents = [original_factors[arg] for arg in factor_args
                   if original_factors[arg] != factor_node]
        for parent in parents:
            connect(parent, factor_node)
    bbn = BBN(original_factors, name=name)
    bbn.domains = domains
    return bbn
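# A hedged sketch of the intended usage, reusing f_rain and f_grass_wet from
# the make_product_func example above. Each function should introduce exactly
# one new variable relative to the others (see get_original_factors), which is
# how the parent/child links are discovered.
g = build_bbn(
    f_rain, f_grass_wet,
    domains={'rain': [True, False], 'grass_wet': [True, False]})
# The upstream BBN class exposes a convenience query method named q(); if this
# build differs, read the marginals off the junction tree directly instead.
g.q()                  # prior marginals for every variable
g.q(grass_wet=True)    # marginals conditioned on the evidence grass_wet=True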
def make_factor_node_message(node, target_node):
    '''
    The rules for a factor node are:
    take the product of all the incoming
    messages (except for the one from the
    destination node) and then take the sum over
    all the variables except for the
    destination variable.

    Illustrative usage (not a runnable doctest)::

        def f(x1, x2, x3): pass
        node.func = f
        target_node.name = 'x2'
        make_factor_node_message(node, target_node)
    '''
    if node.is_leaf():
        not_sum_func = make_not_sum_func(node.func, target_node.name)
        message = FactorMessage(
            node, target_node, [node.func], not_sum_func)
        return message

    args = set(get_args(node.func))

    # Compile the list of factors for the message.
    factors = [node.func]

    # Now add the message that came from each
    # of the non-destination neighbours...
    neighbours = node.neighbours
    for neighbour in neighbours:
        if neighbour == target_node:
            continue
        # When we pass on a message, we unwrap
        # the original payload and wrap it
        # in new headers; this is purely
        # to verify the procedure is correct
        # according to the usual nomenclature.
        in_message = node.received_messages[neighbour.name]
        if in_message.destination != node:
            out_message = VariableMessage(
                neighbour, node, in_message.factors, in_message.func)
            out_message.argspec = in_message.argspec
        else:
            out_message = in_message
        factors.append(out_message)

    product_func = make_product_func(factors)
    not_sum_func = make_not_sum_func(product_func, target_node.name)
    message = FactorMessage(node, target_node, factors, not_sum_func)
    return message
def assign_clusters(self, bbn):
    assignments_by_family = dict()
    assignments_by_clique = defaultdict(list)
    assigned = set()
    for node in bbn.nodes:
        args = get_args(node.func)
        if len(args) == 1:
            # If the func has only 1 arg
            # it means that it does not
            # specify a conditional probability.
            # This is where H&D is a bit vague,
            # but it seems to imply that we
            # do not assign it to any clique.
            # Revising this for now as I don't
            # think it's correct; I think
            # all CPTs need to be assigned
            # once and once only. The example
            # in H&D just happens to be a clique
            # that f_a could have been assigned
            # to but wasn't, presumably because
            # it got assigned somewhere else.
            pass
            #continue
        # Now we need to find a cluster that
        # is a superset of Family(v).
        # Family(v) is defined by H&D to
        # be the union of v and parents(v).
        family = set(args)
        # At this point we need to know which *variable*
        # a BBN node represents. Up to now we have
        # not *explicitly* specified this, however
        # we have been following some conventions
        # so we could just use this convention for
        # now. Need to come back to this to
        # perhaps establish the variable at
        # build_bbn time...
        containing_cliques = [
            clique_node for clique_node in self.clique_nodes
            if (set(clique_node.variable_names).issuperset(family))]
        assert len(containing_cliques) >= 1
        for clique in containing_cliques:
            if node in assigned:
                # Make sure we assign all original
                # PMFs only once each.
                continue
            assignments_by_clique[clique].append(node)
            assigned.add(node)
        assignments_by_family[tuple(family)] = containing_cliques
    return assignments_by_clique
def marginal(self, val_dict):
    # The joint marginal of the
    # neighbour variables of a factor
    # node is given by the product
    # of the incoming messages and the factor.
    product = 1
    neighbours = self.neighbours
    for neighbour in neighbours:
        message = self.received_messages[neighbour.name]
        call_args = []
        for arg in get_args(message):
            call_args.append(val_dict[arg])
        if not call_args:
            call_args.append('dummy')
        product *= message(*call_args)
    # Finally we also need to multiply
    # by the factor itself.
    call_args = []
    for arg in get_args(self.func):
        call_args.append(val_dict[arg])
    if not call_args:
        call_args.append('dummy')
    product *= self.func(*call_args)
    return product
def add_evidence(node, value):
    '''
    Set a variable node to an observed value.
    Note that for now this is achieved
    by modifying the factor functions
    which this node is connected to.
    After updating the factor nodes
    we need to re-run the sum-product
    algorithm. We also need to normalize
    all marginal outcomes.
    '''
    node.value = value
    neighbours = node.neighbours
    for factor_node in neighbours:
        if node.name in get_args(factor_node.func):
            factor_node.add_evidence(node, value)
def get_sample(ordering, evidence={}):
    '''
    Given a valid ordering, sample the network.
    '''
    sample = []
    sample_dict = dict()
    for var, func in ordering:
        r = random.random()
        total = 0
        for val in var.domain:
            test_var = VariableNode(var.name)
            test_var.value = val
            # Now we need to build the
            # argument list out of any
            # variables already in the sample
            # and this new test value, in
            # the order required by the function.
            args = []
            for arg in get_args(func):
                if arg == var.name:
                    #args.append(test_var)
                    args.append(val)
                else:
                    args.append(sample_dict[arg].value)
            total += func(*args)
            if total > r:
                # We only want to use this sample
                # if it corresponds to the evidence value...
                if var.name in evidence:
                    if test_var.value == evidence[var.name]:
                        sample.append(test_var)
                        sample_dict[var.name] = test_var
                else:
                    sample.append(test_var)
                    sample_dict[var.name] = test_var
                break
        if var.name not in sample_dict:
            print 'Iterated through all values for %s and %s but no go...' \
                % (var.name, func.__name__)
            # This seems to mean that we have never seen this combination
            # of variables before; we can either discard it as irrelevant or
            # use some type of +1 smoothing???
            # What if we just randomly select some value for var?
            # Let's try that as it seems the easiest...
            raise InvalidSampleException
    return sample
def gaussianize(f):

    # Note: mu and sigma are not defined in this snippet; they are free
    # variables, presumably bound by an enclosing decorator factory that
    # receives the mean and standard deviation (see the sketch below).
    @wraps(f)
    def gaussianized(*args):
        x = args[0]
        return 1 / (sigma * (2 * math.pi) ** 0.5) * \
            math.exp((-(x - mu) ** 2) / (2 * sigma ** 2))

    gaussianized.mean = mu
    gaussianized.std_dev = sigma
    gaussianized.variance = sigma ** 2
    gaussianized.cdf = make_gaussian_cdf(mu, sigma)
    gaussianized.argspec = get_args(f)
    gaussianized.entropy = types.MethodType(
        lambda x: 0.5 * math.log(2 * math.pi * math.e * x.variance),
        gaussianized)
    return gaussianized
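# A minimal, self-contained sketch of what the enclosing decorator factory
# could look like. The name `gaussian` and its signature are assumptions for
# illustration, not necessarily this library's API; the library helpers
# (make_gaussian_cdf, get_args, entropy) are omitted to keep it runnable.
import math
from functools import wraps

def gaussian(mu, sigma):
    '''Hypothetical outer factory: binds mu and sigma into the closure
    that the gaussianize() body shown above reads from.'''
    def gaussianize(f):
        @wraps(f)
        def gaussianized(*args):
            x = args[0]
            # Univariate normal density N(x; mu, sigma^2).
            return 1 / (sigma * (2 * math.pi) ** 0.5) * \
                math.exp((-(x - mu) ** 2) / (2 * sigma ** 2))
        gaussianized.mean = mu
        gaussianized.std_dev = sigma
        gaussianized.variance = sigma ** 2
        return gaussianized
    return gaussianize

@gaussian(mu=25.0, sigma=4.0)
def f_height(height):
    pass  # never called directly; the wrapper does the work

print(f_height(27.0))   # density of N(25, 16) evaluated at 27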
def make_not_sum_func(product_func, keep_var):
    '''
    Given a function with some set of
    arguments, and a single argument to keep,
    construct a new function only of the
    keep_var, summed over all the other
    variables.

    For this branch we are trying to get rid of
    the requirement to use .value on arguments...
    Looks like it's actually in eliminate_var...
    '''
    args = get_args(product_func)
    new_func = copy.deepcopy(product_func)
    for arg in args:
        if arg != keep_var:
            new_func = eliminate_var(new_func, arg)
            new_func = memoize(new_func)
    return new_func
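# A small hedged sketch: sum a two-variable factor down to a function of 'a'
# only. eliminate_var (below) reads f.domains, so we attach it by hand here;
# this also assumes the module's memoize is a plain argument-keyed cache.
def f_ab(a, b):
    return 0.25   # a flat joint over two Booleans

f_ab.domains = {'a': [True, False], 'b': [True, False]}
only_a = make_not_sum_func(f_ab, 'a')
print(only_a(True))    # 0.25 + 0.25 = 0.5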
def get_original_factors(factors):
    """
    For a set of factors, we want
    to get a mapping of the variables
    to the factor which first introduces the
    variable to the set.
    To do this without enforcing a special
    naming convention such as 'f_' for factors,
    or a special ordering, such as the last
    argument always being the new variable,
    we will have to discover the 'original'
    factor that introduces the variable
    iteratively.
    """
    original_factors = dict()
    while len(original_factors) < len(factors):
        for factor in factors:
            args = get_args(factor)
            unaccounted_args = [a for a in args if a not in original_factors]
            if len(unaccounted_args) == 1:
                original_factors[unaccounted_args[0]] = factor
    return original_factors
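# A small worked example of the mapping this produces (hedged: it assumes
# get_args reads positional argument names, as it does for plain functions).
def f_burglary(burglary): pass
def f_earthquake(earthquake): pass
def f_alarm(burglary, earthquake, alarm): pass

mapping = get_original_factors([f_alarm, f_burglary, f_earthquake])
# The input order does not matter; the while loop keeps sweeping until every
# variable is attributed to the factor that first introduces it:
# {'burglary': f_burglary, 'earthquake': f_earthquake, 'alarm': f_alarm}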
def add_evidence(self, node, value):
    '''
    Here we modify the factor function
    to return 0 whenever it is called
    with the observed variable having
    a value other than the observed value.
    '''
    args = get_args(self.func)
    pos = args.index(node.name)
    # Save the old func so that we
    # can remove the evidence later.
    old_func = self.func
    self.cached_functions.insert(0, old_func)

    def evidence_func(*args):
        if args[pos] != value:
            return 0
        return old_func(*args)

    evidence_func.argspec = args
    evidence_func.domains = old_func.domains
    self.func = evidence_func
def discover_sample_ordering(graph):
    '''
    Try to get the order of variable nodes
    for sampling. This would be easier in
    the underlying BBN, but let's try on
    the factor graph.
    '''
    iterations = 0
    ordering = []
    pmf_ordering = []
    accounted_for = set()
    variable_nodes = [n for n in graph.nodes if isinstance(n, VariableNode)]
    factor_nodes = [n for n in graph.nodes if isinstance(n, FactorNode)]
    required = len([n for n in graph.nodes if isinstance(n, VariableNode)])
    # Firstly, any leaf factor nodes will
    # by definition only have one variable
    # node connection, therefore these
    # variables can be set first.
    for node in graph.get_leaves():
        if isinstance(node, FactorNode):
            ordering.append(node.neighbours[0])
            accounted_for.add(node.neighbours[0].name)
            pmf_ordering.append(node.func)
    # Now, for each factor node all but one of
    # whose variables are already in the ordering,
    # we can add that one remaining variable; we
    # repeat this until every variable is accounted for.
    while len(ordering) < required:
        for node in factor_nodes:
            args = set(get_args(node.func))
            new_args = args.difference(accounted_for)
            if len(new_args) == 1:
                arg_name = list(new_args)[0]
                var_node = node.get_neighbour_by_name(arg_name)
                ordering.append(var_node)
                accounted_for.add(var_node.name)
                pmf_ordering.append(node.func)
    return zip(ordering, pmf_ordering)
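# A hedged sampling sketch, reusing the `graph` built in the build_graph
# example above. discover_sample_ordering yields (variable_node, pmf) pairs;
# wrapping in list() matters under Python 3, where zip() is a one-shot
# iterator that would otherwise be exhausted after the first sample.
ordering = list(discover_sample_ordering(graph))
print([var.name for var, pmf in ordering])   # e.g. ['rain', 'grass_wet']
for _ in range(3):
    sample = get_sample(ordering)
    print([(v.name, v.value) for v in sample])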
# Note: this is the inner closure built by make_product_func above;
# `args` and `factors` are free variables taken from that enclosing scope.
def product_func(*product_func_args):
    #import pytest; pytest.set_trace()
    #arg_dict = dict([(a.name, a) for a in product_func_args])
    arg_dict = dict(zip(args, product_func_args))
    #import pytest; pytest.set_trace()
    result = 1
    for factor in factors:
        #domains.update(factor.domains)
        # We need to build the correct argument
        # list to call this factor with.
        factor_args = []
        for arg in get_args(factor):
            if arg in arg_dict:
                factor_args.append(arg_dict[arg])
        if not factor_args:
            # Since we always require
            # at least one argument we
            # insert a dummy argument
            # so that the unity function works.
            factor_args.append('dummy')
        result *= factor(*factor_args)
    return result
def __init__(self, factor):
    super(BBNNode, self).__init__(factor.__name__)
    self.func = factor
    self.argspec = get_args(factor)
def initialize_potentials(self, assignments, bbn, evidence={}):
    # Step 1: assign 1 to each cluster and sepset.
    for node in self.nodes:
        tt = dict()
        vals = []
        variables = node.variable_names
        # Let's sort the variables here so that
        # the variable names in the keys of
        # the tt are always sorted.
        variables.sort()
        for variable in variables:
            domain = bbn.domains.get(variable, [True, False])
            vals.append(list(product([variable], domain)))
        permutations = product(*vals)
        for permutation in permutations:
            tt[permutation] = 1
        node.potential_tt = tt

    # Step 2: Note that in H&D the assignments are
    # done as part of step 2; however we have
    # separated the assignment algorithm out and
    # done it prior to step 1.
    # Now for each assignment we want to
    # generate a truth-table from the
    # values of the bbn truth-tables that are
    # assigned to the clusters...
    for clique, bbn_nodes in assignments.iteritems():
        tt = dict()
        vals = []
        variables = list(clique.variable_names)
        variables.sort()
        for variable in variables:
            domain = bbn.domains.get(variable, [True, False])
            vals.append(list(product([variable], domain)))
        permutations = product(*vals)
        for permutation in permutations:
            argvals = dict(permutation)
            potential = 1
            for bbn_node in bbn_nodes:
                bbn_node.clique = clique
                # We could handle evidence here
                # by altering the potential_tt.
                # This is slightly different to
                # the way that H&D do it.
                arg_list = []
                for arg_name in get_args(bbn_node.func):
                    arg_list.append(argvals[arg_name])
                potential *= bbn_node.func(*arg_list)
            tt[permutation] = potential
        clique.potential_tt = tt

    if not evidence:
        # We don't need to deal with likelihoods
        # if we don't have any evidence.
        return

    # Step 2b: Set each likelihood element Λ_V(v) to 1.
    likelihoods = self.initial_likelihoods(assignments, bbn)
    for clique, bbn_nodes in assignments.iteritems():
        for node in bbn_nodes:
            if node.variable_name in evidence:
                for k, v in clique.potential_tt.items():
                    # Encode the evidence in
                    # the clique potential...
                    for variable, value in k:
                        if variable == node.variable_name:
                            if value != evidence[variable]:
                                clique.potential_tt[k] = 0
def build_gbn(*args, **kwds):
    '''Builds a Gaussian Bayesian Graph from
    a list of functions.'''
    variables = set()
    name = kwds.get('name')
    variable_nodes = dict()
    factor_nodes = dict()

    if isinstance(args[0], list):
        # Assume the functions were all
        # passed in a list in the first
        # argument. This makes it possible
        # to build very large graphs with
        # more than 255 functions, since
        # Python functions are limited to
        # 255 arguments.
        args = args[0]

    for factor in args:
        factor_args = get_args(factor)
        variables.update(factor_args)
        node = GBNNode(factor)
        factor_nodes[factor.__name__] = node

    # Now let's create the connections.
    # To do this we need to find the
    # factor node representing the variables
    # in a child factor's arguments and connect
    # it to the child node.
    # Note that calling original_factors
    # here can break build_gbn if the
    # factors do not correctly represent
    # a valid network. This will be fixed
    # in the next release.
    original_factors = get_original_factors(factor_nodes.values())
    for var_name, factor in original_factors.items():
        factor.variable_name = var_name
    for factor_node in factor_nodes.values():
        factor_args = get_args(factor_node)
        parents = [
            original_factors[arg] for arg in factor_args
            if original_factors[arg] != factor_node]
        for parent in parents:
            connect(parent, factor_node)

    # Now process the raw_betas to create a dict.
    for factor_node in factor_nodes.values():
        # We want betas to always be a dict,
        # but in the case that the node only
        # has one parent we will allow the user to specify
        # the single beta for that parent simply
        # as a number and not a dict.
        if hasattr(factor_node.func, 'raw_betas'):
            if isinstance(factor_node.func.raw_betas, Number):
                # Make sure that if they supply a number
                # there is only one parent.
                assert len(get_args(factor_node)) == 2
                betas = dict()
                for arg in get_args(factor_node):
                    if arg != factor_node.variable_name:
                        betas[arg] = factor_node.func.raw_betas
                factor_node.func.betas = betas
            else:
                factor_node.func.betas = factor_node.func.raw_betas
    gbn = GaussianBayesianGraph(original_factors, name=name)
    # Now for any conditional gaussian nodes
    # we need to tell the node function what the
    # parent parameters are so that the pdf can
    # be computed.
    sorted = gbn.get_topological_sort()
    joint_mu, joint_sigma = gbn.get_joint_parameters()
    for node in sorted:
        if hasattr(node.func, 'betas'):
            # This means it's a multivariate gaussian.
            names = [n.variable_name for n in node.parents] + \
                [node.variable_name]
            node.func.joint_mu = MeansVector.zeros(
                (len(names), 1), names=names)
            for name in names:
                node.func.joint_mu[name] = joint_mu[name][0, 0]
            node.func.covariance_matrix = CovarianceMatrix.zeros(
                (len(names), len(names)), names)
            for row, col in xproduct(names, names):
                node.func.covariance_matrix[row, col] = \
                    joint_sigma[row, col]
    return gbn
def verify(self):
    '''
    Check several properties of the
    Factor Graph that should hold.
    '''
    # Check that all nodes are either
    # instances of classes derived from
    # VariableNode or FactorNode.
    # It is a very common error to instantiate
    # the graph with the factor function
    # instead of the corresponding factor
    # node.
    for node in self.nodes:
        if not isinstance(node, VariableNode) and \
                not isinstance(node, FactorNode):
            bases = node.__class__.__bases__
            if VariableNode not in bases and FactorNode not in bases:
                print(('Factor Graph does not '
                       'support nodes of type: %s' % node.__class__))
                raise InvalidGraphException
    # Next, check that each node
    # only connects to nodes of the
    # other type.
    print('Checking neighbour node types...')
    for node in self.nodes:
        if not node.verify_neighbour_types():
            print('%s has invalid neighbour type.' % node)
            return False
    print('Checking that all factor functions have domains...')
    for node in self.nodes:
        if isinstance(node, FactorNode):
            if not hasattr(node.func, 'domains'):
                print('%s has no domains.' % node)
                raise InvalidGraphException
            elif not node.func.domains:
                # Also check for an empty domains dict!
                print('%s has empty domains.' % node)
                raise InvalidGraphException
    print('Checking that all variables are accounted for' +
          ' by at least one function...')
    variables = set(
        [vn.name for vn in self.nodes if isinstance(vn, VariableNode)])
    largs = [
        get_args(fn.func) for fn in self.nodes
        if isinstance(fn, FactorNode)]
    # Note: under Python 3, `reduce` must be imported from functools.
    args = set(reduce(lambda x, y: x + y, largs))
    if not variables.issubset(args):
        print('These variables are not used in any factor nodes: ')
        print(variables.difference(args))
        return False
    print('Checking that all arguments have matching variable nodes...')
    if not args.issubset(variables):
        print('These arguments have missing variables:')
        print(args.difference(variables))
        return False
    print('Checking that the graph has at least one leaf node...')
    leaf_nodes = [x for x in self.nodes if x.is_leaf()]
    if not leaf_nodes:
        print('Graph has no leaf nodes.')
        raise InvalidGraphException
    return True
def build_gbn(*args, **kwds):
    '''Builds a Gaussian Bayesian Graph from
    a list of functions.'''
    variables = set()
    name = kwds.get('name')
    variable_nodes = dict()
    factor_nodes = dict()

    if isinstance(args[0], list):
        # Assume the functions were all
        # passed in a list in the first
        # argument. This makes it possible
        # to build very large graphs with
        # more than 255 functions, since
        # Python functions are limited to
        # 255 arguments.
        args = args[0]

    for factor in args:
        factor_args = get_args(factor)
        variables.update(factor_args)
        node = GBNNode(factor)
        factor_nodes[factor.__name__] = node

    # Now let's create the connections.
    # To do this we need to find the
    # factor node representing the variables
    # in a child factor's arguments and connect
    # it to the child node.
    # Note that calling original_factors
    # here can break build_gbn if the
    # factors do not correctly represent
    # a valid network. This will be fixed
    # in the next release.
    original_factors = get_original_factors(list(factor_nodes.values()))
    for var_name, factor in list(original_factors.items()):
        factor.variable_name = var_name
    for factor_node in list(factor_nodes.values()):
        factor_args = get_args(factor_node)
        parents = [original_factors[arg] for arg in factor_args
                   if original_factors[arg] != factor_node]
        for parent in parents:
            connect(parent, factor_node)

    # Now process the raw_betas to create a dict.
    for factor_node in list(factor_nodes.values()):
        # We want betas to always be a dict,
        # but in the case that the node only
        # has one parent we will allow the user to specify
        # the single beta for that parent simply
        # as a number and not a dict.
        if hasattr(factor_node.func, 'raw_betas'):
            if isinstance(factor_node.func.raw_betas, Number):
                # Make sure that if they supply a number
                # there is only one parent.
                assert len(get_args(factor_node)) == 2
                betas = dict()
                for arg in get_args(factor_node):
                    if arg != factor_node.variable_name:
                        betas[arg] = factor_node.func.raw_betas
                factor_node.func.betas = betas
            else:
                factor_node.func.betas = factor_node.func.raw_betas
    gbn = GaussianBayesianGraph(original_factors, name=name)
    # Now for any conditional gaussian nodes
    # we need to tell the node function what the
    # parent parameters are so that the pdf can
    # be computed.
    sorted = gbn.get_topological_sort()
    joint_mu, joint_sigma = gbn.get_joint_parameters()
    for node in sorted:
        if hasattr(node.func, 'betas'):
            # This means it's a multivariate gaussian.
            names = [n.variable_name for n in node.parents] + \
                [node.variable_name]
            node.func.joint_mu = MeansVector.zeros(
                (len(names), 1), names=names)
            for name in names:
                node.func.joint_mu[name] = joint_mu[name][0, 0]
            node.func.covariance_matrix = CovarianceMatrix.zeros(
                (len(names), len(names)), names)
            for row, col in xproduct(names, names):
                node.func.covariance_matrix[row, col] = \
                    joint_sigma[row, col]
    return gbn
def __init__(self, source, destination, factors, func):
    self.source = source
    self.destination = destination
    self.factors = factors
    self.argspec = get_args(func)
    self.func = func
def __repr__(self):
    return '<FactorNode %s %s(%s)>' % \
        (self.name, self.func.__name__, get_args(self.func))
def eliminate_var(f, var):
    '''
    Given a function f, return a new
    function which sums over the
    variable we want to eliminate.

    This may be where we have the opportunity
    to remove the use of .value...
    '''
    arg_spec = get_args(f)
    pos = arg_spec.index(var)
    new_spec = arg_spec[:]
    new_spec.remove(var)
    # Let's say the original argspec is
    # ('a', 'b', 'c', 'd') and they
    # are all Booleans.
    # Now let's say we want to eliminate c.
    # This means we want to sum over
    # f(a, b, True, d) and f(a, b, False, d).
    # It seems like all we have to do is know
    # the position of c and that's it???
    # Ok, so it's not as simple as that...
    # this is because when the *call* is made
    # to the eliminated function, as opposed
    # to when it's built, it's only
    # called with ('a', 'b', 'd').
    eliminated_pos = arg_spec.index(var)

    def eliminated(*args):
        template = arg_spec[:]
        total = 0
        call_args = template[:]
        i = 0
        for arg in args:
            # To be able to remove .value we
            # first need to also be able to
            # remove .name; in fact .value is
            # just a side effect of having to
            # rely on .name. This means we
            # probably need to construct a
            # list containing the names
            # of the args based on the position
            # they are being called in.
            if i == eliminated_pos:
                # We need to increment i
                # once more to skip over
                # the variable being marginalized.
                call_args[i] = 'marginalize me!'
                i += 1
            call_args[i] = arg
            i += 1
        for val in f.domains[var]:
            #v = VariableNode(name=var)
            #v.value = val
            #call_args[pos] = v
            call_args[pos] = val
            total += f(*call_args)
        return total

    eliminated.argspec = new_spec
    eliminated.domains = f.domains
    #eliminated.__name__ = f.__name__
    return eliminated
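# A hedged worked example: marginalize 'b' out of a two-argument factor
# (it assumes eliminate_var and get_args are importable from this module).
# eliminate_var reads f.domains, so we attach the domains by hand here.
def f_xy(x, y):
    table = {
        (True, True): 0.3, (True, False): 0.2,
        (False, True): 0.1, (False, False): 0.4,
    }
    return table[(x, y)]

f_xy.domains = {'x': [True, False], 'y': [True, False]}
f_x = eliminate_var(f_xy, 'y')
print(f_x.argspec)    # ['x']
print(f_x(True))      # 0.3 + 0.2 = 0.5
print(f_x(False))     # 0.1 + 0.4 = 0.5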
def verify(self):
    '''
    Check several properties of the
    Factor Graph that should hold.
    '''
    # Check that all nodes are either
    # instances of classes derived from
    # VariableNode or FactorNode.
    # It is a very common error to instantiate
    # the graph with the factor function
    # instead of the corresponding factor
    # node.
    for node in self.nodes:
        if not isinstance(node, VariableNode) and \
                not isinstance(node, FactorNode):
            bases = node.__class__.__bases__
            if VariableNode not in bases and FactorNode not in bases:
                print ('Factor Graph does not '
                       'support nodes of type: %s' % node.__class__)
                raise InvalidGraphException
    # Next, check that each node
    # only connects to nodes of the
    # other type.
    print 'Checking neighbour node types...'
    for node in self.nodes:
        if not node.verify_neighbour_types():
            print '%s has invalid neighbour type.' % node
            return False
    print 'Checking that all factor functions have domains...'
    for node in self.nodes:
        if isinstance(node, FactorNode):
            if not hasattr(node.func, 'domains'):
                print '%s has no domains.' % node
                raise InvalidGraphException
            elif not node.func.domains:
                # Also check for an empty domains dict!
                print '%s has empty domains.' % node
                raise InvalidGraphException
    print 'Checking that all variables are accounted for' + \
        ' by at least one function...'
    variables = set([vn.name for vn in self.nodes
                     if isinstance(vn, VariableNode)])
    largs = [get_args(fn.func) for fn in self.nodes
             if isinstance(fn, FactorNode)]
    args = set(reduce(lambda x, y: x + y, largs))
    if not variables.issubset(args):
        print 'These variables are not used in any factor nodes: '
        print variables.difference(args)
        return False
    print 'Checking that all arguments have matching variable nodes...'
    if not args.issubset(variables):
        print 'These arguments have missing variables:'
        print args.difference(variables)
        return False
    print 'Checking that the graph has at least one leaf node...'
    leaf_nodes = filter(
        lambda x: x.is_leaf(), self.nodes)
    if not leaf_nodes:
        print 'Graph has no leaf nodes.'
        raise InvalidGraphException
    return True