def main():
    """Build the credit-card fraud network and query 'Fraud' under two evidence sets.

    Six factors are constructed from hard-coded tables, then
    ``Factor.inference`` is called once with evidence ``IP='t'`` and once
    with evidence ``IP='t', CRP='t'``.
    """
    trav = Factor(['Trav'], [['t', 'f']], [0.05, 0.95])
    fraud_trav = Factor(
        ['Fraud', 'Trav'],
        [['t', 't', 'f', 'f'],
         ['t', 'f', 't', 'f']],
        [0.01, 0.004, 0.99, 0.996])
    fp_fraud_trav = Factor(
        ['FP', 'Fraud', 'Trav'],
        [['t', 't', 't', 't', 'f', 'f', 'f', 'f'],
         ['t', 't', 'f', 'f', 't', 't', 'f', 'f'],
         ['t', 'f', 't', 'f', 't', 'f', 't', 'f']],
        [0.9, 0.1, 0.9, 0.01, 0.1, 0.9, 0.1, 0.99])
    ip_fraud_oc = Factor(
        ['IP', 'Fraud', 'OC'],
        [['t', 't', 't', 't', 'f', 'f', 'f', 'f'],
         ['t', 't', 'f', 'f', 't', 't', 'f', 'f'],
         ['t', 'f', 't', 'f', 't', 'f', 't', 'f']],
        [0.02, 0.011, 0.01, 0.001, 0.98, 0.989, 0.99, 0.999])
    crp_oc = Factor(
        ['CRP', 'OC'],
        [['t', 't', 'f', 'f'],
         ['t', 'f', 't', 'f']],
        [0.1, 0.001, 0.9, 0.999])
    oc = Factor(['OC'], [['t', 'f']], [0.6, 0.4])

    all_factors = [trav, fraud_trav, fp_fraud_trav, ip_fraud_oc, crp_oc, oc]
    query = ['Fraud']
    elimination_candidates = ['Trav', 'FP', 'Fraud', 'IP', 'OC', 'CRP']

    # Same model and query, two different sets of observations.
    evidence_ip = {'IP': 't'}
    evidence_ip_crp = {'IP': 't', 'CRP': 't'}
    Factor.inference(all_factors, query, elimination_candidates, evidence_ip)
    Factor.inference(all_factors, query, elimination_candidates, evidence_ip_crp)
def generate_complete(nb_vars, delta):
    """Create a fully connected pairwise model over ``nb_vars`` two-state variables.

    Args:
        nb_vars: number of variables; variable ``i`` is named
            ``ith_object_name('V', i)``.
        delta: scale applied to the random pairwise couplings, which are
            drawn uniformly from [-1, 1) before scaling.

    Returns:
        A ``GraphicalModel`` holding one 2x2 pairwise factor per unordered
        pair ``i < j`` followed by one bias factor per variable (biases are
        drawn uniformly from [-0.1, 0.1)).
    """
    model = GraphicalModel()

    # Couplings are drawn before biases, matching the original draw order so
    # that seeded runs stay reproducible.
    couplings = delta * np.random.uniform(-1.0, 1.0, nb_vars * (nb_vars - 1))
    biases = np.random.uniform(-0.1, 0.1, [nb_vars])

    for idx in range(nb_vars):
        model.add_variable(ith_object_name('V', idx))

    for first in range(nb_vars):
        for second in range(first + 1, nb_vars):
            beta = couplings[first * nb_vars + second]
            # Equal states get +beta, differing states get -beta.
            table = np.array([beta, -beta, -beta, beta]).reshape([2, 2])
            model.add_factor(Factor(
                name=ijth_object_name('F', first, second),
                variables=[ith_object_name('V', first),
                           ith_object_name('V', second)],
                log_values=table))

    for idx in range(nb_vars):
        model.add_factor(Factor(
            name=ith_object_name('B', idx),
            variables=[ith_object_name('V', idx)],
            log_values=np.array([biases[idx], -biases[idx]])))

    return model
def sumOutVariable(factor, variable):
    """
    Sum out a variable from a factor

    Builds a new ``Factor`` over the remaining variables (using the
    module-level ``mnCards``) and fills its ``phi`` by adding up, for each
    output entry, the ``card`` source entries that are ``stride`` apart for
    the eliminated variable.

    Args:
        factor: source factor; reads ``variables``, ``size``, ``card``,
            ``stride`` and ``phi``.
        variable: the variable to eliminate; must be in ``factor.variables``
            (``list.index`` raises ``ValueError`` otherwise).

    Returns:
        The new factor with ``variable`` summed out.
    """
    newVars = [x for x in factor.variables if x != variable]
    varIndex = factor.variables.index(variable)
    newF = Factor(newVars, mnCards)
    if debug:
        newF.printF()

    stride = factor.stride[varIndex]
    card = factor.card[varIndex]
    usedVar = [False] * factor.size
    for i in range(newF.size):
        newF.phi.append(0)
        # The first source entry not yet consumed starts the next group
        # (falls back to 0 when everything has been consumed, as before).
        start = next((k for k, seen in enumerate(usedVar) if not seen), 0)
        for j in range(card):
            key = start + stride * j
            newF.phi[i] += factor.phi[key]
            usedVar[key] = True
        if debug:
            # Single-argument print works on Python 2 and 3; the spacing
            # mirrors the output of the original comma-separated print.
            print("phi:  %s   %s" % (i, newF.phi[i]))
    return newF
def run(self, query, observed, elim_order, log):
    """Run variable elimination for ``query`` and return the normalized result.

    Args:
        query: name of the query node (concatenated into log text).
        observed: evidence passed through to ``self._makeFactor``.
        elim_order: iterable of node names to eliminate, in order.
        log: object with a ``write(str)`` method receiving a trace.

    Returns:
        ``result_product.normalize(query)`` for the product of the factors
        left after elimination.

    Side effects:
        Sets ``self.wall_clock_time`` to the elapsed time and adds the number
        of multiplication steps to ``self.nr_multiplication_steps``.
    """
    started = datetime.datetime.now()
    self.wall_clock_time = started

    def numbered(position, item):
        # One log entry for a factor, labelled with its 1-based position.
        return "factor " + str(position) + ":\n" + str(item) + "\n"

    # Initial factors: one per network node that still has free variables.
    factors = np.array([])
    for node in self.network.nodes:
        candidate = self._makeFactor(node, observed, log)
        if candidate.nr_nodes > 0:
            factors = np.append(factors, candidate)
            log.write(numbered(factors.size, candidate))

    for node in elim_order:
        touching = [
            position for position, candidate in enumerate(factors)
            if node in candidate.nodes
        ]
        node_factors = factors[touching]
        product, steps = Factor.product(node_factors.copy())
        self.nr_multiplication_steps += steps

        if product.nr_nodes > 1:
            marginalization = product.marginalize(node)
            factors = np.append(factors, marginalization)
            log.write(
                "\n---------------------------------------------------------------\n"
                "Eliminating " + node + "\n\n")
            log.write("Multiply\n")
            for position, node_factor in enumerate(node_factors):
                log.write(numbered(position + 1, node_factor))
            log.write("\nProduct:\n" + str(product) + "\n")
            log.write("\nMarginalization:\n" + str(marginalization))

        # The appended marginal sits at the end, so the old indices still
        # address the factors that were just multiplied together.
        factors = np.delete(factors, touching)

    result_product, nr_multiplication_steps = Factor.product(factors.copy())
    result = result_product.normalize(query)

    log.write(
        "\n\n---------------------------------------------------------------\n"
        "Producing query " + query + "\n\n")
    log.write("Multiply\n")
    for position, remaining in enumerate(factors):
        log.write(numbered(position + 1, remaining))
    log.write("\nProduct:\n" + str(result_product) + "\n")
    log.write("\nNormalization:\n" + str(result))

    self.wall_clock_time = datetime.datetime.now() - self.wall_clock_time
    self.nr_multiplication_steps += nr_multiplication_steps
    return result
def sum_out(var, factors):
    """Marginalize ``var`` out of every factor that mentions it.

    Factors that do not contain ``var`` are passed through unchanged (the
    same objects). For each factor that does, a new ``Factor`` is built over
    the remaining variables, named by concatenating their ``name``
    attributes, with ``phi`` holding one total per assignment of the
    remaining variables, in order of first appearance in ``f.phi``.

    The previous pairwise scan only paired row ``i`` with later rows, so for
    a variable with more than two values it also emitted partial sums, and
    for a variable with a single value it emitted nothing. Rows are now
    grouped by their remaining assignment, which is correct for any number
    of values and linear in ``len(f.phi)``.

    Args:
        var: the variable to eliminate.
        factors: iterable of factors exposing ``variables``, ``phi`` and
            ``index_to_assignment(i)``.

    Returns:
        A new list of factors, in the same order as ``factors``.
    """
    new_factor_list = []
    for f in factors:
        if var not in f.variables:
            # Nothing to sum out for this factor.
            new_factor_list.append(f)
            continue

        new_variables = list(f.variables)
        new_variables.remove(var)
        new_f = Factor(''.join(v.name for v in new_variables), new_variables)

        v_index = f.variables.index(var)
        first_seen = []   # remaining-assignment keys in order of appearance
        totals = {}
        for i in range(len(f.phi)):
            remaining = list(f.index_to_assignment(i))
            del remaining[v_index]
            key = tuple(remaining)
            if key in totals:
                totals[key] = totals[key] + f.phi[i]
            else:
                totals[key] = f.phi[i]
                first_seen.append(key)

        new_f.phi = np.append(np.zeros((0,)), [totals[key] for key in first_seen])
        new_factor_list.append(new_f)
    return new_factor_list
def factor_sum(A, B):
    """Add two factors entry-wise over the union of their variables.

    Mirrors ``factor_product`` but combines matching rows with ``+``.
    If either input is empty the other one is returned as is.
    """
    if A.is_empty():
        return B
    if B.is_empty():
        return A

    result = Factor()
    result.var = np.union1d(A.var, B.var)
    result.card = np.zeros(len(result.var), np.int64)

    # For each operand, locate its variables inside the output ordering and
    # copy the matching cardinalities across.
    columns = []
    for operand in (A, B):
        where = np.argmax(result.var[None, :] == operand.var[:, None], axis=-1)
        result.card[where] = operand.card
        columns.append(where)
    cols_a, cols_b = columns

    result.val = np.zeros(np.prod(result.card))
    rows = result.get_all_assignments()

    # Translate every output assignment into the row it selects in A and in B.
    rows_in_a = assignment_to_index(rows[:, cols_a], A.card)
    rows_in_b = assignment_to_index(rows[:, cols_b], B.card)

    result.val = A.val[rows_in_a] + B.val[rows_in_b]
    return result
def test_get_multiplied(self):
    """Product of two factors sharing their middle variable."""
    variables = self.random_variables
    left = Factor([variables[0], variables[1]])
    right = Factor([variables[1], variables[2]])
    left.add_value([1, 3], 10).add_value([1, 4], 20).add_value([2, 4], 30)
    right.add_value([3, 2], 5).add_value([4, 2], 2)

    product_factor = left.get_multiplied(right)

    # (assignment, expected value); rows missing from either input give 0.
    expectations = [
        ({'x': 1, 'y': 3, 'z': 2}, 50),
        ({'x': 1, 'y': 4, 'z': 2}, 40),
        ({'x': 2, 'y': 4, 'z': 2}, 60),
        ({'x': 1, 'y': 5, 'z': 2}, 0),
        ({'x': 2, 'y': 5, 'z': 2}, 0),
        ({'x': 2, 'y': 5, 'z': 2}, 0),
    ]
    for assignment, expected in expectations:
        self.assertEqual(product_factor.get_value(assignment), expected)
def ComputeMarginal(jt_cliques, jt_edges, jt_clique_factors, msg):
    """Compute the normalized belief of every junction-tree clique.

    For clique ``i`` the incoming messages ``msg[neighbor][i]`` from all of
    its neighbours (per ``Find_Neighbors``) are multiplied together, then
    multiplied with ``jt_clique_factors[i]``, and the values are divided by
    their sum.

    Returns:
        List with one normalized factor per clique, in clique order.
    """
    beliefs = []
    for clique_index in range(len(jt_cliques)):
        incoming = Factor()
        for neighbor in Find_Neighbors(jt_edges, clique_index):
            incoming = factor_product(incoming, msg[neighbor][clique_index])

        belief = factor_product(jt_clique_factors[clique_index], incoming)
        total = np.sum(belief.val)
        belief.val = belief.val / (total * 1.0)
        beliefs.append(belief)
    return beliefs
def test_cycle(self):
    """Marginal inference is invoked on a three-variable cycle (x-y, x-z, y-z)."""
    variables = self.random_variables
    pairwise = [
        Factor([variables[0], variables[1]]),
        Factor([variables[0], variables[2]]),
        Factor([variables[1], variables[2]]),
    ]

    graphical_model = GraphicalModel()
    for factor in pairwise:
        graphical_model.add_factor(factor)

    inference = MpMarginalInference(graphical_model)
    inference.get_marginal_factor([variables[0]], {})
def test_add_factors(self):
    """Add three factors with overlapping variables and print the model's variables."""
    graphical_model = GraphicalModel()
    to_add = [
        Factor([RandomVariable('x', (1, 2))]),
        Factor([RandomVariable('y', (2, 3, 4)), RandomVariable('z', (2, ))]),
        Factor([RandomVariable('x', (1, 2)), RandomVariable('z', (2, ))]),
    ]
    for factor in to_add:
        graphical_model.add_factor(factor)
    print(graphical_model.random_variables)
def _get_conditional_probability(nodes, edges, factors, evidence, initial_samples, num_iterations, num_burn_in):
    """
    Returns the conditional probability p(Xf | Xe) where Xe is the set of observed nodes and Xf are the query nodes
    i.e. the unobserved nodes. The conditional probability is approximated using Gibbs sampling.

    Args:
        nodes: numpy array of nodes e.g. [x1, x2, ...].
        edges: numpy array of edges e.g. [i, j] implies that nodes[i] is the parent of nodes[j].
        factors: dictionary of Factors e.g. factors[x1] returns the conditional probability of x1 given all other nodes.
        evidence: dictionary of evidence e.g. evidence[x4] returns the provided evidence for x4.
        initial_samples: dictionary of initial samples to initialize Gibbs sampling.
        num_iterations: number of sampling iterations
        num_burn_in: number of burn-in iterations

    Returns:
        returns Factor of conditional probability.

    Notes:
        ``num_burn_in + num_iterations`` samples are drawn; the first
        ``num_burn_in`` are discarded and all of the rest are tallied.
        ``factors`` and ``initial_samples`` are copied, so the caller's
        dictionaries are not modified.
        The result's ``var``/``card`` are taken from the reduced
        ``factors[0]`` -- NOTE(review): this assumes that factor spans
        exactly the unobserved nodes; confirm against the caller.
    """
    assert num_iterations > num_burn_in
    conditional_prob = Factor()

    # Reduce every factor by the evidence without touching the caller's dict.
    factors = {key: factor_evidence(factor, evidence)
               for key, factor in factors.items()}

    # Observed nodes are neither sampled nor part of the running sample.
    samples = dict(initial_samples)
    for node in list(evidence.keys()):
        samples.pop(node)
        nodes = np.delete(nodes, np.argwhere(nodes == node))

    total_run = num_burn_in + num_iterations
    sample_result = np.zeros([total_run, len(nodes)])
    for step in range(total_run):
        samples = _sample_step(nodes, factors, samples)
        sample_result[step] = np.array(list(samples.values()))

    card = factors[0].card
    counts = np.zeros(len(factors[0].val))
    # Tally every post-burn-in sample. The previous upper bound of
    # num_iterations silently dropped the last num_burn_in samples.
    for step in range(num_burn_in, total_run):
        counts[int(assignment_to_index(sample_result[step], card))] += 1

    conditional_prob.var = factors[0].var
    conditional_prob.card = card
    conditional_prob.val = counts / np.sum(counts)
    return conditional_prob
def __init__(self, *args, **kwargs):
    """Set up four shared random variables (a-d) and three factors over a, b, c."""
    super().__init__(*args, **kwargs)
    var_a = RandomVariable('a', (1, 2))
    var_b = RandomVariable('b', (3, 4, 5))
    var_c = RandomVariable('c', (2, ))
    var_d = RandomVariable('d', (1, 2, 3, 4, 5, 6))
    self.random_variables = [var_a, var_b, var_c, var_d]
    self.factors = [
        Factor([var_a]),
        Factor([var_a, var_b]),
        Factor([var_b, var_c]),
    ]
def test_get_max_marginalized(self):
    """Max-marginalizing returns the maxima and the maximizing assignments."""
    source = Factor(self.random_variables)
    source.add_value([1, 3, 2], 10).add_value([1, 4, 2], 5).add_value([1, 5, 2], 2)
    source.add_value([2, 4, 2], 20).add_value([2, 5, 2], 30)

    # Eliminate only the first variable.
    factor, assignment = source.get_max_marginalized([self.random_variables[0]])
    kept_variables = [self.random_variables[1], self.random_variables[2]]
    self.assertEqual(20, factor.get_value({'y': 4, 'z': 2}))
    self.assertEqual(30, factor.get_value({'y': 5, 'z': 2}))
    self.assertEqual(10, factor.get_value({'y': 3, 'z': 2}))

    # Re-order each assignment key into (y, z) order before comparing, since
    # the returned factor may list its variables in a different order.
    positions = [factor.random_variables.index(variable)
                 for variable in kept_variables]
    reordered = {}
    for key, value in assignment.items():
        new_key = tuple(key[positions[slot]] for slot in range(len(key)))
        reordered[new_key] = value
    self.assertEqual({
        (3, 2): (1, ),
        (4, 2): (2, ),
        (5, 2): (2, )
    }, reordered)

    # Eliminate every variable: a single maximum and its full assignment.
    factor, assignment = source.get_max_marginalized(self.random_variables)
    self.assertEqual(30, factor.get_value([]))
    self.assertEqual({(): (2, 5, 2)}, assignment)
def test_get_normalized(self):
    """Normalizing divides each stored value by the total (10 + 20 + 30 = 60).

    Uses ``assertAlmostEqual``; the ``assertAlmostEquals`` alias used before
    is deprecated and no longer exists in Python 3.12.
    """
    factor = Factor(self.random_variables)
    factor.add_value([1, 3, 2], 10).add_value([2, 4, 2], 20).add_value([2, 5, 2], 30)
    factor = factor.get_normalized()

    # (expected probability, assignment); assignments never added stay 0.
    expectations = [
        (1 / 6, [1, 3, 2]),
        (1 / 3, [2, 4, 2]),
        (1 / 2, [2, 5, 2]),
        (0, [1, 4, 2]),
        (0, [1, 5, 2]),
        (0, [2, 3, 2]),
    ]
    for expected, assignment in expectations:
        self.assertAlmostEqual(expected, factor.get_value(assignment))
def _sample_step(nodes, proposal_factors):
    """
    Draws one sample for every node, visiting the nodes in the given order.

    Each node's proposal factor is first reduced by the values already drawn
    for earlier nodes, then a state is drawn with np.random.choice using the
    reduced factor's values as probabilities.

    Args:
        nodes: numpy array of nodes. nodes are sampled in the order specified in nodes
        proposal_factors: dictionary of proposal factors where proposal_factors[1] returns the
                sample distribution for node 1

    Returns:
        dictionary of node samples where samples[1] returns the sample for node 1.
        NOTE(review): because a size of 1 is passed to np.random.choice, each
        stored sample is a length-1 array rather than a bare scalar.
    """
    samples = {}
    drawn_so_far = {}
    for node in nodes:
        conditioned = factor_evidence(proposal_factors[node], drawn_so_far)
        state = np.random.choice(conditioned.card[0], 1, p=conditioned.val)
        samples[node] = state
        drawn_so_far[node] = state

    assert len(samples.keys()) == len(nodes)
    return samples
def main():
    """Construct the six fraud-network factors and print them as a formatted table."""
    trav = Factor(['Trav'], [['t', 'f']], [0.05, 0.95])
    fraud_trav = Factor(
        ['Fraud', 'Trav'],
        [['t', 't', 'f', 'f'],
         ['t', 'f', 't', 'f']],
        [0.01, 0.004, 0.99, 0.996])
    fp_fraud_trav = Factor(
        ['FP', 'Fraud', 'Trav'],
        [['t', 't', 't', 't', 'f', 'f', 'f', 'f'],
         ['t', 't', 'f', 'f', 't', 't', 'f', 'f'],
         ['t', 'f', 't', 'f', 't', 'f', 't', 'f']],
        [0.9, 0.1, 0.9, 0.01, 0.1, 0.9, 0.1, 0.99])
    ip_fraud_oc = Factor(
        ['IP', 'Fraud', 'OC'],
        [['t', 't', 't', 't', 'f', 'f', 'f', 'f'],
         ['t', 't', 'f', 'f', 't', 't', 'f', 'f'],
         ['t', 'f', 't', 'f', 't', 'f', 't', 'f']],
        [0.02, 0.011, 0.01, 0.001, 0.98, 0.989, 0.99, 0.999])
    crp_oc = Factor(
        ['CRP', 'OC'],
        [['t', 't', 'f', 'f'],
         ['t', 'f', 't', 'f']],
        [0.1, 0.001, 0.9, 0.999])
    oc = Factor(['OC'], [['t', 'f']], [0.6, 0.4])

    # Display order: OC is listed before CRP/OC.
    display_order = [trav, fraud_trav, fp_fraud_trav, ip_fraud_oc, oc, crp_oc]
    Factor.textFormattedPrint('Original factors', 30, display_order)
def _get_clique_factors(jt_cliques, factors): """ Assign node factors to cliques in the junction tree and derive the clique factors. Args: jt_cliques: list of junction tree maximal cliques e.g. [[x1, x2, x3], [x2, x3], ... ] factors: list of factors from the original graph Returns: list of clique factors where the factor(jt_cliques[i]) = clique_factors[i] """ clique_factors = [Factor() for _ in jt_cliques] """ YOUR CODE HERE """ for i in range(len(clique_factors)): jt_clique = jt_cliques[i] temp_factor = factors.copy() for factor in temp_factor: ret = list(set(jt_clique).intersection(list(factor.var))) if sorted(ret) == sorted(list(factor.var)): clique_factors[i] = factor_product(clique_factors[i], factor) factors.remove(factor) """ END YOUR CODE HERE """ assert len(clique_factors) == len( jt_cliques ), 'there should be equal number of cliques and clique factors' return clique_factors
def _update_reparameterization_for_(self, variable):
    """Pull the beliefs of all replicas of ``variable`` toward their weighted average.

    For every replica the upper marginal is reduced to that replica,
    relabelled as ``variable`` and normalized. The weighted sum of the
    replicas' ``log_values`` (weights from ``self.holder_weights_for_``) is
    then used to multiply a correction factor into each replica's entry in
    ``self.factor_upper_to_``.

    Returns:
        True when, for every replica, the summed absolute difference between
        its log-belief and the average is at most 1e-2; False otherwise.
    """
    replicas = self.variables_replicated_from_[variable]

    belief_from_ = {}
    log_average_belief = 0.0
    for rvar in replicas:
        belief = self.get_marginals_upper_to_(rvar)
        belief_from_[rvar] = belief
        belief.marginalize_except_([rvar])
        belief.variables = [variable]
        belief.normalize()
        log_average_belief = (
            log_average_belief +
            self.holder_weights_for_[rvar] * belief.log_values)

    converged = True
    for rvar in replicas:
        gap = -belief_from_[rvar].log_values + log_average_belief
        if np.sum(np.abs(gap)) > 1e-2:
            converged = False

        correction = Factor(
            "", [rvar],
            log_values=(self.holder_weights_for_[rvar]) * gap)
        self.factor_upper_to_[rvar].product(correction)

    return converged
def main():
    """Query 'Fraud' in the fraud network given FP, CRP, IP and Trav observations."""
    elimination_candidates = ['Trav', 'FP', 'Fraud', 'IP', 'OC', 'CRP']

    trav = Factor(['Trav'], [['t', 'f']], [0.05, 0.95])
    fraud_trav = Factor(
        ['Fraud', 'Trav'],
        [['t', 't', 'f', 'f'],
         ['t', 'f', 't', 'f']],
        [0.01, 0.004, 0.99, 0.996])
    fp_fraud_trav = Factor(
        ['FP', 'Fraud', 'Trav'],
        [['t', 't', 't', 't', 'f', 'f', 'f', 'f'],
         ['t', 't', 'f', 'f', 't', 't', 'f', 'f'],
         ['t', 'f', 't', 'f', 't', 'f', 't', 'f']],
        [0.9, 0.1, 0.9, 0.01, 0.1, 0.9, 0.1, 0.99])
    ip_fraud_oc = Factor(
        ['IP', 'Fraud', 'OC'],
        [['t', 't', 't', 't', 'f', 'f', 'f', 'f'],
         ['t', 't', 'f', 'f', 't', 't', 'f', 'f'],
         ['t', 'f', 't', 'f', 't', 'f', 't', 'f']],
        [0.02, 0.011, 0.01, 0.001, 0.98, 0.989, 0.99, 0.999])
    crp_oc = Factor(
        ['CRP', 'OC'],
        [['t', 't', 'f', 'f'],
         ['t', 'f', 't', 'f']],
        [0.1, 0.001, 0.9, 0.999])
    oc = Factor(['OC'], [['t', 'f']], [0.6, 0.4])

    # Evidence is passed as [variable, value] pairs.
    observations = [['FP', 't'], ['CRP', 't'], ['IP', 'f'], ['Trav', 't']]
    Factor.inference(
        [trav, fraud_trav, fp_fraud_trav, ip_fraud_oc, oc, crp_oc],
        ['Fraud'],
        elimination_candidates,
        observations)
def test_get_constant_multiplied(self):
    """Scaling by 2 doubles every stored value and leaves absent entries at 0."""
    factor = Factor(self.random_variables)
    factor.add_value([1, 3, 2], 10).add_value([2, 4, 2], 20).add_value([2, 5, 2], 30)
    factor = factor.get_constant_multiplied(2)

    expectations = [
        ([1, 3, 2], 20),
        ([2, 4, 2], 40),
        ([2, 5, 2], 60),
        ([1, 4, 2], 0),
        ([2, 3, 2], 0),
    ]
    for assignment, expected in expectations:
        self.assertEqual(factor.get_value(assignment), expected)
def test_get_inverse(self):
    """Inverting gives reciprocals; entries that were never set invert to infinity."""
    factor = Factor(self.random_variables)
    factor.add_value([1, 3, 2], 10).add_value([2, 4, 2], 20).add_value([2, 5, 2], 30)
    factor = factor.get_inverse()

    expectations = [
        ([1, 3, 2], 1 / 10),
        ([2, 4, 2], 1 / 20),
        ([2, 5, 2], 1 / 30),
        ([1, 4, 2], float("inf")),
        ([2, 3, 2], float("inf")),
    ]
    for assignment, expected in expectations:
        self.assertEqual(factor.get_value(assignment), expected)
def readFunctionTables():
    """
    Reads function tables defined in the input file

    Reads rows from the module-level ``inFile`` until one table has been
    read for every clique in ``mnCliques``. Each table is an entry count
    followed by that many float values; the count may share a row with the
    first values or sit on its own row, and values may span several rows.
    Blank rows are skipped. The resulting factors are stored in the global
    ``mnFactors`` (one ``Factor(mnCliques[i], mnCards)`` per clique, with
    ``phi`` set to the parsed values). When ``debug`` is set, each factor is
    printed via ``printF()``.

    Raises:
        ValueError: if the input ends before every clique has a table.
            Previously this case looped forever, because an end-of-file read
            was indistinguishable from a blank row.
    """
    global mnFactors
    mnFactors = []
    factorIndex, factorNum = 0, 0
    fClass = Factor(mnCliques[factorIndex], mnCards)
    factor = []

    while len(mnFactors) < len(mnCliques):
        rawRow = inFile.readline()
        if not rawRow:
            # readline() returns an empty string only at end of file.
            raise ValueError(
                "unexpected end of input: read %d of %d function tables"
                % (len(mnFactors), len(mnCliques)))

        tokens = rawRow.split()
        if not tokens:
            continue

        if factorNum == 0:
            # First token of a new table is its entry count.
            factorNum = int(tokens[0])
            tokens = tokens[1:]
        # Everything else belongs to the current table, including a row that
        # holds a single value (formerly misread as a new count).
        factor.extend(float(token) for token in tokens)

        if factorNum > 0 and factorNum == len(factor):
            # Reset for next factor
            factorNum = 0
            fClass.phi = factor
            mnFactors.append(fClass)
            factor = []
            factorIndex += 1
            if factorIndex < len(mnCliques):
                fClass = Factor(mnCliques[factorIndex], mnCards)

    if debug:
        for parsed in mnFactors:
            parsed.printF()
def main():
    """Exercise the Factor operations and print each intermediate result.

    Runs, in order: two restrictions on a three-variable factor, a product
    of two factors followed by normalization, a sum-out, and a chained
    inference example. All output goes through single-argument ``print(...)``
    calls, which behave the same on Python 2 and Python 3 (the previous
    ``print x`` statements are a syntax error on Python 3).
    """
    values = [['t', 't', 't', 't', 'f', 'f', 'f', 'f'],
              ['t', 't', 'f', 'f', 't', 't', 'f', 'f'],
              ['t', 'f', 't', 'f', 't', 'f', 't', 'f']]
    variables = ['X', 'Y', 'Z']
    mappedValues = [0.1, 0.9, 0.2, 0.8, 0.4, 0.6, 0.3, 0.7]

    # Restriction
    aFactor = Factor(variables, values, mappedValues)
    print(aFactor)
    aFactor.restrict('Z', 'f')
    print(aFactor)
    aFactor.restrict('X', 't')
    print(aFactor)
    del aFactor

    # Product and normalization
    factor1 = Factor(['A', 'B'],
                     [['t', 't', 'f', 'f'], ['t', 'f', 't', 'f']],
                     [0.1, 0.9, 0.2, 0.8])
    factor2 = Factor(['B', 'C'],
                     [['t', 't', 'f', 'f'], ['t', 'f', 't', 'f']],
                     [0.3, 0.7, 0.6, 0.4])
    print(factor1)
    print(factor2)
    newFactor = Factor.multiply(factor1, factor2)
    del factor1
    del factor2
    print(newFactor)
    newFactor.normalize()
    print(newFactor)
    del newFactor

    # Sum-out
    anotherFactor = Factor(['A', 'B', 'C'],
                           [['t', 't', 't', 't', 'f', 'f', 'f', 'f'],
                            ['t', 't', 'f', 'f', 't', 't', 'f', 'f'],
                            ['t', 'f', 't', 'f', 't', 'f', 't', 'f']],
                           [0.03, 0.07, 0.54, 0.36, 0.06, 0.14, 0.48, 0.32])
    anotherFactor.sumout('B')
    print(anotherFactor)

    # example from slide 75
    factorA = Factor(['A'], [['t', 'f']], [0.3, 0.7])
    factorCA = Factor(['A', 'C'], [['t', 't', 'f', 'f'], ['t', 'f', 't', 'f']], [0.8, 0.2, 0.15, 0.85])
    factorGC = Factor(['C', 'G'], [['t', 't', 'f', 'f'], ['t', 'f', 't', 'f']], [1.0, 0.0, 0.2, 0.8])
    factorLG = Factor(['G', 'L'], [['t', 't', 'f', 'f'], ['t', 'f', 't', 'f']], [0.7, 0.3, 0.2, 0.8])
    factorStrueL = Factor(['L', 'S'], [['t', 't', 'f', 'f'], ['t', 'f', 't', 'f']], [0.9, 0.1, 0.3, 0.7])
    resultFactor = Factor.inference(
        [factorA, factorCA, factorGC, factorLG, factorStrueL],
        ['S'], ['L', 'G', 'C', 'A'], [])
    print(resultFactor)
def _makeFactor(self, node, observed, log):
    """Build the initial factor for ``node``, reduced by any observed variables.

    The factor covers ``node`` and its parents (from ``self.network``).

    Args:
        node: name of the network node.
        observed: dict whose keys are the observed node names.
        log: object with ``write(str)``; receives a trace when a reduction
            is performed.

    Returns:
        * the unreduced factor when none of its nodes is observed,
        * an empty ``Factor()`` when all of its nodes are observed,
        * otherwise the factor reduced by the observed nodes.
    """
    nodes = np.array([node] + self.network.parents[node])
    probs = self.network.probabilities[node].values

    # list(observed) is required: on Python 3 a dict view is turned into a
    # single 0-d object by NumPy, so membership tests against it never match.
    nodes_in_observed = nodes[np.in1d(nodes, list(observed))]

    if nodes_in_observed.size == 0:
        return Factor(nodes, probs, self.network)
    if nodes.size <= nodes_in_observed.size:
        # Everything in this factor is observed; nothing is left to reason about.
        return Factor()

    factor = Factor(nodes, probs, self.network)
    log.write("\nReduce\n")
    log.write(str(factor))
    log.write("Evidence: " + str(observed) + "\n\n")
    return factor.reduce(nodes_in_observed, observed)
def sumOutVariable(factor, variable):
    """
    Sum out a variable for variable elimination

    Builds a new ``Factor`` over the remaining variables (using the
    module-level ``mnCards``). When at least one variable remains, its
    ``phi`` is filled by adding up, for each output entry, the ``card``
    source entries that are ``stride`` apart for the eliminated variable.
    When no variable remains, the new factor is returned exactly as
    constructed (its ``phi`` is not assigned here).

    Debug output (enabled by the module-level ``debug`` flag) uses
    single-argument ``print(...)`` calls so the function parses on both
    Python 2 and Python 3; the odd spacing in the format strings mirrors the
    output of the original comma-separated print statements.

    Args:
        factor: source factor; reads ``variables``, ``size``, ``card``,
            ``stride`` and ``phi``.
        variable: the variable to eliminate.

    Returns:
        The new factor.
    """
    if debug:
        print("eliminating factor")
    newVars = [x for x in factor.variables if x != variable]
    if debug:
        print("new vars:  %s" % (newVars,))
    newF = Factor(newVars, mnCards)

    if len(newVars) > 0:
        varIndex = factor.variables.index(variable)
        if debug:
            print("var index:  %s" % (varIndex,))
            newF.printF()
        usedVar = [False] * factor.size
        if debug:
            print("used var:  %s" % (usedVar,))
            print("stride:  %s , card:  %s"
                  % (factor.stride[varIndex], factor.card[varIndex]))

        psi = []
        for i in range(newF.size):
            psi.append(0)
            # The first source entry not yet consumed starts the next group
            # (falls back to 0 when everything has been consumed, as before).
            start = next((k for k, seen in enumerate(usedVar) if not seen), 0)
            for j in range(factor.card[varIndex]):
                if debug:
                    print("start:  %s  stride:  %s  j:  %s"
                          % (start, factor.stride[varIndex], j))
                key = start + factor.stride[varIndex] * j
                psi[i] += factor.phi[key]
                usedVar[key] = True
            if debug:
                print("psi:  %s   %s" % (i, psi[i]))
        newF.phi = psi[:]
    return newF
def test_get_sum_marginalized(self):
    """Sum-marginalizing adds up the values that agree on the kept variables."""
    source = Factor(self.random_variables)
    source.add_value([1, 3, 2], 10).add_value([1, 4, 2], 5).add_value([1, 5, 2], 2)
    source.add_value([2, 4, 2], 20).add_value([2, 5, 2], 30)

    # Eliminate only the first variable.
    reduced = source.get_sum_marginalized([self.random_variables[0]])
    for expected, assignment in [
            (25, {'y': 4, 'z': 2}),
            (32, {'y': 5, 'z': 2}),
            (10, {'y': 3, 'z': 2}),
    ]:
        self.assertEqual(expected, reduced.get_value(assignment))

    # Eliminate everything: the grand total.
    total = source.get_sum_marginalized(self.random_variables)
    self.assertEqual(67, total.get_value([]))
def generate_grid(nb_vars, delta):
    """Create a square-grid pairwise model with random couplings and biases.

    The grid side is ``int(nb_vars ** 0.5)``. Variables are named
    ``ijth_object_name('V', row, col)``. Every cell is linked to its right
    and lower neighbour by a 2x2 factor ``F<k>`` whose coupling is drawn
    uniformly from [-1, 1) and scaled by ``delta``; every cell also gets a
    bias factor ``B<row * side + col>`` drawn uniformly from [-0.1, 0.1).

    Returns:
        The populated ``GraphicalModel``.
    """
    model = GraphicalModel()
    side = int(nb_vars ** 0.5)

    # Couplings are drawn before biases, matching the original draw order.
    couplings = delta * np.random.uniform(-1.0, 1.0, 2 * side * (side - 1))
    biases = np.random.uniform(-0.1, 0.1, [side, side])

    cells = [(row, col) for row in range(side) for col in range(side)]
    for row, col in cells:
        model.add_variable(ijth_object_name('V', row, col))

    # Row-major scan; for each cell the right edge is listed before the
    # downward edge, which fixes which coupling each edge receives.
    edges = []
    for row, col in cells:
        if col != side - 1:
            edges.append(((row, col), (row, col + 1)))
        if row != side - 1:
            edges.append(((row, col), (row + 1, col)))

    for number, (head, tail) in enumerate(edges):
        beta = couplings[number]
        head_name = ijth_object_name('V', head[0], head[1])
        tail_name = ijth_object_name('V', tail[0], tail[1])
        table = np.array([beta, -beta, -beta, beta]).reshape([2, 2])
        model.add_factor(Factor(name=ith_object_name('F', number),
                                variables=[head_name, tail_name],
                                log_values=table))

    for row, col in cells:
        table = np.array([biases[row, col], -biases[row, col]])
        model.add_factor(Factor(name=ith_object_name('B', row * side + col),
                                variables=[ijth_object_name('V', row, col)],
                                log_values=table))

    return model
def compute_marginals_naive(V, factors, evidence):
    """Computes the marginal of one variable from the full joint distribution.

    The joint over all factors is built, every variable other than ``V`` and
    the evidence variables is marginalized out, the evidence is applied, the
    values are normalized, and finally the evidence variables are stripped
    from the result so that only ``V`` remains.

    Args:
        V (int): Single Variable to perform inference on
        factors (List[Factor]): List of factors representing the graphical model
        evidence (Dict): Observed evidence. evidence[k] = v indicates that
            variable k has the value v.

    Returns:
        Factor representing the marginals
    """
    joint = compute_joint_distribution(factors)

    # Union of every variable that appears in any factor.
    all_vars = factors[0].var
    for position in range(1, len(factors)):
        all_vars = np.union1d(all_vars, factors[position].var)

    keep = list(evidence.keys())
    keep.append(V)
    to_eliminate = np.setxor1d(all_vars, np.array(keep))

    marginal = factor_marginalize(joint, to_eliminate)
    marginal = observe_evidence([marginal], evidence)[0]

    # Normalize in place.
    marginal.val /= np.sum(marginal.val)

    # Drop each evidence variable: remove its column, its cardinality, and
    # every row whose value for it disagrees with the observation.
    rows = marginal.get_all_assignments()
    for observed_var in evidence:
        observed_value = evidence[observed_var]
        column = np.array(np.where(marginal.var == observed_var))[0][0]
        mismatching = np.array(np.where(rows[:, column] != observed_value))

        marginal.var = np.delete(marginal.var, column)
        marginal.card = np.delete(marginal.card, column)
        marginal.val = np.delete(marginal.val, mismatching, axis=0)

        rows = np.delete(rows, mismatching, axis=0)
        rows = np.delete(rows, column, axis=1)

    return marginal
def test_get_value(self):
    """Stored values are returned as added; assignments never added read as 0."""
    factor = Factor(self.random_variables)
    factor.add_value([1, 3, 2], 10).add_value([2, 4, 2], 20).add_value([2, 5, 2], 30)

    expectations = [
        ([1, 3, 2], 10),
        ([2, 4, 2], 20),
        ([2, 5, 2], 30),
        ([1, 4, 2], 0),
        ([2, 3, 2], 0),
    ]
    for assignment, expected in expectations:
        self.assertEqual(factor.get_value(assignment), expected)
def factorProduct(f1, f2):
    """
    Multiplies two factors: every entry of the result is the product of the
    f1 entry and the f2 entry that agree with it on their shared variables.

    The result's variables are f2's variables, with each variable that only
    f1 has inserted at the front. Entries are produced by walking the
    result's assignments like an odometer (last variable changes fastest)
    while keeping a running index into each input's ``phi``.
    """
    # Variables of the product: f2's, preceded by the ones unique to f1.
    merged = f2.variables[:]
    for name in f1.variables:
        if name not in merged:
            merged.insert(0, name)

    factor = Factor(merged, mnCards)
    width = len(factor.variables)

    # How far each input's index moves when a given product variable
    # advances by one state; 0 when the input does not contain the variable.
    step1 = [f1.stride[f1.variables.index(name)] if name in f1.variables else 0
             for name in factor.variables]
    step2 = [f2.stride[f2.variables.index(name)] if name in f2.variables else 0
             for name in factor.variables]

    pos1, pos2 = 0, 0
    digits = [0] * width
    products = []
    for _ in range(factor.size):
        products.append(f1.phi[pos1] * f2.phi[pos2])

        # Advance the odometer from the last variable to the first, since
        # values are ordered 000, 001, 010, 011 rather than 000, 100, 010, 110.
        for slot in range(width - 1, -1, -1):
            digits[slot] += 1
            if digits[slot] == factor.card[slot]:
                # Wrap this digit: rewind both indices and carry on.
                digits[slot] = 0
                pos1 -= (factor.card[slot] - 1) * step1[slot]
                pos2 -= (factor.card[slot] - 1) * step2[slot]
            else:
                pos1 += step1[slot]
                pos2 += step2[slot]
                break

    factor.phi = products[:]
    return factor
def instantiate(var, factors):
    """Fix ``var`` to ``var.value`` in every factor that mentions it.

    Factors without ``var`` are passed through unchanged. For the others a
    new ``Factor`` over the remaining variables is created (named by
    concatenating their ``name`` attributes) whose ``phi`` keeps, in order,
    only the entries whose assignment for ``var`` equals ``var.value``.

    Returns:
        A new list of factors, in the same order as ``factors``.
    """
    result = []
    for current in factors:
        if var not in current.variables:
            result.append(current)
            continue

        remaining = list(current.variables)
        remaining.remove(var)
        reduced = Factor(''.join(item.name for item in remaining), remaining)

        position = current.variables.index(var)
        kept = [
            current.phi[row]
            for row in range(0, len(current.phi))
            if current.index_to_assignment(row)[position] == var.value
        ]
        reduced.phi = np.append(np.zeros((0,)), kept)
        result.append(reduced)
    return result
def cal_prop_dist(nodes, proposal_factors):
    """Multiply together, for each node in order, the proposal factor that ends in it.

    Convention used here: a factor with ``var = [0, 1, 2]`` stands for
    p(x2 | x0, x1), so the factor belonging to a node is the one whose last
    variable is that node. Each factor is used at most once; nodes without a
    matching factor are skipped. ``proposal_factors`` itself is not modified
    (a shallow copy is consumed instead).

    Returns:
        The accumulated product factor.
    """
    product = Factor()
    pending = proposal_factors.copy()
    not_found = object()
    for node in nodes:
        match = next(
            (key for key in pending if node == pending[key].var[-1]),
            not_found)
        if match is not_found:
            continue
        product = factor_product(product, pending[match])
        pending.pop(match)
    return product
def factor_marginalize(factor, var):
    """Sums over a list of variables.

    Variables listed in ``var`` that do not occur in ``factor`` are ignored.
    (The earlier symmetric-difference computation added them to the output
    scope instead.) The kept variables are returned in sorted order, and the
    assignment columns and cardinalities are selected in that same order.
    If every variable is marginalized out, the result has an empty scope and
    a single value holding the total of ``factor.val``.

    Args:
        factor (Factor): Input factor
        var (List): Variables to marginalize out

    Returns:
        out: Factor with variables in 'var' marginalized out.
    """
    out = Factor()
    out.var = np.setdiff1d(factor.var, np.array(var))

    if out.var.size == 0:
        # Nothing left to index by: the result is just the grand total.
        out.card = factor.card[:0]
        out.val = np.array([np.sum(factor.val)])
        return out

    # Column of each kept variable inside the input factor, in output order.
    columns = np.array(
        [np.where(factor.var == kept)[0][0] for kept in out.var])
    out.card = factor.card[columns]

    # Project every input assignment onto the kept variables and accumulate
    # its value into the matching output row.
    reduced = factor.get_all_assignments()[:, columns]
    rows = np.asarray(assignment_to_index(reduced, out.card), dtype=np.int64)
    out.val = np.zeros(np.prod(out.card))
    np.add.at(out.val, rows, factor.val)
    return out
def generate_uai(model_name):
    """Load the model stored at ``UAI_PATH + model_name + '.uai'``.

    The file is read as a flat stream of whitespace-separated tokens: the
    first token is skipped, then come the variable count and one cardinality
    per variable, the factor count, each factor's scope (size followed by
    variable indices), and finally each factor's table (entry count followed
    by the values). Variables are named ``V<i>`` and factors ``F<i>``.

    Returns:
        A ``GraphicalModel`` named ``model_name`` with all variables and
        factors added.
    """
    model = GraphicalModel()
    model.name = model_name

    tokens = []
    with open(UAI_PATH + model_name + '.uai') as handle:
        for line in handle.readlines():
            tokens.extend(line.split())

    # `cursor` always points at the most recently consumed token.
    cursor = 1
    variable_count = int(tokens[cursor])
    cardinalities = {}
    for index in range(variable_count):
        cursor += 1
        variable = 'V' + str(index)
        model.add_variable(variable)
        cardinalities[variable] = int(tokens[cursor])

    cursor += 1
    factor_count = int(tokens[cursor])
    scopes = {}
    for index in range(factor_count):
        cursor += 1
        scope_size = int(tokens[cursor])
        scopes['F' + str(index)] = [
            'V' + str(tokens[cursor + 1 + offset])
            for offset in range(scope_size)
        ]
        cursor += scope_size

    for index in range(factor_count):
        cursor += 1
        entry_count = int(tokens[cursor])
        factor_name = 'F' + str(index)
        flat_values = [
            float(tokens[cursor + 1 + offset])
            for offset in range(entry_count)
        ]
        cursor += entry_count

        # Shape the flat table by the cardinalities of the factor's scope.
        table = np.reshape(
            flat_values,
            [cardinalities[variable] for variable in scopes[factor_name]])
        model.add_factor(Factor(name=factor_name,
                                variables=scopes[factor_name],
                                values=table))

    return model
def perform_ve_inference(self, query_variable, evidence_variables):
    """Calculates the probability distribution P(query_variable|evidence_variables)
    using variable elimination. Assumes that we have only one query variable.

    Hidden variables (those in ``self.network.V`` that are neither the query
    nor evidence) are eliminated from the last to the first. The loop uses
    ``reversed(range(...))`` instead of ``xrange``, which does not exist on
    Python 3.

    Keyword arguments:
    query_variable -- The name of the query variable (as called in the network).
    evidence_variables -- A dictionary containing variable names as keys and observed values as values.

    Returns:
    distribution -- A dictionary containing the values of the query variable as keys and the probabilities as values.

    """
    known_variables = list(evidence_variables.keys())
    known_variables.append(query_variable)

    hidden_variables = [var for var in self.network.V
                        if var not in known_variables]

    variables, dependency_levels = self.__find_dependency_levels(known_variables, hidden_variables)

    #we don't want the query variable to be considered as known
    #in the factor creation process
    known_variables.remove(query_variable)
    factors = self.__create_factors(evidence_variables, variables, dependency_levels)

    current_factor = Factor()
    for i in reversed(range(len(hidden_variables))):
        current_factor = self.__multiply_factors(factors, current_factor, dependency_levels, i)
        current_factor.sum_out(hidden_variables[i])

    # Level -1 folds in the remaining factors after all eliminations.
    current_factor = self.__multiply_factors(factors, current_factor, dependency_levels, -1)

    alpha = sum(numpy.array(current_factor.probabilities))
    distribution = dict()
    for i, value in enumerate(current_factor.values):
        distribution[value[0]] = current_factor.probabilities[i] / alpha

    return distribution
def __init__(self, model):
    """Copy the model and create a normalized all-ones message for every edge.

    ``self.messages`` is keyed by ``(factor, variable)`` for factor-to-variable
    messages and by ``(variable, factor)`` for the opposite direction; all
    factor-to-variable messages are created first. ``self.factors_adj_to_``
    maps each variable of the copied model to its adjacent factors.
    """
    self.model = model.copy()
    self.factors_adj_to_ = {}
    for var in self.model.variables:
        self.factors_adj_to_[var] = self.model.get_adj_factors(var)

    self.messages = {}
    for factor_to_variable in (True, False):
        for fac in model.factors:
            for var in fac.variables:
                message = Factor.initialize_with_(
                    default_message_name(), [var], np.ones,
                    model.get_cardinality_for_(var))
                key = (fac, var) if factor_to_variable else (var, fac)
                self.messages[key] = message
                message.normalize()
def _update_mean_fields(self):
    """Recompute ``self.mean_fields`` for every variable, in random order.

    For each variable, every adjacent factor contributes one term: a
    ones-valued factor over the variable is multiplied with the current mean
    fields of the factor's other variables, reordered to the factor's
    variable order, given ``log_values = fac.log_values * values`` and then
    marginalized down to the variable.  The summed terms are exponentiated and
    normalized, and ``np.nan_to_num`` is applied to the stored ``log_values``.
    """
    for var in np.random.permutation(self.model.variables):
        accumulated = Factor.full_like_(self.mean_fields[var], 0.0)

        for fac in self.model.get_adj_factors(var):
            weights = Factor(
                name = 'tmp',
                variables = [var],
                values = np.ones(self.model.get_cardinality_for_(var)))
            # Mean fields of all other variables attached to this factor.
            neighbour_fields = [self.mean_fields[other]
                                for other in fac.variables
                                if other != var]
            weights = product_over_(weights, *neighbour_fields)
            # Align axes with the factor before the element-wise product.
            weights.transpose_by_(fac.variables)
            weights.log_values = fac.log_values * weights.values
            contribution = weights.marginalize_except_([var], inplace = False)
            accumulated = accumulated + contribution

        exponentiated = accumulated.exp(inplace = False)
        self.mean_fields[var] = exponentiated.normalize(inplace = False)
        cleaned = np.nan_to_num(self.mean_fields[var].log_values)
        self.mean_fields[var].log_values = cleaned
def main():
    """Build five example factors, print each one, then print an inference result.

    The factors are defined over the variables C, M, TB, R and D.  The last
    line prints the value returned by ``Factor.inference`` for the query
    ``['C']``, the variable list ``['R', 'TB', 'M', 'C']`` and the evidence
    ``[['D', 't']]``.

    Single-argument ``print(...)`` calls are used so the function produces the
    same output on Python 2 and Python 3.
    """
    factorC = Factor(['C'], [['t', 'f']], [0.32, 0.68])
    factorM = Factor(['M'], [['t', 'f']], [0.08, 0.92])
    factorBMC = Factor(
        ['TB', 'M', 'C'],
        [['t', 'f', 't', 'f', 't', 'f', 't', 'f'],
         ['t', 't', 't', 't', 'f', 'f', 'f', 'f'],
         ['t', 't', 'f', 'f', 't', 't', 'f', 'f']],
        [0.61, 0.39, 0.52, 0.48, 0.78, 0.22, 0.044, 0.956])
    factorRB = Factor(
        ['TB', 'R'],
        [['t', 't', 'f', 'f'], ['t', 'f', 't', 'f']],
        [0.98, 0.02, 0.01, 0.99])
    factorDR = Factor(
        ['R', 'D'],
        [['t', 't', 'f', 'f'], ['t', 'f', 't', 'f']],
        [0.96, 0.04, 0.001, 0.999])

    print("Factor f0(C)")
    print(factorC)
    print("Factor f1(M)")
    print(factorM)
    print("Factor f2(TB,M,C)")
    print(factorBMC)
    print("Factor f3(R,TB)")
    print(factorRB)
    print("Factor f4(D,R)")
    print(factorDR)

    print(Factor.inference(
        [factorC, factorM, factorBMC, factorRB, factorDR],
        ['C'],
        ['R', 'TB', 'M', 'C'],
        [['D', 't']]))
def make_factor(var, e, bn):
    """
    Return the factor for var in bn's joint distribution given e.
    That is, bn's full joint distribution, projected to accord with e,
    is the pointwise product of these factors for bn's variables.

    :param var: the variable whose node is looked up via ``bn.variable_node``
    :param e: the evidence; variables contained in it are left out of the factor
    :param bn: the network providing ``variable_node`` (also passed to ``all_events``)
    :return: a ``Factor`` over ``var`` and its parents that are not in ``e``
    """
    node = bn.variable_node(var)

    # Only the variable itself and its parents can appear in the factor, and
    # only those that the evidence does not already cover.
    candidates = [var] + node.parents
    variables = [name for name in candidates if name not in e]

    # One table entry per event enumerated by all_events for these variables.
    cpt = {
        event_values(event, variables): node.p(event[var], event)
        for event in all_events(variables, bn, e)
    }
    return Factor(variables, cpt)
def factor_product(A, B):
    """Compute product of two factors.

    Suppose A = phi(X_1, X_2), B = phi(X_2, X_3), the function should return
    phi(X_1, X_2, X_3).

    If either input reports ``is_empty()``, the other input is returned
    unchanged.
    """
    if A.is_empty():
        return B
    if B.is_empty():
        return A

    # The output ranges over the union of both inputs' variables.
    product = Factor()
    product.var = np.union1d(A.var, B.var)

    # For every variable of A (resp. B), find its column in the output's
    # variable ordering; argmax picks the first matching position.
    matches_a = A.var[:, None] == product.var[None, :]
    matches_b = B.var[:, None] == product.var[None, :]
    mapA = matches_a.argmax(axis=-1)
    mapB = matches_b.argmax(axis=-1)

    # Copy cardinalities over through those mappings.
    product.card = np.zeros(len(product.var), np.int64)
    product.card[mapA] = A.card
    product.card[mapB] = B.card

    # Placeholder values, allocated before the assignments are enumerated
    # (kept in case get_all_assignments looks at val -- not visible here).
    product.val = np.zeros(np.prod(product.card))

    # For each joint assignment of the output, work out which entry of each
    # input factor it corresponds to, then multiply those entries.
    assignments = product.get_all_assignments()
    rows_in_a = assignment_to_index(assignments[:, mapA], A.card)
    rows_in_b = assignment_to_index(assignments[:, mapB], B.card)
    product.val = A.val[rows_in_a] * B.val[rows_in_b]

    return product
def ppd(self, query, evidence):
    """
    Computes a posterior probability distribution for the current network,
    given some query variable and some evidence.

    Each step is also appended to ``self.step_by_step`` as text.

    :param query: the query variables. It is converted with ``set()`` and
        tested with ``in``, so pass a collection of variable names.
        NOTE(review): a bare string would be split into characters by
        ``set()`` -- confirm callers always pass a list/tuple.
    :param evidence: a dictionary of evidence variables. {'varname': varvalue}
    :return: A dictionary whose keys are tuples of query values. The values
        of the dictionary are the values of the PPD.
    """
    factors = []

    # build a list with the initial factors
    for k in self.net:
        parents = self.net[k]['parents']

        # build a list with the domains of the parents
        # use values from the evidence if available
        dom = [self.net[parent]['values'] if parent not in evidence
               else [evidence[parent]]
               for parent in parents]

        # add this variable's domain, or value from evidence if available
        dom += [self.net[k]['values'] if k not in evidence else [evidence[k]]]

        # the variables for this factor are the parents of the variable and
        # the variable
        vars_ = parents + [k]

        # build the probability table for this factor
        cpt = self[k]['cpt']
        table = {row: cpt[row] for row in product(*dom)}

        factors.append(Factor(OrderedDict(zip(vars_, dom)), table))

    hidden_vars = set(self.net.keys()) - set(query) - set(evidence)
    hidden_vars = list(hidden_vars)

    # TODO apply an ordering function
    ordering = hidden_vars

    step_nr = 0
    for var in ordering:
        self.__add_new_state_verbose(step_nr, factors)
        step_nr += 1

        # join and sum factors that include var
        subset = [factor for factor in factors if var in factor]
        self.__add_factor_table_verbose(subset)

        new_factor = Factor.join(subset)
        new_factor.eliminate(var)
        self.step_by_step += '\n\n\tEliminate ' + str(var)

        # replace the joined factors by the single reduced factor
        for i in subset:
            factors.remove(i)
        factors.append(new_factor)
        self.__add_new_factor_verbose(new_factor)

    final_factor = Factor.join(factors)
    self.step_by_step += '\n\n' + str(step_nr) + ' Factors: ' + str(factors)

    # remove evidence columns
    for var in evidence:
        if var not in query:
            final_factor.eliminate(var)
            self.step_by_step += '\n\n\tEliminate Evidence ' + str(var)

    step_nr += 1
    self.step_by_step += '\n\n' + str(step_nr) + ' Factors: ' + str(list(final_factor.vars_))
    self.step_by_step += '\n\n\t\t' + str(list(final_factor.vars_.keys()))
    # Log the rows of the final factor. The previous code iterated over
    # new_factor here, which logged the last intermediate factor and raised
    # NameError when there were no hidden variables to eliminate.
    for row in final_factor.table:
        self.step_by_step += '\n\t\t' + str(row) + ' ' + str(final_factor.table[row])

    # build the final, normalized table
    norm_constant = 0
    for row in final_factor.table:
        norm_constant += final_factor[row]

    ppd_table = final_factor.table  # aliasing: the factor's table is normalized in place
    for row in ppd_table:
        ppd_table[row] = ppd_table[row] / norm_constant

    return ppd_table
def _makeCliqueFactor(clique, mnCards):
    """ Creates a Factor for a clique with its cardinalities and strides set """
    fClass = Factor(clique)
    for i, variable in enumerate(fClass.variables):
        fClass.setCard(i, mnCards[variable])
    fClass.calculateStrides()
    return fClass


def readFunctionTables(mnCards, mnCliques):
    """ Reads function tables defined in the input file

    One table is read from the module-level ``inFile`` for every clique in
    ``mnCliques``, in order.  A table starts with its number of entries,
    followed by that many values; the count and the values may share a line
    or be spread over several lines, and blank lines are skipped.

    mnCards   -- cardinalities, indexed by the variables of each clique
    mnCliques -- the cliques, one per function table to read

    Returns the list of Factors, each with ``phi`` set to its table values.
    An empty ``mnCliques`` yields an empty list.

    Raises ValueError if the input ends before every table has been read
    (previously this case looped forever).
    """
    mnFactors = []
    if not mnCliques:
        return mnFactors

    factorIndex, factorNum = 0, 0
    fClass = _makeCliqueFactor(mnCliques[factorIndex], mnCards)
    factor = []

    while len(mnFactors) < len(mnCliques):
        rawRow = inFile.readline()
        if rawRow == "":
            # readline() only returns "" at end of file; a blank line is "\n".
            raise ValueError(
                "unexpected end of input after reading %d of %d function tables"
                % (len(mnFactors), len(mnCliques)))

        tokens = rawRow.split()
        if not tokens:
            continue

        if factorNum == 0:
            # The first token of a new table is its number of entries.
            factorNum = int(tokens[0])
            tokens = tokens[1:]
            if not tokens:
                # Count on its own line; the values follow on later lines.
                continue

        factor.extend(float(token) for token in tokens)

        if factorNum == len(factor):
            # Table complete: store it and prepare the next clique's Factor.
            factorNum = 0
            fClass.phi = factor
            mnFactors.append(fClass)
            factor = []
            factorIndex += 1
            if factorIndex < len(mnCliques):
                fClass = _makeCliqueFactor(mnCliques[factorIndex], mnCards)

    return mnFactors