def createSimpleGraph(self):
    # create the orphaned nodes
    node_names = ['x1', 'x2', 'x3', 'x4']
    dims = [2, 3, 2, 2]

    # pad array just so that references like x[1] later are easier to read
    x = [None] + [Variable(node_names[i], dims[i]) for i in range(4)]

    f3 = Factor('f3', np.array([0.2, 0.8]))
    f4 = Factor('f4', np.array([0.5, 0.5]))

    # first index is x3, second index is x4, third index is x2
    # looking at it like: arr[0][0][0]
    f234 = Factor('f234', np.array(
        [[[0.3, 0.5, 0.2], [0.1, 0.1, 0.8]],
         [[0.9, 0.05, 0.05], [0.2, 0.7, 0.1]]]))

    # first index is x2
    f12 = Factor('f12', np.array([[0.8, 0.2], [0.2, 0.8], [0.5, 0.5]]))

    # attach nodes to graph in the right order (connections matching
    # the order of the factor potential's dimensions)
    g = FactorGraph(x[3], silent=True)
    g.append('x3', f234)
    g.append('f234', x[4])
    g.append('f234', x[2])
    g.append('x2', f12)
    g.append('f12', x[1])
    g.append('x3', f3)
    g.append('x4', f4)
    return g

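# Hedged usage sketch (not part of the original class): take the toy graph
# built above, run sum-product, and print each variable's marginal. It relies
# only on the FactorGraph API used elsewhere in this file: compute_marginals(),
# the `nodes` dict, and .marginal().
def print_simple_graph_marginals(g):
    # run (loopy) sum-product message passing
    g.compute_marginals()
    for name in ['x1', 'x2', 'x3', 'x4']:
        # marginal() returns the belief over that variable's states
        print(name, g.nodes[name].marginal())
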
def compute_final_solution_phase_3(xc, yc, probability_map_phase_2, ncandidates,
                                   sde, delta, T, edges):
    (height, width, nldms) = probability_map_phase_2.shape
    x_candidates = []
    y_candidates = []

    # keep, for each landmark, the `ncandidates` highest-probability pixels
    for i in range(nldms):
        val = -np.sort(-probability_map_phase_2[:, :, i].flatten())[ncandidates]
        if val > 0:
            (y, x) = np.where(probability_map_phase_2[:, :, i] >= val)
        else:
            (y, x) = np.where(probability_map_phase_2[:, :, i] > val)
        if y.size > ncandidates:
            vals = -probability_map_phase_2[y, x, i]
            order = np.argsort(vals)[0:ncandidates]
            y = y[order]
            x = x[order]
        x_candidates.append(x.tolist())
        y_candidates.append(y.tolist())

    b_mat = build_bmat_phase_3(xc, yc, T, x_candidates, y_candidates, edges, sde)

    # pairwise (edge) factors between the landmark candidate variables
    g = FactorGraph(silent=True)
    nodes = [Variable('x%d' % i, len(x_candidates[i])) for i in range(nldms)]
    for ip in range(nldms):
        for ipl in edges[ip, :].astype(int):
            g.add(Factor('f2_%d_%d' % (ip, ipl), b_mat[(ip, ipl)]))
            g.append('f2_%d_%d' % (ip, ipl), nodes[ip])
            g.append('f2_%d_%d' % (ip, ipl), nodes[ipl])

    # unary factors: normalized phase-2 probabilities of each candidate
    for i in range(nldms):
        v = probability_map_phase_2[np.array(y_candidates[i]),
                                    np.array(x_candidates[i]), i]
        g.add(Factor('f1_%d' % i, v / np.sum(v)))
        g.append('f1_%d' % i, nodes[i])

    g.compute_marginals()

    # per landmark, pick the candidate with the largest marginal
    x_final = np.zeros(nldms)
    y_final = np.zeros(nldms)
    for i in range(nldms):
        amin = np.argmax(g.nodes['x%d' % i].marginal())
        x_final[i] = x_candidates[i][amin]
        y_final[i] = y_candidates[i][amin]
    return x_final / delta, y_final / delta

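# The top-candidate selection above is compact; this standalone sketch
# (hypothetical helper, not part of the original pipeline) restates the idea:
# return the (x, y) coordinates of the `ncandidates` highest-probability
# pixels of a single 2-D probability map. numpy is assumed to be imported
# as np, as in the rest of this file.
def top_k_candidates(prob_map, ncandidates):
    flat = prob_map.ravel()
    k = min(ncandidates, flat.size)
    # indices of the k largest values, then ordered by decreasing probability
    idx = np.argpartition(-flat, k - 1)[:k]
    idx = idx[np.argsort(-flat[idx])]
    y, x = np.unravel_index(idx, prob_map.shape)
    return x, y
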
def generate_markov_net(entities, graph, table):
    # assemble everything into the Markov model: start by building the
    # factor graph
    markov_net = FactorGraph(silent=True)

    for c in range(len(table.columns)):
        column = table.columns[c]
        # name columns as: C_1, C_2
        column_name = "C_%d" % c
        column_candidates = list(column.candidates)
        column_card = len(column_candidates)
        var1 = Variable(column_name, column_card)
        if column_card == 0:
            continue

        for r in range(len(column.cells)):
            cell = column.cells[r]
            # name cells as: D_1_1, D_2_1
            cell_name = "D_%d_%d" % (r, c)
            cell_candidates = list(cell.candidates)
            cell_card = len(cell_candidates)
            var2 = Variable(cell_name, cell_card)
            if cell_card == 0:
                continue

            # for each column-cell pair, create a factor holding a
            # |col| x |cell| transition matrix (usually it would be n x 1)
            mat = np.zeros([column_card, cell_card])
            # using the graph, calculate the probabilities carefully
            for i in range(column_card):
                col_id = column_candidates[i]
                col_entity = entities[col_id]
                for j in range(cell_card):
                    cell_id = cell_candidates[j]
                    cell_entity = entities[cell_id]
                    score = calcualte_probability_score(entities, graph,
                                                        col_entity, cell_entity)
                    mat[i][j] = score

            factor_name = "%s_%s" % (column_name, cell_name)
            factor = Factor(factor_name, mat)
            markov_net.add(factor)
            markov_net.append(factor_name, var1)
            markov_net.append(factor_name, var2)

    # now add the column-column factor nodes
    for c1 in range(len(table.columns) - 1):
        # create nodes for both columns
        column_1 = table.columns[c1]
        column_1_name = "C_%d" % c1
        column_1_candidates = list(column_1.candidates)
        column_1_card = len(column_1_candidates)
        var1 = Variable(column_1_name, column_1_card)
        if column_1_card == 0:
            continue

        for c2 in range(c1 + 1, len(table.columns)):
            column_2 = table.columns[c2]
            column_2_name = "C_%d" % c2
            column_2_candidates = list(column_2.candidates)
            column_2_card = len(column_2_candidates)
            var2 = Variable(column_2_name, column_2_card)
            if column_2_card == 0:
                continue

            # for each column-column pair, create a factor holding a
            # |col1| x |col2| transition matrix
            mat = np.zeros([column_1_card, column_2_card])
            # loop over the candidates of both columns
            for i in range(column_1_card):
                col_1_id = column_1_candidates[i]
                col_1_entity = entities[col_1_id]
                for j in range(column_2_card):
                    col_2_id = column_2_candidates[j]
                    col_2_entity = entities[col_2_id]
                    score = calcualte_probability_score(entities, graph,
                                                        col_1_entity,
                                                        col_2_entity)
                    mat[i][j] = score

            # C_1_C_2
            factor_name = "%s_%s" % (column_1_name, column_2_name)
            factor = Factor(factor_name, mat)
            markov_net.add(factor)
            markov_net.append(factor_name, var1)
            markov_net.append(factor_name, var2)

    return markov_net

def generate_markov_net(graph, table):
    ne = table.get_NE_cols()
    # assemble everything into the Markov model: start by building the
    # factor graph
    # NOTE: `data` and `entities` below are assumed to be module-level lookups,
    # since they are not parameters of this function.
    markov_net = FactorGraph(silent=True)

    for c in range(len(ne.columns)):
        column = ne.columns[c]
        # name columns as: C_1, C_2
        column_name = "C_%d" % c
        column_candidates = list(data[column.header].items())
        column_card = len(column_candidates)
        var1 = Variable(column_name, column_card)
        if column_card == 0:
            continue

        for r in range(len(column.cells)):
            cell = column.cells[r]
            # name cells as: D_1_1, D_2_1
            cell_name = "D_%d_%d" % (r, c)
            cell_candidates = cell.predicted_labels
            cell_card = len(cell_candidates)
            var2 = Variable(cell_name, cell_card)
            if cell_card == 0:
                continue

            # for each column-cell pair, create a factor holding a
            # |col| x |cell| transition matrix (usually it would be n x 1)
            mat = np.zeros([column_card, cell_card])
            # using the graph, calculate the probabilities carefully
            for i in range(column_card):
                col_candidate = column_candidates[i][0]
                if column_candidates[i][1]["type"] == "class":
                    col_id = "dbo:" + col_candidate.replace(
                        "http://dbpedia.org/ontology/", "")
                else:
                    col_id = "dbp:" + col_candidate.replace(
                        "http://dbpedia.org/ontology/", "")
                col_entity = entities[col_id]
                for j in range(cell_card):
                    cell_candidate = cell_candidates[j][0]
                    cell_id = "dbr:" + cell_candidate.replace(
                        "http://dbpedia.org/resource/", "")
                    cell_entity = entities[cell_id]
                    score = calcualte_probability_score(
                        graph, col_entity, cell_entity)
                    mat[i][j] = score

            factor_name = "%s_%s" % (column_name, cell_name)
            factor = Factor(factor_name, mat)
            markov_net.add(factor)
            markov_net.append(factor_name, var1)
            markov_net.append(factor_name, var2)

    # now add the column-column factor nodes
    for c1 in range(len(ne.columns) - 1):
        # create nodes for both columns
        column_1 = ne.columns[c1]
        column_1_name = "C_%d" % c1
        column_1_candidates = list(data[column_1.header].items())
        column_1_card = len(column_1_candidates)
        var1 = Variable(column_1_name, column_1_card)
        if column_1_card == 0:
            continue

        for c2 in range(c1 + 1, len(ne.columns)):
            column_2 = ne.columns[c2]
            column_2_name = "C_%d" % c2
            column_2_candidates = list(data[column_2.header].items())
            column_2_card = len(column_2_candidates)
            var2 = Variable(column_2_name, column_2_card)
            if column_2_card == 0:
                continue

            # for each column-column pair, create a factor holding a
            # |col1| x |col2| transition matrix
            mat = np.zeros([column_1_card, column_2_card])
            # loop over the candidates of both columns
            for i in range(column_1_card):
                col_1_candidate = column_1_candidates[i][0]
                if column_1_candidates[i][1]["type"] == "class":
                    col_1_id = "dbo:" + col_1_candidate.replace(
                        "http://dbpedia.org/ontology/", "")
                else:
                    col_1_id = "dbp:" + col_1_candidate.replace(
                        "http://dbpedia.org/ontology/", "")
                col_1_entity = entities[col_1_id]
                for j in range(column_2_card):
                    col_2_candidate = column_2_candidates[j][0]
                    if column_2_candidates[j][1]["type"] == "class":
                        col_2_id = "dbo:" + col_2_candidate.replace(
                            "http://dbpedia.org/ontology/", "")
                    else:
                        col_2_id = "dbp:" + col_2_candidate.replace(
                            "http://dbpedia.org/ontology/", "")
                    col_2_entity = entities[col_2_id]
                    score = calcualte_probability_score(
                        graph, col_1_entity, col_2_entity)
                    mat[i][j] = score

            # C_1_C_2
            factor_name = "%s_%s" % (column_1_name, column_2_name)
            factor = Factor(factor_name, mat)
            markov_net.add(factor)
            markov_net.append(factor_name, var1)
            markov_net.append(factor_name, var2)

    return markov_net

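# Hedged usage sketch (not from the original code): after building the factor
# graph above, run sum-product and take, for each column/cell node, the
# candidate index with the largest marginal. It uses the same FactorGraph API
# as this file (compute_marginals(), nodes, marginal()); numpy is assumed to
# be imported as np, and the helper name and node_names argument are
# illustrative.
def decode_markov_net(markov_net, node_names):
    markov_net.compute_marginals()
    best = {}
    for name in node_names:  # e.g. 'C_0', 'D_3_0', ...
        # argmax over the marginal gives an index into that node's candidate list
        best[name] = int(np.argmax(markov_net.nodes[name].marginal()))
    return best
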
current_id += 1
if reviewer2 not in reviewer2id:
    reviewer2id[reviewer2] = current_id
    id2reviewer[current_id] = reviewer2
    current_id += 1

temp1 = Variable(str(reviewer2id[reviewer1]), 2)
temp2 = Variable(str(reviewer2id[reviewer2]), 2)
common_prod = float(row[3])
common_burst = float(row[4])
dist = float(row[5])

# pairwise factor coupling the two reviewers: a small distance favours
# assigning both reviewers the same label
dist_fact_name = 'distf' + str(reviewer2id[reviewer1]) + '_' + \
    str(reviewer2id[reviewer2])
dist_factor = Factor(dist_fact_name, np.array([[1 - dist, dist],
                                               [dist, 1 - dist]]))
graph.add(dist_factor)
graph.append(dist_fact_name, temp2)
graph.append(dist_fact_name, temp1)

print('adding pair no:', i)
i += 1

num_of_reviewers = len(reviewer2id)
reviewers = list(reviewer2id.keys())
random.shuffle(reviewers)
# observe a random fifth of the reviewers (integer division keeps the slice valid)
reviewers = reviewers[:num_of_reviewers // 5]
for i in reviewers:
    graph.observe(str(reviewer2id[i]), 2)

graph.compute_marginals(max_iter=500, tolerance=1e-4)

with open('output.csv', 'w') as f:
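
# Illustrative sketch (not in the original script) of what a single distance
# factor encodes: two binary reviewer variables coupled by [[1-d, d], [d, 1-d]],
# so a small distance pushes both reviewers toward the same label. It uses the
# same Variable/Factor/FactorGraph API as the script above; all names here are
# made up for the example.
def pairwise_distance_demo(dist=0.1):
    g = FactorGraph(silent=True)
    a = Variable('rev_a', 2)
    b = Variable('rev_b', 2)
    f = Factor('dist_ab', np.array([[1 - dist, dist],
                                    [dist, 1 - dist]]))
    g.add(f)
    g.append('dist_ab', a)
    g.append('dist_ab', b)
    g.observe('rev_a', 2)      # clamp reviewer a to its second state
    g.compute_marginals()
    # rev_b's marginal now concentrates on the second state as well
    return g.nodes['rev_b'].marginal()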