Beispiel #1
0
 def testTwoIndependentInstances(self):
     """Check that a populated graph and a fresh graph hold different nodes.

     The graph returned by ``createSimpleGraph`` must contain at least one
     node, while a ``FactorGraph`` constructed with no arguments must
     contain none.
     """
     populated = self.createSimpleGraph()
     fresh = FactorGraph()
     # Dedicated assertions report the actual lengths when they fail,
     # which assertTrue on a boolean expression does not.
     self.assertGreater(len(populated.nodes), 0)
     self.assertEqual(len(fresh.nodes), 0)
Beispiel #2
0
    def createSimpleGraph(self):
        """Return a small fixture graph over the variables x1, x2, x3, x4.

        The variables are created with sizes 2, 3, 2 and 2. Four factors
        are attached: the unary ``f3`` and ``f4``, the pairwise ``f12`` and
        the ternary ``f234``. The graph is seeded with x3 and created with
        ``silent=True``.
        """
        # Variables, created in name order.
        var_x1 = Variable('x1', 2)
        var_x2 = Variable('x2', 3)
        var_x3 = Variable('x3', 2)
        var_x4 = Variable('x4', 2)

        # Unary potentials.
        unary_3 = Factor('f3', np.array([0.2, 0.8]))
        unary_4 = Factor('f4', np.array([0.5, 0.5]))

        # Ternary potential: axis 0 is x3, axis 1 is x4, axis 2 is x2,
        # i.e. it is read as table[x3][x4][x2].
        table_234 = np.array([
            [[0.3, 0.5, 0.2], [0.1, 0.1, 0.8]],
            [[0.9, 0.05, 0.05], [0.2, 0.7, 0.1]],
        ])
        ternary = Factor('f234', table_234)

        # Pairwise potential: axis 0 is x2.
        table_12 = np.array([[0.8, 0.2], [0.2, 0.8], [0.5, 0.5]])
        pairwise = Factor('f12', table_12)

        # Nodes are attached in an order that matches the axis order of
        # each factor's potential, so the sequence below must not change.
        attachments = (
            ('x3', ternary),
            ('f234', var_x4),
            ('f234', var_x2),
            ('x2', pairwise),
            ('f12', var_x1),
            ('x3', unary_3),
            ('x4', unary_4),
        )
        fixture = FactorGraph(var_x3, silent=True)
        for anchor_name, node in attachments:
            fixture.append(anchor_name, node)
        return fixture
def generate_markov_net(entities, graph, table):
    """Assemble the factor graph for a table's columns and cells.

    Column ``c`` is represented by a variable named ``C_<c>`` and the cell
    in row ``r`` of that column by a variable named ``D_<r>_<c>``; each
    variable is constructed with its number of candidates. Two families of
    pairwise factors are added, each holding a matrix filled with
    ``calcualte_probability_score`` values:

    * ``C_<c>_D_<r>_<c>`` between a column and each of its cells;
    * ``C_<a>_C_<b>`` between every pair of columns with ``a < b``.

    Columns and cells without candidates get no factor.

    Args:
        entities: Lookup from candidate id to entity object; it is also
            forwarded to the scoring function.
        graph: Passed through unchanged to the scoring function.
        table: Object exposing ``columns``; each column exposes
            ``candidates`` and ``cells``, and each cell ``candidates``.

    Returns:
        The populated ``FactorGraph`` (created with ``silent=True``).
    """
    net = FactorGraph(silent=True)

    def score_table(row_ids, col_ids):
        # One matrix row per id in row_ids, one matrix column per id in
        # col_ids; entry (i, j) is the score of that pair of entities.
        scores = np.zeros([len(row_ids), len(col_ids)])
        for i, row_id in enumerate(row_ids):
            row_entity = entities[row_id]
            for j, col_id in enumerate(col_ids):
                col_entity = entities[col_id]
                scores[i][j] = calcualte_probability_score(
                    entities, graph, row_entity, col_entity)
        return scores

    def link(first_name, first_var, second_name, second_var, scores):
        # Register the factor, then attach its two variables in the same
        # order as the axes of the score matrix.
        factor_name = "%s_%s" % (first_name, second_name)
        net.add(Factor(factor_name, scores))
        net.append(factor_name, first_var)
        net.append(factor_name, second_var)

    column_total = len(table.columns)

    # Pass 1: one factor per (column, cell) pair.
    for col_idx in range(column_total):
        col = table.columns[col_idx]
        col_name = "C_%d" % col_idx
        col_ids = list(col.candidates)
        col_var = Variable(col_name, len(col_ids))
        if not col_ids:
            continue

        for row_idx in range(len(col.cells)):
            cell = col.cells[row_idx]
            cell_name = "D_%d_%d" % (row_idx, col_idx)
            cell_ids = list(cell.candidates)
            cell_var = Variable(cell_name, len(cell_ids))
            if not cell_ids:
                continue

            link(col_name, col_var, cell_name, cell_var,
                 score_table(col_ids, cell_ids))

    # Pass 2: one factor per unordered pair of columns.
    for left_idx in range(column_total - 1):
        left = table.columns[left_idx]
        left_name = "C_%d" % left_idx
        left_ids = list(left.candidates)
        left_var = Variable(left_name, len(left_ids))
        if not left_ids:
            continue

        for right_idx in range(left_idx + 1, column_total):
            right = table.columns[right_idx]
            right_name = "C_%d" % right_idx
            right_ids = list(right.candidates)
            right_var = Variable(right_name, len(right_ids))
            if not right_ids:
                continue

            link(left_name, left_var, right_name, right_var,
                 score_table(left_ids, right_ids))

    return net
Beispiel #4
0
def _column_entity_key(candidate):
    """Return the prefixed entity key for one ``(uri, info)`` column candidate.

    Candidates whose ``info["type"]`` is ``"class"`` get the ``dbo:`` prefix
    and all others get ``dbp:``. In both cases the DBpedia ontology
    namespace is stripped from the URI.
    """
    uri = candidate[0]
    prefix = "dbo:" if candidate[1]["type"] == "class" else "dbp:"
    # NOTE(review): the "dbp:" branch strips the *ontology* namespace, as
    # the code always has; a different namespace may have been intended --
    # confirm before changing.
    return prefix + uri.replace("http://dbpedia.org/ontology/", "")


def _cell_entity_key(candidate):
    """Return the ``dbr:``-prefixed entity key for one cell candidate."""
    return "dbr:" + candidate[0].replace("http://dbpedia.org/resource/", "")


def _score_matrix(graph, row_entities, col_entities):
    """Return the ``len(row_entities) x len(col_entities)`` score matrix."""
    mat = np.zeros([len(row_entities), len(col_entities)])
    for i, row_entity in enumerate(row_entities):
        for j, col_entity in enumerate(col_entities):
            mat[i][j] = calcualte_probability_score(
                graph, row_entity, col_entity)
    return mat


def _add_pairwise_factor(markov_net, name_1, var1, name_2, var2, mat):
    """Add factor ``<name_1>_<name_2>`` and attach ``var1`` then ``var2``."""
    factor_name = "%s_%s" % (name_1, name_2)
    markov_net.add(Factor(factor_name, mat))
    markov_net.append(factor_name, var1)
    markov_net.append(factor_name, var2)


def generate_markov_net(graph, table):
    """Build the factor graph over the columns returned by ``get_NE_cols``.

    Column ``c`` becomes a variable ``C_<c>`` and the cell in row ``r`` of
    that column a variable ``D_<r>_<c>``. A pairwise factor holding
    ``calcualte_probability_score`` values is added for every column/cell
    pair and for every pair of columns ``a < b``. Columns and cells
    without candidates get no factor.

    Column candidates come from ``data[column.header].items()`` and cell
    candidates from ``cell.predicted_labels``. ``data`` and ``entities``
    are not parameters: they are read from the enclosing module scope.

    Args:
        graph: Passed through unchanged to the scoring function.
        table: Object providing ``get_NE_cols()``, whose result exposes
            ``columns``; each column exposes ``header`` and ``cells``.

    Returns:
        The populated ``FactorGraph`` (created with ``silent=True``).

    Raises:
        KeyError: If a header is missing from ``data`` or a candidate key
            is missing from ``entities`` (assuming both are plain dicts).
    """
    ne = table.get_NE_cols()
    markov_net = FactorGraph(silent=True)

    # Column-cell factors.
    for c in range(len(ne.columns)):
        column = ne.columns[c]
        column_name = "C_%d" % c
        # list(...) keeps the candidates indexable on Python 3, where
        # dict.items() is a view that does not support subscripting.
        column_candidates = list(data[column.header].items())
        column_card = len(column_candidates)
        var1 = Variable(column_name, column_card)

        if column_card == 0:
            continue

        for r in range(len(column.cells)):
            cell = column.cells[r]
            cell_name = "D_%d_%d" % (r, c)
            cell_candidates = cell.predicted_labels
            cell_card = len(cell_candidates)
            var2 = Variable(cell_name, cell_card)

            if cell_card == 0:
                continue

            column_entities = [
                entities[_column_entity_key(candidate)]
                for candidate in column_candidates
            ]
            cell_entities = [
                entities[_cell_entity_key(cell_candidates[j])]
                for j in range(cell_card)
            ]
            mat = _score_matrix(graph, column_entities, cell_entities)
            _add_pairwise_factor(
                markov_net, column_name, var1, cell_name, var2, mat)

    # Column-column factors.
    for c1 in range(len(ne.columns) - 1):
        column_1 = ne.columns[c1]
        column_1_name = "C_%d" % c1
        column_1_candidates = list(data[column_1.header].items())
        column_1_card = len(column_1_candidates)
        var1 = Variable(column_1_name, column_1_card)

        if column_1_card == 0:
            continue

        for c2 in range(c1 + 1, len(ne.columns)):
            column_2 = ne.columns[c2]
            column_2_name = "C_%d" % c2
            column_2_candidates = list(data[column_2.header].items())
            column_2_card = len(column_2_candidates)
            var2 = Variable(column_2_name, column_2_card)

            if column_2_card == 0:
                continue

            column_1_entities = [
                entities[_column_entity_key(candidate)]
                for candidate in column_1_candidates
            ]
            column_2_entities = [
                entities[_column_entity_key(candidate)]
                for candidate in column_2_candidates
            ]
            mat = _score_matrix(graph, column_1_entities, column_2_entities)
            _add_pairwise_factor(
                markov_net, column_1_name, var1, column_2_name, var2, mat)

    return markov_net
Beispiel #5
0
def compute_final_solution_phase_3(xc, yc, probability_map_phase_2,
                                   ncandidates, sde, delta, T, edges):
    """Pick one position per channel of the map via factor-graph inference.

    For every channel along the last axis of ``probability_map_phase_2``
    the highest-valued pixel positions (at most ``ncandidates``) are kept
    as candidates. A factor graph is then built with one variable per
    channel, one unary factor per channel (the candidates' map values,
    normalised to sum to one) and one pairwise factor per entry of
    ``edges``, taken from ``build_bmat_phase_3``. After
    ``compute_marginals`` the candidate with the largest marginal is
    selected for each channel.

    Args:
        xc, yc, T, sde: Forwarded unchanged to ``build_bmat_phase_3``.
        probability_map_phase_2: Array indexed as ``[row, col, channel]``.
            Each channel must hold more than ``ncandidates`` pixels, since
            the sorted values are indexed at ``ncandidates``.
        ncandidates: Maximum number of candidates kept per channel.
        delta: Divisor applied to the selected coordinates on return.
        edges: 2-D array; row ``i`` lists the channels paired with channel
            ``i`` (values are cast to ``int``).

    Returns:
        ``(x, y)``: two arrays with one entry per channel, holding the
        selected column and row coordinates divided by ``delta``.
    """
    _, _, nldms = probability_map_phase_2.shape
    x_candidates = []
    y_candidates = []

    for ldm in range(nldms):
        layer = probability_map_phase_2[:, :, ldm]
        # Value at index `ncandidates` of the descending-sorted channel.
        cutoff = -np.sort(-layer.flatten())[ncandidates]
        # With a non-positive cutoff only strictly larger values qualify.
        mask = layer >= cutoff if cutoff > 0 else layer > cutoff
        rows, cols = np.where(mask)

        if rows.size > ncandidates:
            # Ties at the cutoff can yield extras: keep the largest values.
            keep = np.argsort(-probability_map_phase_2[rows, cols, ldm])
            keep = keep[0:ncandidates]
            rows = rows[keep]
            cols = cols[keep]

        x_candidates.append(cols.tolist())
        y_candidates.append(rows.tolist())

    pair_potentials = build_bmat_phase_3(xc, yc, T, x_candidates,
                                         y_candidates, edges, sde)

    fg = FactorGraph(silent=True)
    variables = [
        Variable('x%d' % ldm, len(x_candidates[ldm])) for ldm in range(nldms)
    ]

    # Pairwise factors, one per (channel, neighbour) entry of `edges`.
    for src in range(nldms):
        for dst in edges[src, :].astype(int):
            pair_name = 'f2_%d_%d' % (src, dst)
            fg.add(Factor(pair_name, pair_potentials[(src, dst)]))
            fg.append(pair_name, variables[src])
            fg.append(pair_name, variables[dst])

    # Unary factors: the candidates' own map values, normalised.
    for ldm in range(nldms):
        row_idx = np.array(y_candidates[ldm])
        col_idx = np.array(x_candidates[ldm])
        weights = probability_map_phase_2[row_idx, col_idx, ldm]
        unary_name = 'f1_%d' % ldm
        fg.add(Factor(unary_name, weights / np.sum(weights)))
        fg.append(unary_name, variables[ldm])

    fg.compute_marginals()

    x_final = np.zeros(nldms)
    y_final = np.zeros(nldms)
    for ldm in range(nldms):
        best = np.argmax(fg.nodes['x%d' % ldm].marginal())
        x_final[ldm] = x_candidates[ldm][best]
        y_final[ldm] = y_candidates[ldm][best]

    return x_final / delta, y_final / delta
Beispiel #6
0
import csv
import numpy as np
import random
from sumproduct import Variable, Factor, FactorGraph

# Script setup: read reviewer pairs from a CSV file and give every distinct
# reviewer a consecutive integer id.
numOfFactors = 2  # distance only!
# Path of the CSV file read below.
fileName = 'distances.csv'

# Create the factor graph; silent=False is passed explicitly.
graph = FactorGraph(silent=False)

# Two-way lookup between a reviewer value (as read from the CSV) and its id.
reviewer2id = {}
id2reviewer = {}

# Next unused id; incremented each time a new reviewer is registered.
current_id = 0
# Not used in the part of the script visible here.
i = 0
# Load the CSV file.
# NOTE(review): binary mode ('rb') with csv.reader is the Python 2
# convention; Python 3's csv.reader expects a text-mode file -- confirm the
# target interpreter.
with open(fileName, 'rb') as f:
    data = csv.reader(f, delimiter=',')
    for row in data:
        # The two reviewers are taken from the second and third fields.
        reviewer1 = row[1]
        reviewer2 = row[2]
        # Register each reviewer the first time it is seen.
        if (reviewer1 not in reviewer2id):
            reviewer2id[reviewer1] = current_id
            id2reviewer[current_id] = reviewer1
            current_id += 1
        if (reviewer2 not in reviewer2id):
            reviewer2id[reviewer2] = current_id
            id2reviewer[current_id] = reviewer2
            current_id += 1