Example #1
0
def gtype_distance(gt):
    """Build the pairwise distance matrix for the sequences in ``gt``.

    Entry ``[row, col]`` is the smaller of the ``strdist`` between the two
    sequences as given and the ``strdist`` between the first sequence and
    the reversed second one.

    Returns a square integer NumPy array with one row/column per item of
    ``gt`` (an empty ``(0, 0)`` array when ``gt`` is empty).
    """
    size = len(gt)
    gt_dist = np.zeros((size, size), dtype=int)
    for row, left in enumerate(gt):
        for col, right in enumerate(gt):
            forward = strdist(left, right)
            # Also try the second sequence read backwards and keep the
            # cheaper of the two orientations.
            backward = strdist(left, right[::-1])
            gt_dist[row, col] = min(forward, backward)
    return gt_dist
    def treedist(i, j):
        """Fill in the tree distances for the node pair ``(i, j)``.

        A local forest-distance table ``fd`` of shape ``(m, n)`` is built,
        where row ``x`` stands for node ``x + ioff`` of ``A`` and column
        ``y`` for node ``y + joff`` of ``B`` (row/column 0 is the empty
        forest).  Whenever both prefixes are whole subtrees, the value is
        also recorded in the enclosing ``treedists`` table.

        Reads ``A``, ``B``, ``treedists`` and ``strdist`` from the enclosing
        scope and returns nothing; the result is the side effect on
        ``treedists``.
        """
        # presumably lmds = leftmost-descendant indices and nodes = nodes in
        # post-order; both come from the enclosing scope -- verify there.
        Al = A.lmds
        Bl = B.lmds
        An = A.nodes
        Bn = B.nodes

        m = i - Al[i] + 2
        n = j - Bl[j] + 2
        fd = np.zeros((m, n), int)

        # Offsets translating a table row/column into a node index.
        ioff = Al[i] - 1
        joff = Bl[j] - 1

        # First column: delete every node of the A-forest prefix.
        # Row x corresponds to node x + ioff (not x - 1, which is only the
        # same node when Al[i] == 0).
        for x in range(1, m):
            fd[x][0] = fd[x-1][0] + strdist(An[x+ioff].label, '')
        # First row: insert every node of the B-forest prefix.
        for y in range(1, n):
            fd[0][y] = fd[0][y-1] + strdist('', Bn[y+joff].label)

        for x in range(1, m):
            a_label = An[x+ioff].label
            remove_cost = strdist(a_label, '')  # invariant over y
            for y in range(1, n):
                b_label = Bn[y+joff].label
                removed = fd[x-1][y] + remove_cost
                inserted = fd[x][y-1] + strdist('', b_label)
                if Al[i] == Al[x+ioff] and Bl[j] == Bl[y+joff]:
                    # Both prefixes share the leftmost leaf with i and j, so
                    # each is a single tree: the third option relabels
                    # node x+ioff into node y+joff.
                    fd[x][y] = min(
                        removed,
                        inserted,
                        fd[x-1][y-1] + strdist(a_label, b_label),
                    )
                    treedists[x+ioff][y+joff] = fd[x][y]
                else:
                    # General forests: the third option matches the two
                    # rightmost subtrees using the already stored tree
                    # distance, plus the forests to their left (p, q).
                    p = Al[x+ioff] - 1 - ioff
                    q = Bl[y+joff] - 1 - joff
                    fd[x][y] = min(
                        removed,
                        inserted,
                        fd[p][q] + treedists[x+ioff][y+joff],
                    )
Example #3
0
    def treedist(i, j):
        """Compute forest distances for the pair ``(i, j)`` and store the
        resulting subtree distances in the enclosing ``treedists`` table.

        ``fd[x][y]`` holds the distance between the forest made of nodes
        ``Al[i] .. x + ioff`` of ``A`` and the forest made of nodes
        ``Bl[j] .. y + joff`` of ``B``; index 0 denotes the empty forest.
        ``A``, ``B``, ``treedists`` and ``strdist`` come from the enclosing
        scope.  Nothing is returned.
        """
        # presumably leftmost-descendant tables and post-order node lists;
        # they are defined outside this block -- verify against the caller.
        Al = A.lmds
        Bl = B.lmds
        An = A.nodes
        Bn = B.nodes

        m = i - Al[i] + 2
        n = j - Bl[j] + 2
        fd = np.zeros((m, n), int)

        # Table index + offset == node index.
        ioff = Al[i] - 1
        joff = Bl[j] - 1

        # Base cases.  The node handled at table index x is x + ioff, the
        # same mapping the main loop uses; indexing with x - 1 would pick
        # unrelated nodes whenever Al[i] != 0 (likewise for B).
        for x in range(1, m):  # forest of A against the empty forest
            fd[x][0] = fd[x - 1][0] + strdist(An[x + ioff].label, '')
        for y in range(1, n):  # empty forest against forest of B
            fd[0][y] = fd[0][y - 1] + strdist('', Bn[y + joff].label)

        for x in range(1, m):
            label_a = An[x + ioff].label
            delete_a = strdist(label_a, '')  # does not depend on y
            for y in range(1, n):
                label_b = Bn[y + joff].label
                via_delete = fd[x - 1][y] + delete_a
                via_insert = fd[x][y - 1] + strdist('', label_b)

                both_are_trees = (Al[i] == Al[x + ioff]
                                  and Bl[j] == Bl[y + joff])
                if both_are_trees:
                    # Tree vs. tree: relabel the two roots.
                    via_match = fd[x - 1][y - 1] + strdist(label_a, label_b)
                    fd[x][y] = min(via_delete, via_insert, via_match)
                    treedists[x + ioff][y + joff] = fd[x][y]
                else:
                    # Forest vs. forest: reuse the stored distance of the
                    # rightmost subtrees and add the remaining forests.
                    p = Al[x + ioff] - 1 - ioff
                    q = Bl[y + joff] - 1 - joff
                    via_match = fd[p][q] + treedists[x + ioff][y + joff]
                    fd[x][y] = min(via_delete, via_insert, via_match)
Example #4
0
def weird_dist(A, B):
    """Return ten times the ``strdist`` between ``A`` and ``B``."""
    base_distance = strdist(A, B)
    return 10 * base_distance
Example #5
0
def weird_dist(A, B):
    """Scale the ``strdist`` of ``A`` and ``B`` by a constant factor of 10."""
    factor = 10
    return factor * strdist(A, B)
Example #6
0
def _grf_cost(c1, c2, varsdict):
    """Turn a solved cluster matching into a gRF cost.

    ``varsdict`` maps ``"x_<m>_<l>"`` to the solver value of the variable
    pairing cluster ``m`` of ``c1`` with cluster ``l`` of ``c2``.  Every
    cluster of ``c1`` costs 1, reduced by |intersection| / |union| for each
    cluster of ``c2`` it is matched with (value ``1.0``); every cluster of
    ``c2`` that is matched with nothing costs 1 as well.
    """
    cost = 0
    for m, cluster_one in enumerate(c1):
        cost = cost + 1
        for l, cluster_two in enumerate(c2):
            if varsdict["x_" + str(m) + "_" + str(l)] == 1.0:
                shared = [elem for elem in cluster_one if elem in cluster_two]
                union_size = len(cluster_one) + len(cluster_two) - len(shared)
                cost = cost - len(shared) / union_size
    for l in range(len(c2)):
        matched = any(
            varsdict["x_" + str(m) + "_" + str(l)] == 1.0
            for m in range(len(c1))
        )
        if not matched:
            cost = cost + 1
    return cost


def compare_trees(tree_size, number_of_trees):
    """Generate random tree pairs and record several distances for each.

    Calls ``create_random_binary_trees`` and then reads
    ``examples/example_trees_size_<tree_size>.json``.  For up to
    ``number_of_trees`` entries of that file, every result that is not
    already stored in the entry is computed: the adapted first tree, the
    number of invalid edges (``#GRFRestr``), the gRF distance for
    k in 1, 4, 16, 64 (only when ``tree_size <= 32``), and the tree edit
    distances ATED/CTED/STED for both the original and the adapted first
    tree.  The whole list is written back to the JSON file after every
    entry, so an interrupted run keeps what was computed so far.

    Returns ``None``; does nothing further if the JSON file does not exist.
    """
    print('Create instances')
    create_random_binary_trees(tree_size, number_of_trees)
    file_name = 'examples/example_trees_size_' + str(tree_size) + '.json'
    print('Instances created successfully!')
    print('Instances can be found in ' + file_name)
    if not os.path.exists(file_name):
        return

    with open(file_name) as tree_file:
        tree_list = json.load(tree_file)

    # TED variants to evaluate, mapped to the value handed to
    # insert_cost_delta / remove_cost_delta.
    keys = {"ATED": 0.5, "CTED": 0, "STED": 1}

    def label_distance(a, b):
        # Relabel cost used for every TED variant.
        return strdist(ExtendedNode.get_label(a), ExtendedNode.get_label(b))

    size_start = time.time()
    for i in range(0, min(len(tree_list), number_of_trees)):
        entry = tree_list[i]

        # Progress line: elapsed time / estimated total time.
        j = i + 1
        needed_time = time.time() - size_start
        estimation = needed_time / j * number_of_trees
        print("(" + str(timedelta(seconds=round(needed_time))) + " / " + str(timedelta(seconds=round(estimation)))
              + ") (" + str(j) + "/" + str(number_of_trees) + ") tree size: " + str(tree_size))

        tree_one = create_binary_tree_from_list(entry['one'])
        tree_two = create_binary_tree_from_list(entry['two'])
        if 'one_adapted' not in entry:
            adapted = adapt_tree_one(tree_one, tree_two)
            entry['one_adapted'] = adapted.get_tree_list(adapted)
        # Always rebuild from the stored list so cached and fresh runs use
        # the same representation.
        tree_one_adapted = create_binary_tree_from_list(entry['one_adapted'])
        if '#GRFRestr' not in entry:
            invalid_edges = compute_invalid_edges(tree_one.get_clusters(1), tree_two.get_clusters(1))
            entry['#GRFRestr'] = len(invalid_edges)

        # Compute gRF distance with varying 'k'
        for k in [1, 4, 16, 64]:
            key = 'GRF' + str(k)
            if key in entry or tree_size > 32:
                continue
            start = time.time()
            print("k is " + str(k))
            lpProblem = createLPproblem(tree_one, tree_two, k)
            lp = lpProblem.get("lp")
            time_creation = time.time() - start
            lp.solve()
            c1 = lpProblem.get("c1")
            c2 = lpProblem.get("c2")
            if LpStatus[lp.status] == "Optimal":
                # Stop the clock before post-processing the solution.
                end = time.time()
                varsdict = {v.name: v.varValue for v in lp.variables()}
                entry[key] = {"cost": _grf_cost(c1, c2, varsdict),
                              "time": end - start,
                              "time_creation": time_creation}

        # Compute all TEDs defined in variable 'keys', first for the
        # original first tree and then for its adapted version ("_a").
        for key, delta in keys.items():
            for result_key, first_tree in ((key, tree_one),
                                           (key + "_a", tree_one_adapted)):
                if result_key in entry:
                    continue
                start = time.time()
                print(result_key)
                # NOTE(review): get_children is taken from tree_one even for
                # the adapted tree, as in the original code -- confirm it
                # does not depend on the instance.
                cost = zss.distance(
                    first_tree, tree_two, tree_one.get_children,
                    insert_cost_delta(delta), remove_cost_delta(delta),
                    update_cost=label_distance)
                end = time.time()
                entry[result_key] = {"cost": cost, "time": end - start}

        # Checkpoint after every tree pair.
        with open(file_name, 'w') as outfile:
            json.dump(tree_list, outfile)
Example #7
0
 def remove_cost(node):
     """Cost of deleting ``node``.

     A node whose label equals 0 costs ``delta`` (taken from the enclosing
     scope); any other node costs the ``strdist`` from its label to the
     empty string.
     """
     label = ExtendedNode.get_label(node)
     if label == 0:
         return delta
     return strdist(label, '')
Example #8
0
 def insert_cost(node):
     """Cost of inserting ``node``.

     Returns ``delta`` (from the enclosing scope) when the node's label
     equals 0, otherwise the ``strdist`` from the empty string to the label.
     """
     label = ExtendedNode.get_label(node)
     has_real_label = label != 0
     return strdist('', label) if has_real_label else delta
Example #9
0
def weird_update_dist(A, B):
    """Return the ``strdist`` between ``A`` and ``B`` unchanged."""
    distance = strdist(A, B)
    return distance
Example #10
0
    def treedist(i, j):
        """Return the distance stored for the node pair (i, j), computing it
        (and memoising it in the enclosing ``treedists`` dict of dicts) on
        first use.

        Forest distances are kept in a dict local to this call, keyed by
        ``((a_first, a_last), (b_first, b_last))`` index pairs; ``(0, 0)``
        is used as the key for the empty forest.  Pairs that are not whole
        subtrees sharing the leftmost leaf with i and j are resolved by a
        recursive call.  ``A``, ``B``, ``treedists`` and ``strdist`` come
        from the enclosing scope.  If no value was stored for (i, j) after
        the loops, debug output is printed and the process exits with
        status 1.
        """
        # Memoised result from an earlier call.
        if i in treedists and j in treedists[i]: return treedists[i][j]
        # Store v as the tree distance of the pair (i, j).
        def s(i, j, v):
            if i not in treedists: treedists[i] = dict()
            treedists[i][j] = v

        # fd and forestdists are two names for the same per-call dict.
        fd = forestdists = dict()
        # NOTE(review): a range with equal endpoints is treated as empty by
        # the fallbacks below, while the loops store single-node forests as
        # (l, l) -- confirm the fallbacks cannot hide a missing entry.
        def gfd(a, b): # get an item from the forest dists array
            if (a,b) in forestdists:
                return forestdists[(a,b)]
            if a[0] >= a[1] and b[0] >= b[1]: # δ(θ, θ) = 0
                return 0
            # Only b is empty: fall back to the "a against nothing" entry.
            if b[0] >= b[1]:
                return forestdists[(a,(0,0))]
            # Only a is empty: fall back to the "nothing against b" entry.
            if a[0] >= a[1]:
                return forestdists[((0,0),b)]
            raise KeyError, (a,b)

        # presumably leftmost-descendant tables and node lists of the two
        # trees; defined outside this block -- verify against the caller.
        Al = A.lmds
        Bl = B.lmds
        An = A.nodes
        Bn = B.nodes

        for x in xrange(Al[i], i+1): # δ(l(i1)..i, θ) = δ(l(1i)..1-1, θ) + γ(v → λ)
            fd[(Al[i], x), (0, 0)] = (
                gfd((Al[i],x-1), (0, 0)) + strdist(An[x].label, '')
            )
        for y in xrange(Bl[j], j+1): # δ(θ, l(j1)..j) = δ(θ, l(j1)..j-1) + γ(λ → w)
            fd[(0, 0), (Bl[j], y)] = (
                gfd((0,0), (Bl[j],y-1)) + strdist('', Bn[y].label)
            )

        for x in xrange(Al[i], i+1): ## the plus one is for the xrange impl
            for y in xrange(Bl[j], j+1):
                # only need to check if x is an ancestor of i
                # and y is an ancestor of j
                if (A.lmds[i] == A.lmds[x] and B.lmds[j] == B.lmds[y] or
                  (x == i and y == j)):
                    #                   +-
                    #                   | δ(l(i1)..i-1, l(j1)..j) + γ(v → λ)
                    # δ(F1 , F2 ) = min-+ δ(l(i1)..i , l(j1)..j-1) + γ(λ → w)
                    #                   | δ(l(i1)..i-1, l(j1)..j-1) + γ(v → w)
                    #                   +-
                    fd[((Al[i], x), (Bl[j], y))] = min(
                        (
                            gfd((Al[i],x-1), (Bl[j], y))
                            + strdist(An[x].label, '')
                        ),
                        (
                            gfd((Al[i], x), (Bl[j],y-1))
                            + strdist('', Bn[y].label)
                        ),
                        (
                            gfd((Al[i],x-1), (Bl[j],y-1))
                            +strdist(An[x].label, Bn[y].label)
                        )
                    )
                    # This forest pair is a pair of whole trees: memoise it.
                    s(x, y, fd[((Al[i], x), (Bl[j], y))])
                else:
                    #                   +-
                    #                   | δ(l(i1)..i-1, l(j1)..j) + γ(v → λ)
                    # δ(F1 , F2 ) = min-+ δ(l(i1)..i , l(j1)..j-1) + γ(λ → w)
                    #                   | δ(l(i1)..l(i)-1, l(j1)..l(j)-1) + treedist(i,j)
                    #                   +-
                    fd[((Al[i], x), (Bl[j], y))] = min(
                        (
                            gfd((Al[i],x-1), (Bl[j], y))
                            + strdist(An[x].label, '')
                        ),
                        (
                            gfd((Al[i], x), (Bl[j],y-1))
                            + strdist('', Bn[y].label)
                        ),
                        (
                            gfd((Al[i],Al[x]-1), (Bl[j],Bl[y]-1))
                            + treedist(x, y)
                        )
                    )
        # The final iteration (x == i, y == j) takes the first branch and
        # stores the value, so the else-branch is a debugging safety net.
        if i in treedists and j in treedists[i]:
            return treedists[i][j]
        else:
            print 'WTF'
            print (A.lmds[i], i), (B.lmds[j], j), tuple(xrange(A.lmds[i], i+1)), tuple(xrange(B.lmds[j], j+1))
            print x,y
            print treedists
            sys.exit(1)
Example #11
0
def my_distance(node1, node2):
    """Return the ``strdist`` between ``node1`` and ``node2``."""
    result = strdist(node1, node2)
    return result
Example #12
0
def weird_dist(A, B):
    """Delegate to ``strdist`` and hand back its result for ``A`` and ``B``."""
    dist = strdist(A, B)
    return dist
Example #13
0
    def treedist(i, j):
        """Return the distance for the node pair ``(i, j)``, memoised in the
        enclosing ``treedists`` dict of dicts.

        Forest distances live in a dict local to this call, keyed by
        ``((a_first, a_last), (b_first, b_last))`` index pairs, with
        ``(0, 0)`` standing for the empty forest.  Forest pairs that are not
        whole trees sharing the leftmost leaf with ``i`` and ``j`` are
        resolved through a recursive call.  ``A``, ``B``, ``treedists`` and
        ``strdist`` come from the enclosing scope.

        If no value was stored for ``(i, j)`` after the loops, diagnostic
        output is printed and the process exits with status 1 (raises
        ``SystemExit``).
        """
        if i in treedists and j in treedists[i]:
            return treedists[i][j]

        def s(i, j, v):
            # Memoise v as the tree distance of the pair (i, j).
            if i not in treedists:
                treedists[i] = dict()
            treedists[i][j] = v

        fd = dict()

        # NOTE(review): a range with equal endpoints is treated as empty by
        # the fallbacks below, while the loops store single-node forests as
        # (l, l) -- confirm the fallbacks cannot hide a missing entry.
        def gfd(a, b):
            # Look up a forest distance, falling back to the empty-forest
            # entries when one or both ranges are empty.
            if (a, b) in fd:
                return fd[(a, b)]
            if a[0] >= a[1] and b[0] >= b[1]:  # both empty: distance 0
                return 0
            if b[0] >= b[1]:
                return fd[(a, (0, 0))]
            if a[0] >= a[1]:
                return fd[((0, 0), b)]
            # Call form of raise: valid on both Python 2 and Python 3.
            raise KeyError((a, b))

        # presumably leftmost-descendant tables and node lists of the two
        # trees; defined outside this block -- verify against the caller.
        Al = A.lmds
        Bl = B.lmds
        An = A.nodes
        Bn = B.nodes

        # Base cases: forest of A against nothing, nothing against forest
        # of B.  range() is used so the block runs on Python 2 and 3 alike.
        for x in range(Al[i], i + 1):
            fd[(Al[i], x), (0, 0)] = (
                gfd((Al[i], x - 1), (0, 0)) + strdist(An[x].label, '')
            )
        for y in range(Bl[j], j + 1):
            fd[(0, 0), (Bl[j], y)] = (
                gfd((0, 0), (Bl[j], y - 1)) + strdist('', Bn[y].label)
            )

        for x in range(Al[i], i + 1):
            for y in range(Bl[j], j + 1):
                key = ((Al[i], x), (Bl[j], y))
                removed = (gfd((Al[i], x - 1), (Bl[j], y))
                           + strdist(An[x].label, ''))
                inserted = (gfd((Al[i], x), (Bl[j], y - 1))
                            + strdist('', Bn[y].label))
                if Al[i] == Al[x] and Bl[j] == Bl[y]:
                    # Both ranges are whole trees: relabel x into y and
                    # memoise the result as a tree distance.
                    matched = (gfd((Al[i], x - 1), (Bl[j], y - 1))
                               + strdist(An[x].label, Bn[y].label))
                    fd[key] = min(removed, inserted, matched)
                    s(x, y, fd[key])
                else:
                    # General forests: distance of the rightmost subtrees
                    # (recursive) plus the forests to their left.
                    matched = (gfd((Al[i], Al[x] - 1), (Bl[j], Bl[y] - 1))
                               + treedist(x, y))
                    fd[key] = min(removed, inserted, matched)

        # The final iteration (x == i, y == j) always takes the first
        # branch, so the else-branch below is a debugging safety net.
        if i in treedists and j in treedists[i]:
            return treedists[i][j]
        else:
            print('WTF')
            print((Al[i], i), (Bl[j], j),
                  tuple(range(Al[i], i + 1)), tuple(range(Bl[j], j + 1)))
            print(x, y)
            print(treedists)
            sys.exit(1)