def gtype_distance(gt):
    """Build the pairwise distance matrix for the sequences in ``gt``.

    Entry ``[row, col]`` is the smaller of two ``strdist`` values: the
    distance from ``gt[row]`` to ``gt[col]``, and the distance from
    ``gt[row]`` to ``gt[col]`` reversed.

    Returns a square integer ``numpy`` array with one row and one column
    per element of ``gt``.
    """
    size = len(gt)
    distances = np.zeros((size, size), dtype=int)
    for row, left in enumerate(gt):
        for col, right in enumerate(gt):
            forward = strdist(left, right)
            mirrored = strdist(left, right[::-1])
            distances[row, col] = min(forward, mirrored)
    return distances
def treedist(i, j):
    """Fill in tree distances for the subtrees rooted at ``i`` and ``j``.

    Runs the forest-distance dynamic program over the node ranges
    ``A.lmds[i]..i`` and ``B.lmds[j]..j``.  For every pair ``(x, y)`` in
    those ranges whose ``lmds`` values equal those of ``i`` and ``j`` the
    result is stored in the enclosing ``treedists`` table.  Nothing is
    returned.

    Pairs that take the other branch read ``treedists[x][y]``, so those
    entries must already be filled in (presumably by earlier calls made
    in increasing index order -- confirm at the call site).

    Costs come from ``strdist`` on node labels: removing a node costs
    ``strdist(label, '')``, inserting one costs ``strdist('', label)`` and
    relabelling costs ``strdist(label_a, label_b)``.
    """
    Al = A.lmds
    Bl = B.lmds
    An = A.nodes
    Bn = B.nodes

    # One extra row and column (index 0) stand for the empty forest.
    m = i - Al[i] + 2
    n = j - Bl[j] + 2
    fd = np.zeros((m, n), int)

    # Table index x refers to node x + ioff of A; y refers to y + joff of B.
    ioff = Al[i] - 1
    joff = Bl[j] - 1

    # δ(l(i)..x, θ) = δ(l(i)..x-1, θ) + γ(v → λ)
    # The removed node is x + ioff, the same node the main loop uses for
    # row x; indexing with x - 1 is only right when l(i) is node 0.
    for x in range(1, m):
        fd[x, 0] = fd[x - 1, 0] + strdist(An[x + ioff].label, '')

    # δ(θ, l(j)..y) = δ(θ, l(j)..y-1) + γ(λ → w)
    for y in range(1, n):
        fd[0, y] = fd[0, y - 1] + strdist('', Bn[y + joff].label)

    for x in range(1, m):
        a = x + ioff
        for y in range(1, n):
            b = y + joff
            remove = fd[x - 1, y] + strdist(An[a].label, '')
            insert = fd[x, y - 1] + strdist('', Bn[b].label)
            if Al[i] == Al[a] and Bl[j] == Bl[b]:
                # Both forests are whole trees:
                #                   +-
                #                   | δ(l(i1)..i-1, l(j1)..j) + γ(v → λ)
                # δ(F1 , F2 ) = min-+ δ(l(i1)..i , l(j1)..j-1) + γ(λ → w)
                #                   | δ(l(i1)..i-1, l(j1)..j-1) + γ(v → w)
                #                   +-
                update = fd[x - 1, y - 1] + strdist(An[a].label, Bn[b].label)
                fd[x, y] = min(remove, insert, update)
                treedists[a][b] = fd[x, y]
            else:
                #                   +-
                #                   | δ(l(i1)..i-1, l(j1)..j) + γ(v → λ)
                # δ(F1 , F2 ) = min-+ δ(l(i1)..i , l(j1)..j-1) + γ(λ → w)
                #                   | δ(l(i1)..l(i)-1, l(j1)..l(j)-1)
                #                   |                     + treedist(i1,j1)
                #                   +-
                p = Al[a] - 1 - ioff
                q = Bl[b] - 1 - joff
                fd[x, y] = min(remove, insert, fd[p, q] + treedists[a][b])
def treedist(i, j):
    """Compute and store distances for the subtrees rooted at ``i`` and ``j``.

    A forest-distance table is built for the nodes ``A.lmds[i]..i`` of A
    against the nodes ``B.lmds[j]..j`` of B.  Whenever a pair ``(x, y)``
    has the same ``lmds`` values as ``(i, j)``, the table cell is also
    written to ``treedists[x][y]`` in the enclosing scope.  The function
    returns ``None``.

    Other pairs look up ``treedists[x][y]``; that entry has to exist
    before this call (presumably produced by an earlier call for a
    smaller root pair -- verify against the caller's iteration order).

    All edit costs are ``strdist`` values on the ``label`` attribute of
    the nodes, with the empty string standing in for "no node".
    """
    Al = A.lmds
    Bl = B.lmds
    An = A.nodes
    Bn = B.nodes

    # Row 0 and column 0 of the table represent the empty forest.
    m = i - Al[i] + 2
    n = j - Bl[j] + 2
    fd = np.zeros((m, n), int)

    # Offsets that turn a table index into a node index.
    ioff = Al[i] - 1
    joff = Bl[j] - 1

    for x in range(1, m):
        # δ(l(i)..x, θ) = δ(l(i)..x-1, θ) + γ(v → λ)
        # v is node x + ioff, matching the indexing of the main loop below;
        # An[x - 1] would pick an unrelated node unless l(i) == 0.
        fd[x][0] = fd[x - 1][0] + strdist(An[x + ioff].label, '')
    for y in range(1, n):
        # δ(θ, l(j)..y) = δ(θ, l(j)..y-1) + γ(λ → w), w being node y + joff
        fd[0][y] = fd[0][y - 1] + strdist('', Bn[y + joff].label)

    for x in range(1, m):
        node_a = An[x + ioff]
        for y in range(1, n):
            node_b = Bn[y + joff]
            delete_cost = fd[x - 1][y] + strdist(node_a.label, '')
            insert_cost = fd[x][y - 1] + strdist('', node_b.label)
            if Al[i] == Al[x + ioff] and Bl[j] == Bl[y + joff]:
                #                   +-
                #                   | δ(l(i1)..i-1, l(j1)..j) + γ(v → λ)
                # δ(F1 , F2 ) = min-+ δ(l(i1)..i , l(j1)..j-1) + γ(λ → w)
                #                   | δ(l(i1)..i-1, l(j1)..j-1) + γ(v → w)
                #                   +-
                fd[x][y] = min(
                    delete_cost,
                    insert_cost,
                    fd[x - 1][y - 1] + strdist(node_a.label, node_b.label),
                )
                treedists[x + ioff][y + joff] = fd[x][y]
            else:
                #                   +-
                #                   | δ(l(i1)..i-1, l(j1)..j) + γ(v → λ)
                # δ(F1 , F2 ) = min-+ δ(l(i1)..i , l(j1)..j-1) + γ(λ → w)
                #                   | δ(l(i1)..l(i)-1, l(j1)..l(j)-1)
                #                   |                     + treedist(i1,j1)
                #                   +-
                p = Al[x + ioff] - 1 - ioff
                q = Bl[y + joff] - 1 - joff
                fd[x][y] = min(
                    delete_cost,
                    insert_cost,
                    fd[p][q] + treedists[x + ioff][y + joff],
                )
def weird_dist(A, B):
    """Return ten times the ``strdist`` value of ``A`` and ``B``."""
    base = strdist(A, B)
    return 10 * base
def weird_dist(A, B):
    """Scale ``strdist(A, B)`` by a factor of 10 and return the result."""
    scale = 10
    return scale * strdist(A, B)
def _generalized_rf_cost(c1, c2, varsdict):
    """Return the gRF cost described by a solved cluster assignment.

    ``varsdict`` maps names of the form ``"x_<m>_<l>"`` to solved values;
    the pair ``(c1[m], c2[l])`` counts as selected when that value equals
    ``1.0``.
    """
    cost = 0
    # Each cluster of c1 contributes 1, reduced for every selected partner
    # by (shared elements) / (len(c1[m]) + len(c2[l]) - shared elements).
    for m in range(len(c1)):
        cost = cost + 1
        for l in range(len(c2)):
            if varsdict["x_" + str(m) + "_" + str(l)] == 1.0:
                shared = [item for item in c1[m] if item in c2[l]]
                cost = cost - len(shared) / (len(c1[m]) + len(c2[l]) - len(shared))
    # Each cluster of c2 that is selected with no cluster of c1 contributes 1.
    for l in range(len(c2)):
        used = False
        for m in range(len(c1)):
            if varsdict["x_" + str(m) + "_" + str(l)] == 1.0:
                used = True
        if not used:
            cost = cost + 1
    return cost


def compare_trees(tree_size, number_of_trees):
    """Create random tree pairs of ``tree_size`` and record their distances.

    Calls ``create_random_binary_trees`` and then, if the file
    ``examples/example_trees_size_<tree_size>.json`` exists, processes its
    first ``min(len(file), number_of_trees)`` entries.  For each entry any
    result that is not stored yet is computed and saved under these keys:

    * ``'one_adapted'`` -- tree one adapted to tree two, as a list.
    * ``'#GRFRestr'`` -- number of items from ``compute_invalid_edges``.
    * ``'GRF<k>'`` for k in 1, 4, 16, 64 -- cost and timings of the LP
      built by ``createLPproblem``; only when ``tree_size <= 32`` and only
      if the solver status is "Optimal".
    * ``'ATED'``, ``'CTED'``, ``'STED'`` and the same with an ``'_a'``
      suffix -- ``zss.distance`` between tree one (or its adapted form)
      and tree two, with cost and elapsed time.

    Existing keys are never recomputed.  The whole list is written back to
    the file after each entry, so an interrupted run keeps its finished
    entries.  Returns ``None``; does nothing further if the file is
    missing.
    """
    print('Create instances')
    create_random_binary_trees(tree_size, number_of_trees)
    file_name = 'examples/example_trees_size_' + str(tree_size) + '.json'
    print('Instances created successfully!')
    print('Instances can be found in ' + file_name)

    if not os.path.exists(file_name):
        return

    # Read everything up front so the file is closed before it is rewritten.
    with open(file_name) as tree_file:
        tree_list = json.load(tree_file)

    # Result key -> value handed to insert_cost_delta / remove_cost_delta.
    keys = {"ATED": 0.5, "CTED": 0, "STED": 1}

    def label_distance(a, b):
        # Update cost shared by every zss.distance call below.
        return strdist(ExtendedNode.get_label(a), ExtendedNode.get_label(b))

    size_start = time.time()
    for i in range(min(len(tree_list), number_of_trees)):
        entry = tree_list[i]

        # Progress line: elapsed time / projected total, position, tree size.
        j = i + 1
        needed_time = time.time() - size_start
        estimation = needed_time / j * number_of_trees
        print("(" + str(timedelta(seconds=round(needed_time))) + " / "
              + str(timedelta(seconds=round(estimation))) + ") ("
              + str(j) + "/" + str(number_of_trees) + ") tree size: "
              + str(tree_size))

        tree_one = create_binary_tree_from_list(entry['one'])
        tree_two = create_binary_tree_from_list(entry['two'])

        if 'one_adapted' not in entry:
            adapted = adapt_tree_one(tree_one, tree_two)
            entry['one_adapted'] = adapted.get_tree_list(adapted)
        # Always rebuilt from the stored list, whether it was just computed
        # or loaded from the file.
        tree_one_adapted = create_binary_tree_from_list(entry['one_adapted'])

        if '#GRFRestr' not in entry:
            invalid_edges = compute_invalid_edges(
                tree_one.get_clusters(1), tree_two.get_clusters(1))
            entry['#GRFRestr'] = len(invalid_edges)

        # Compute gRF distance with varying 'k'
        for k in [1, 4, 16, 64]:
            key = 'GRF' + str(k)
            if key in entry or tree_size > 32:
                continue
            start = time.time()
            print( "k is " + str(k))
            lpProblem = createLPproblem(tree_one, tree_two, k)
            lp = lpProblem.get("lp")
            time_creation = time.time() - start
            lp.solve()
            c1 = lpProblem.get("c1")
            c2 = lpProblem.get("c2")
            if LpStatus[lp.status] == "Optimal":
                end = time.time()
                varsdict = {}
                for v in lp.variables():
                    varsdict[v.name] = v.varValue
                entry[key] = {
                    "cost": _generalized_rf_cost(c1, c2, varsdict),
                    "time": end - start,
                    "time_creation": time_creation,
                }

        # Compute all TEDs defined in variable 'keys'
        for key, k in keys.items():
            if key not in entry:
                start = time.time()
                print(key)
                cost = zss.distance(
                    tree_one, tree_two, tree_one.get_children,
                    insert_cost_delta(k), remove_cost_delta(k),
                    update_cost=label_distance)
                end = time.time()
                entry[key] = {"cost": cost, "time": end - start}

            key2 = key + "_a"
            if key2 not in entry:
                start = time.time()
                print(key2)
                cost = zss.distance(
                    tree_one_adapted, tree_two, tree_one.get_children,
                    insert_cost_delta(k), remove_cost_delta(k),
                    update_cost=label_distance)
                end = time.time()
                entry[key2] = {"cost": cost, "time": end - start}

        with open(file_name, 'w') as outfile:
            json.dump(tree_list, outfile)
def remove_cost(node):
    """Return the cost of removing ``node``.

    A node whose label is not ``0`` costs the ``strdist`` between its
    label and the empty string.  A node labelled ``0`` costs ``delta``,
    a name taken from the surrounding scope.
    """
    label = ExtendedNode.get_label(node)
    return strdist(label, '') if label != 0 else delta
def insert_cost(node):
    """Return the cost of inserting ``node``.

    A node whose label is not ``0`` costs the ``strdist`` between the
    empty string and its label.  A node labelled ``0`` costs ``delta``,
    a name taken from the surrounding scope.
    """
    label = ExtendedNode.get_label(node)
    return strdist('', label) if label != 0 else delta
def weird_update_dist(A, B):
    """Return the ``strdist`` value of ``A`` and ``B`` unchanged."""
    distance = strdist(A, B)
    return distance
def treedist(i, j):
    """Return the distance between the subtrees rooted at ``i`` and ``j``.

    Results are memoised in the enclosing ``treedists`` mapping
    (``treedists[i][j]``); a stored value is returned immediately.
    Otherwise the forest-distance table for the node ranges
    ``A.lmds[i]..i`` and ``B.lmds[j]..j`` is computed.  Every pair
    ``(x, y)`` whose ``lmds`` values equal those of ``(i, j)`` is stored
    in ``treedists`` along the way; all other pairs are resolved through
    a recursive call ``treedist(x, y)``.

    Costs are ``strdist`` values on node labels, with ``''`` standing in
    for a missing node.
    """
    if i in treedists and j in treedists[i]:
        return treedists[i][j]

    Al = A.lmds
    Bl = B.lmds
    An = A.nodes
    Bn = B.nodes
    li = Al[i]
    lj = Bl[j]

    # Forest distances for this call, keyed by the LAST node of each
    # forest.  All forests start at li / lj, so the end index alone
    # identifies them; li - 1 / lj - 1 denotes the empty forest.  This
    # keeps the empty forest distinct from the one-node forest (li, li),
    # so a missing entry can no longer be read as a distance of 0.
    fd = {(li - 1, lj - 1): 0}  # δ(θ, θ) = 0

    for x in range(li, i + 1):
        # δ(l(i)..x, θ) = δ(l(i)..x-1, θ) + γ(v → λ)
        fd[x, lj - 1] = fd[x - 1, lj - 1] + strdist(An[x].label, '')
    for y in range(lj, j + 1):
        # δ(θ, l(j)..y) = δ(θ, l(j)..y-1) + γ(λ → w)
        fd[li - 1, y] = fd[li - 1, y - 1] + strdist('', Bn[y].label)

    for x in range(li, i + 1):
        for y in range(lj, j + 1):
            remove = fd[x - 1, y] + strdist(An[x].label, '')
            insert = fd[x, y - 1] + strdist('', Bn[y].label)
            if Al[x] == li and Bl[y] == lj:
                # Both forests are whole trees:
                #                   +-
                #                   | δ(l(i1)..i-1, l(j1)..j) + γ(v → λ)
                # δ(F1 , F2 ) = min-+ δ(l(i1)..i , l(j1)..j-1) + γ(λ → w)
                #                   | δ(l(i1)..i-1, l(j1)..j-1) + γ(v → w)
                #                   +-
                update = fd[x - 1, y - 1] + strdist(An[x].label, Bn[y].label)
                fd[x, y] = min(remove, insert, update)
                if x not in treedists:
                    treedists[x] = dict()
                treedists[x][y] = fd[x, y]
            else:
                #                   +-
                #                   | δ(l(i1)..i-1, l(j1)..j) + γ(v → λ)
                # δ(F1 , F2 ) = min-+ δ(l(i1)..i , l(j1)..j-1) + γ(λ → w)
                #                   | δ(l(i1)..l(i)-1, l(j1)..l(j)-1) + treedist(i,j)
                #                   +-
                subtree = fd[Al[x] - 1, Bl[y] - 1] + treedist(x, y)
                fd[x, y] = min(remove, insert, subtree)

    # The last iteration (x == i, y == j) always takes the first branch,
    # so the root pair has been stored by now.
    return treedists[i][j]
def my_distance(node1, node2):
    """Return the ``strdist`` value of ``node1`` and ``node2``."""
    result = strdist(node1, node2)
    return result
def weird_dist(A, B):
    """Return ``strdist(A, B)`` without any scaling."""
    distance = strdist(A, B)
    return distance
def treedist(i, j):
    """Compute (or look up) the distance of the subtrees at ``i`` and ``j``.

    The enclosing ``treedists`` mapping acts as a memo: if it already
    holds ``treedists[i][j]`` that value is returned.  Otherwise a
    forest-distance table is built for nodes ``A.lmds[i]..i`` of A
    against nodes ``B.lmds[j]..j`` of B.  Pairs ``(x, y)`` sharing the
    ``lmds`` values of ``(i, j)`` are written to ``treedists`` as they
    are computed; every other pair is obtained by calling
    ``treedist(x, y)`` recursively.

    Removal, insertion and relabelling costs are ``strdist`` values on
    the nodes' ``label`` attributes, with the empty string on the side
    that has no node.
    """
    if i in treedists and j in treedists[i]:
        return treedists[i][j]

    Al = A.lmds
    Bl = B.lmds
    An = A.nodes
    Bn = B.nodes
    first_a = Al[i]
    first_b = Bl[j]
    # End index that stands for "no nodes at all" on each side.
    empty_a = first_a - 1
    empty_b = first_b - 1

    # Table of forest distances keyed by (end node in A, end node in B).
    # Every forest in this call begins at first_a / first_b, so the end
    # node is enough to name it.  Using empty_a / empty_b for the empty
    # forest keeps it apart from a one-node forest, which must not be
    # priced at 0.
    fd = {(empty_a, empty_b): 0}  # δ(θ, θ) = 0

    # δ(l(i)..x, θ) = δ(l(i)..x-1, θ) + γ(v → λ)
    for x in range(first_a, i + 1):
        fd[x, empty_b] = fd[x - 1, empty_b] + strdist(An[x].label, '')

    # δ(θ, l(j)..y) = δ(θ, l(j)..y-1) + γ(λ → w)
    for y in range(first_b, j + 1):
        fd[empty_a, y] = fd[empty_a, y - 1] + strdist('', Bn[y].label)

    for x in range(first_a, i + 1):
        for y in range(first_b, j + 1):
            delete_cost = fd[x - 1, y] + strdist(An[x].label, '')
            insert_cost = fd[x, y - 1] + strdist('', Bn[y].label)
            if Al[x] == first_a and Bl[y] == first_b:
                #                   +-
                #                   | δ(l(i1)..i-1, l(j1)..j) + γ(v → λ)
                # δ(F1 , F2 ) = min-+ δ(l(i1)..i , l(j1)..j-1) + γ(λ → w)
                #                   | δ(l(i1)..i-1, l(j1)..j-1) + γ(v → w)
                #                   +-
                relabel_cost = (fd[x - 1, y - 1]
                                + strdist(An[x].label, Bn[y].label))
                best = min(delete_cost, insert_cost, relabel_cost)
                fd[x, y] = best
                if x not in treedists:
                    treedists[x] = dict()
                treedists[x][y] = best
            else:
                #                   +-
                #                   | δ(l(i1)..i-1, l(j1)..j) + γ(v → λ)
                # δ(F1 , F2 ) = min-+ δ(l(i1)..i , l(j1)..j-1) + γ(λ → w)
                #                   | δ(l(i1)..l(i)-1, l(j1)..l(j)-1) + treedist(i,j)
                #                   +-
                subtree_cost = fd[Al[x] - 1, Bl[y] - 1] + treedist(x, y)
                fd[x, y] = min(delete_cost, insert_cost, subtree_cost)

    # The pair (i, j) is handled by the final iteration and always takes
    # the first branch, so its distance is in the memo here.
    return treedists[i][j]