def evaluate(self, pred, true):
    ''' Computes the TEDS score between the prediction and the ground truth
        of a given sample.

        Both arguments are HTML strings. The score is
        ``1 - distance / n_nodes`` where ``distance`` is the APTED edit
        distance between the two ``body/table`` trees and ``n_nodes`` is the
        larger number of descendant elements of the two tables.

        Returns 0.0 when either input is empty or when either document has no
        ``body/table`` element.
    '''
    if (not pred) or (not true):
        return 0.0

    parser = html.HTMLParser(remove_comments=True, encoding='utf-8')
    pred = html.fromstring(pred, parser=parser)
    true = html.fromstring(true, parser=parser)

    # Evaluate each XPath once and reuse the result.
    pred_tables = pred.xpath('body/table')
    true_tables = true.xpath('body/table')
    if not (pred_tables and true_tables):
        return 0.0
    pred = pred_tables[0]
    true = true_tables[0]

    if self.ignore_nodes:
        etree.strip_tags(pred, *self.ignore_nodes)
        etree.strip_tags(true, *self.ignore_nodes)

    n_nodes_pred = len(pred.xpath(".//*"))
    n_nodes_true = len(true.xpath(".//*"))
    # ".//*" counts descendants only, so two tables without any child element
    # give 0 here.  Clamp to 1 so that case no longer raises
    # ZeroDivisionError; the score is then 1 - distance of the bare roots.
    n_nodes = max(n_nodes_pred, n_nodes_true, 1)

    tree_pred = self.load_html_tree(pred)
    tree_true = self.load_html_tree(true)
    distance = APTED(tree_pred, tree_true, CustomConfig()).compute_edit_distance()
    return 1.0 - (float(distance) / n_nodes)
def compute_edit_distance(src_file, para_file):
    """Return the tree edit distance for every sentence pair of two CoNLL-U files.

    Both files are read with ``load_conllu``; sentences are paired by key.
    For each pair the head/upos columns are turned into trees, serialised to
    a ``{...}`` bracket string and compared with APTED.

    Returns:
        list with one edit distance per key, in the key order of the source
        file.
    """
    def _bracketed(root):
        # Serialise one tree and wrap it in the outer braces.
        pieces = []
        treeToString(root, pieces)
        return '{' + ''.join(pieces) + '}'

    src_data = load_conllu(src_file)
    para_data = load_conllu(para_file)
    assert len(src_data) == len(para_data)

    edit_distances = []
    for key in tqdm(src_data.keys(), total=len(src_data)):
        src_sent, para_sent = src_data[key], para_data[key]
        src_root, _ = head_to_tree(src_sent['head'], src_sent['upos'])
        para_root, _ = head_to_tree(para_sent['head'], para_sent['upos'])

        src_text = _bracketed(src_root)
        para_text = _bracketed(para_root)

        matcher = APTED(aptedTree.from_text(src_text),
                        aptedTree.from_text(para_text))
        edit_distances.append(matcher.compute_edit_distance())
    return edit_distances
def create_mapping(root1, root2):
    """Build the combined tree for the trees rooted at root1 and root2.

    The APTED edit mapping between the two trees decides, pair by pair,
    whether a node exists only in one tree (it is copied) or in both (the
    pair is passed to ``merge``).

    Returns:
    -- new root
    -- map from node index 1 to resulting node
    -- map from node index 2 to resulting node
    """
    edit_mapping = APTED(root1, root2, CONFIG).compute_edit_mapping()

    merged_duration = copy(root1.duration)
    merged_duration.update(root2.duration)
    all_trial_ids = root1.trial_ids + root2.trial_ids

    id_to_node1 = {}
    id_to_node2 = {}
    for left, right in edit_mapping:
        if left is None:
            # Node exists only in the second tree.
            clone = copy(right)
            id_to_node2[right.index] = clone
            clone.children1 = []
            clone.children2 = right.children
            clone.original1 = None
            clone.original2 = right.index
            continue
        if right is None:
            # Node exists only in the first tree.
            clone = copy(left)
            id_to_node1[left.index] = clone
            clone.children1 = left.children
            clone.children2 = []
            clone.original1 = left.index
            clone.original2 = None
            continue
        # Node exists in both trees.
        if left.name != right.name:
            print("Warning. Mismatch?", left.name, right.name)
        merge(left, right, id_to_node1, id_to_node2)
        # Note that it overrides node1 attributes

    if id_to_node1[root1.index] is id_to_node2[root2.index]:
        # Both roots resolved to the same resulting node: use it directly.
        return id_to_node1[root1.index], id_to_node1, id_to_node2

    # The roots differ: hang both of them under a synthetic "<diff>" root.
    root = Node(
        index=0,
        parent_index=0,
        name="<diff>",
        caller_id=0,
        original1=None,
        original2=None,
        children1=[root1],
        children2=[root2],
        activations=[],
        duration=merged_duration,
        full_tooltip=True,
        tooltip={x: "Diff" for x in all_trial_ids},
        children_index=-1,
        trial_ids=all_trial_ids
    )
    return root, id_to_node1, id_to_node2
def apted_code_distance(code_a, code_b):
    """Return the APTED tree edit distance between two pieces of code.

    Each argument is converted with ``gen_apted_tree`` before comparison.
    """
    trees = [gen_apted_tree(code) for code in (code_a, code_b)]
    from apted import APTED
    return APTED(*trees).compute_edit_distance()
def apted(tree1, tree2):
    """Return the APTED edit distance between tree1 and tree2.

    Each tree is rendered with ``apted_tree_format``; the first and last
    character of the stripped rendering (the outer brackets) are removed and
    the remainder is stripped again before it is parsed by
    ``helpers.Tree.from_text``.
    """
    # Render both trees first, then parse both, then compare.
    inner_texts = [
        apted_tree_format(tree).strip()[1:-1].strip()
        for tree in (tree1, tree2)
    ]
    left, right = [helpers.Tree.from_text(text) for text in inner_texts]
    return APTED(left, right).compute_edit_distance()
def compute_distance_matrix(test_imgs, codebook_trees, cmdist):
    """Return the matrix of tree edit distances between test images and codebook trees.

    Args:
        test_imgs: sequence of objects exposing ``.tree.root``.
        codebook_trees: sequence of objects exposing ``.tree.root``.
        cmdist: argument forwarded to ``my_distance`` to build the APTED
            config for every comparison.

    Returns:
        ``numpy.ndarray`` of shape ``(len(test_imgs), len(codebook_trees))``
        and dtype ``float64``; entry ``[i, j]`` is the edit distance between
        ``test_imgs[i]`` and ``codebook_trees[j]``.
    """
    # np.float_ was removed in NumPy 2.0; np.float64 is the same dtype and
    # exists in every NumPy version.
    distances = np.zeros((len(test_imgs), len(codebook_trees)), np.float64)
    for i, test_img in enumerate(test_imgs):
        print(i)  # progress output, one line per test image
        distances[i] = [
            APTED(test_img.tree.root, t2.tree.root,
                  my_distance(cmdist)).compute_edit_distance()
            for t2 in codebook_trees
        ]
    return distances
def count_distance(a_ast, b_ast):
    """
    Counts tree edit distance between two ast trees.

    Both inputs are first converted with ``build_tree``; the comparison uses
    APTED with a ``CustomConfig`` instance.
    """
    left_tree = build_tree(a_ast)
    right_tree = build_tree(b_ast)
    return APTED(left_tree, right_tree, CustomConfig()).compute_edit_distance()
def get_tree_edit_distance(tree1, tree2):
    """Return the APTED edit distance between tree1 and tree2.

    Nodes are expected to expose ``value`` and ``children``; renaming costs 1
    when the two values differ and 0 otherwise.
    """
    class TreeEditDistanceConfig(Config):
        # APTED config comparing nodes by ``value`` and walking ``children``.

        def __init__(self):
            # No initialisation is performed.
            pass

        def rename(self, node1, node2):
            if node1.value != node2.value:
                return 1
            return 0

        def children(self, node):
            return list(node.children)

    matcher = APTED(tree1, tree2, TreeEditDistanceConfig())
    return matcher.compute_edit_distance()
def evaluate(self, pred, true):
    ''' Computes the TEDS score between the prediction and the ground truth
        of a given sample.

        Both arguments are HTML strings. The score is
        ``1 - distance / n_nodes`` where ``distance`` is the APTED edit
        distance between the two ``body/table`` trees and ``n_nodes`` is the
        larger number of descendant elements of the two tables.

        Returns 0.0 when either input is empty or when either document has no
        ``body/table`` element.
    '''
    if (not pred) or (not true):
        return 0.0

    pred = html.fromstring(pred)
    true = html.fromstring(true)

    # Evaluate each XPath once and reuse the result.
    pred_tables = pred.xpath('body/table')
    true_tables = true.xpath('body/table')
    if not (pred_tables and true_tables):
        return 0.0
    pred = pred_tables[0]
    true = true_tables[0]

    n_nodes_pred = len(pred.xpath(".//*"))
    n_nodes_true = len(true.xpath(".//*"))
    # ".//*" counts descendants only, so two tables without any child element
    # give 0 here.  Clamp to 1 so that case no longer raises
    # ZeroDivisionError; the score is then 1 - distance of the bare roots.
    n_nodes = max(n_nodes_pred, n_nodes_true, 1)

    tree_pred = self.load_html_tree(pred)
    tree_true = self.load_html_tree(true)
    distance = APTED(tree_pred, tree_true, CustomConfig()).compute_edit_distance()
    return 1.0 - (float(distance) / n_nodes)
def diff(tree_before: Node, tree_after: Node) -> (int, dict):
    """
    Returns the difference between two QEP trees

    :param tree_before: The 'before tree'.
    :param tree_after: The 'after tree'.
    :return:
        distance: The edit distance reported by APTED (configured with
            APTEDConfig) between the two trees.
        delta: A dict with 3 keys, filled from the APTED edit mapping:
            - deleted: nodes of tree_before mapped to nothing
            - inserted: nodes of tree_after mapped from nothing
            - stayed: nodes mapped onto each other, as two parallel lists:
                - before: the nodes in tree_before
                - after : the nodes in tree_after
    """
    matcher = APTED(tree_before, tree_after, APTEDConfig())
    distance = matcher.compute_edit_distance()

    deleted, inserted = [], []
    kept_before, kept_after = [], []
    for before, after in matcher.compute_edit_mapping():
        if after is None:
            deleted.append(before)
        if before is None:
            inserted.append(after)
        if before is not None and after is not None:
            kept_before.append(before)
            kept_after.append(after)

    delta = {
        "deleted": deleted,
        "inserted": inserted,
        "stayed": {"before": kept_before, "after": kept_after},
    }
    return distance, delta
def treeDistance(tree1, tree2):
    """Return the APTED edit distance between two trees.

    Both arguments are converted with ``treeToTree`` before the comparison.
    """
    left = treeToTree(tree1)
    right = treeToTree(tree2)
    return APTED(left, right).compute_edit_distance()
def calc_dist_trees(t1, t2, clusters_dist):
    """Print both tree names and return the APTED distance between the trees.

    The comparison runs on ``t1.tree.root`` and ``t2.tree.root`` with an
    ``ImageTreeDistance`` config built from ``clusters_dist``.
    """
    print(t1.tree.name, t2.tree.name)
    root_a = t1.tree.root
    root_b = t2.tree.root
    cost_model = ImageTreeDistance(clusters_dist)
    return APTED(root_a, root_b, cost_model).compute_edit_distance()
def _text_search_result_key(res_map):
    """Return the ``"<image url>:: <caption>"`` key identifying one backend hit."""
    # Temp fix done to replace the Tomcat IP. Needs to be handled in the IndexEngine.
    image_path = res_map['fileURL'].replace(
        constants.TOMCAT_OLD_IP_ADDR, constants.TOMCAT_IP_ADDR)
    return image_path + ":: " + res_map['caption']


def _text_search_rank_by_ted(results, query_tree, tree_field, label):
    """Rank hits by tree edit distance between query_tree and res_map[tree_field].

    Returns a list of ``(key, distance)`` pairs sorted by ascending distance.
    ``label`` is only used in the timing message.
    """
    ranks = {}
    for res_map in results:
        # For auditing the TED time.
        ted_start = datetime.datetime.now()
        ted = APTED(apth.Tree.from_text(query_tree),
                    apth.Tree.from_text(res_map[tree_field]),
                    PerEditOperationConfig(1, 1, 1)).compute_edit_distance()
        print("Caption ::", res_map['caption'], " :: TED :: ", ted)
        print("Time taken to compute " + label + " TED :: ",
              (datetime.datetime.now() - ted_start))
        ranks[_text_search_result_key(res_map)] = ted

    ranked = sorted(ranks.items(), key=lambda kv: kv[1], reverse=False)
    print(ranked)
    return ranked


def _text_search_unranked(results):
    """Return all hits as ``(key, 1)`` pairs, i.e. without any ranking."""
    ranks = {_text_search_result_key(res_map): 1 for res_map in results}
    print(ranks)
    # Same list-of-pairs format as the ranked variants.
    ranked = sorted(ranks.items(), key=lambda kv: kv[1], reverse=True)
    print("sortedCaptionRanksDict :: ", ranked)
    return ranked


def text_search(request):
    """Django view for the text search page.

    GET (or any non-POST): initialises the caption generator and renders
    ``webapp/home.html`` with an empty form.

    POST with a valid form: sends the query to the backend at
    ``constants.SOLR_QUERY_URL``, optionally ranks the hits by tree edit
    distance ('Parse Tree' or 'Dependency Tree' ranking function) and renders
    ``webapp/results.html`` with ``images`` set to a list of
    ``("<image url>:: <caption>", score)`` pairs (an empty dict when the
    backend returned ``null``).

    POST with an invalid form: renders ``webapp/home.html`` with the bound
    form.

    Side effect: the module globals ``queryParseTree`` and ``queryDepTree``
    are overwritten with the trees of the current query.
    """
    global queryParseTree
    global queryDepTree

    if request.method != 'POST':
        # Initial loading
        caption_generator.init()
        form = TextSearchForm()
        return render(request, 'webapp/home.html', {'form': form})

    form = TextSearchForm(request.POST)
    if not form.is_valid():
        return render(request, 'webapp/home.html', {'form': form})

    # Local import so the block does not depend on the file's import list.
    from urllib.parse import quote

    # Fetching the selected ranking function.
    ranking_func = form.cleaned_data['ranking_function']

    # Noting the time taken for further auditing.
    started_at = datetime.datetime.now()

    query = form.cleaned_data['query']
    print("Caption :: ", query)

    # The query is user input: percent-encode it so characters such as
    # '&', '#' or '?' cannot truncate or alter the backend request.
    req = constants.SOLR_QUERY_URL + quote(query)
    res = json.loads(requests.get(req).text)
    print("Response :: ", res)
    print("QIK Fetch Execution time :: ", (datetime.datetime.now() - started_at))

    ranked = {}
    if res is not None:
        # Parse tree and dependency tree of the input query.
        queryParseTree = parse_show_tree.parseSentence(query)
        queryDepTree = parse_show_tree.dependencyParser(query)

        if ranking_func == 'Parse Tree':
            ranked = _text_search_rank_by_ted(
                res, queryParseTree, 'parseTree', ranking_func)
        elif ranking_func == 'Dependency Tree':
            ranked = _text_search_rank_by_ted(
                res, queryDepTree, 'depTree', ranking_func)
        else:
            ranked = _text_search_unranked(res)

    # Auditing the QIK execution time.
    print("QIK Execution time :: ", (datetime.datetime.now() - started_at))

    # Returning the fetched images.
    return render(request, 'webapp/results.html',
                  {'form': form, 'images': ranked})
def calc_dist_trees(t1, t2, apted_dist):
    """Return the APTED edit distance between ``t1.tree.root`` and ``t2.tree.root``.

    ``apted_dist`` is passed to APTED as its config object.
    """
    root_a = t1.tree.root
    root_b = t2.tree.root
    matcher = APTED(root_a, root_b, apted_dist)
    return matcher.compute_edit_distance()
def calculate_edit_distance(tree1, tree2):
    """Return the APTED edit distance between tree1 and tree2.

    The comparison is configured with a fresh ``TreeEditDistanceConfig``.
    """
    return APTED(tree1, tree2, TreeEditDistanceConfig()).compute_edit_distance()
def apted(self, t1: Tree, t2: Tree):
    """Return the APTED edit mapping between t1 and t2.

    The APTED config is an ``AptedConfig`` built from ``self.adapter``.
    """
    config = AptedConfig(self.adapter)
    matcher = APTED(t1, t2, config)
    return matcher.compute_edit_mapping()
def _qik_result_key(res_map):
    """Return the ``"<image url>:: <caption>"`` key identifying one backend hit."""
    # Temp fix done to replace the Tomcat IP. Needs to be handled in the IndexEngine.
    image_path = res_map['fileURL'].replace(constants.TOMCAT_OLD_IP_ADDR,
                                            constants.TOMCAT_IP_ADDR)
    return image_path + ":: " + res_map['caption']


def _qik_rank_by_ted(results, query_tree, tree_field):
    """Rank hits by tree edit distance between query_tree and res_map[tree_field].

    Returns a list of ``(key, distance)`` pairs sorted by ascending distance.
    """
    ranks = {}
    for res_map in results:
        ted = APTED(apth.Tree.from_text(query_tree),
                    apth.Tree.from_text(res_map[tree_field]),
                    PerEditOperationConfig(1, 1, 1)).compute_edit_distance()
        ranks[_qik_result_key(res_map)] = ted
    return sorted(ranks.items(), key=lambda kv: kv[1], reverse=False)


def _qik_unranked(results):
    """Return all hits as ``(key, 1)`` pairs, i.e. without any ranking."""
    ranks = {_qik_result_key(res_map): 1 for res_map in results}
    print(ranks)
    # Same list-of-pairs format as the ranked variants.
    return sorted(ranks.items(), key=lambda kv: kv[1], reverse=True)


def _qik_limit(ranked, fetch_count):
    """Cut ranked down to fetch_count entries (when given) and log the result."""
    # The truthiness check also protects the initial empty dict, which
    # cannot be sliced.
    if ranked and fetch_count is not None:
        ranked = ranked[:fetch_count]
    print("sortedCaptionRanksDict :: ", ranked)
    return ranked


def qik_search(query_image, ranking_func=None, obj_det_enabled=False, pure_objects_search=False, fetch_count=None):
    """Search the index for images similar to query_image.

    Args:
        query_image: image handed to the object detector and caption generator.
        ranking_func: 'Parse Tree' or 'Dependency Tree' to rank hits by tree
            edit distance; any other value returns the hits unranked.
        obj_det_enabled: when true, detected objects are also sent to the
            backend at ``constants.DETECT_OBJECTS_URL``.
        pure_objects_search: when true, only the object-detection hits are
            returned and no caption is generated.
        fetch_count: optional maximum number of results.

    Returns:
        ``(query, results, similar_images)``. ``query`` is the generated
        caption, or the literal ``"Query Image"`` for a pure objects search.
        ``results`` is a list of ``("<image url>:: <caption>", score)`` pairs,
        or an empty dict when there was nothing to rank. ``similar_images``
        is ``None`` for a pure objects search or when no response was
        available.
    """
    obj_res = None
    similar_images = None
    ranked = {}

    # Noting the time taken for further auditing.
    started_at = datetime.datetime.now()

    if obj_det_enabled:
        # Initial loading of the object detection model.
        detect_objects.init()

        # Detecting objects.
        json_data = {
            'objects': detect_objects.get_detected_objects(
                query_image, constants.OBJECT_DETECTED_THRESHOLD),
        }
        print("qik_search :: qik_search :: objects :: ", json_data['objects'])

        # Querying the backend for images and captions matching the objects.
        obj_req = constants.DETECT_OBJECTS_URL + urllib.parse.quote(
            str(json_data))
        obj_res = json.loads(requests.get(obj_req).text)
        print("qik_search :: qik_search :: obj_res :: ", obj_res)

    if pure_objects_search:
        if obj_res is None:
            return "Query Image", ranked, None
        ranked = _qik_unranked(obj_res)
        # Auditing the QIK execution time.
        print("QIK Execution time :: ", (datetime.datetime.now() - started_at))
        return "Query Image", _qik_limit(ranked, fetch_count), None

    # Initial loading of the caption generator model.
    caption_generator.init()

    # Generating the caption.
    query = caption_generator.get_caption(query_image, True)

    # Dropping a trailing full stop. endswith() is used because indexing
    # query[-1] raises IndexError when the caption is empty.
    if query.endswith('.'):
        query = query[:-1].strip()
    print("Caption Generated :: ", query)

    # Querying the backend for images and captions. The caption is
    # percent-encoded exactly like the object-detection request above.
    cap_req = constants.SOLR_QUERY_URL + urllib.parse.quote(query)
    cap_res = json.loads(requests.get(cap_req).text)
    print("QIK Captions Response :: ", cap_res)
    print("QIK Fetch Execution time :: ", (datetime.datetime.now() - started_at))

    # Merging the two responses.
    if obj_res is None:
        res = cap_res
    elif cap_res is None:
        res = obj_res
    else:
        res = obj_res + cap_res
    print("QIK Combined Response :: ", res)

    if res is not None:
        # Parse tree and dependency tree of the generated caption.
        queryParseTree = parse_show_tree.parseSentence(query)
        queryDepTree = parse_show_tree.dependencyParser(query)

        if ranking_func == 'Parse Tree':
            ranked = _qik_rank_by_ted(res, queryParseTree, 'parseTree')
        elif ranking_func == 'Dependency Tree':
            ranked = _qik_rank_by_ted(res, queryDepTree, 'depTree')
        else:
            ranked = _qik_unranked(res)

        similar_images = get_similar_images(query)
        print("qik_search :: qik_search :: similar_images :: ", similar_images)

    # Auditing the QIK execution time.
    print("QIK Execution time :: ", (datetime.datetime.now() - started_at))

    return query, _qik_limit(ranked, fetch_count), similar_images
def tree_edit_dist(tr1, tr2):
    """Return the APTED edit distance between tr1 and tr2 (default config)."""
    matcher = APTED(tr1, tr2)
    return matcher.compute_edit_distance()
def mid_tree_edit_dist(tactr, kdx1, kdx2):
    """Return the APTED edit distance between two trees built by ``mid2tr``.

    The trees are ``mid2tr(tactr, kdx1)`` and ``mid2tr(tactr, kdx2)``.
    """
    left = mid2tr(tactr, kdx1)
    right = mid2tr(tactr, kdx2)
    return APTED(left, right).compute_edit_distance()
def kern_tree_edit_dist(tactr, kdx1, kdx2):
    """Return the APTED edit distance between two trees built by ``kern2tr``.

    The trees are ``kern2tr(tactr, kdx1)`` and ``kern2tr(tactr, kdx2)``.
    """
    left = kern2tr(tactr, kdx1)
    right = kern2tr(tactr, kdx2)
    return APTED(left, right).compute_edit_distance()
# Cluster loss: the score from get_cluster_score is wrapped in a new
# one-element tensor and moved to the GPU.
# NOTE(review): torch.Tensor([...]) builds a fresh tensor from a plain value,
# so requires_grad_() only marks that new tensor; it does not appear to link
# back to whatever produced x_embs1 -- confirm this is intended.
c_loss = get_cluster_score(x_embs1, x_info_ids1)
c_loss = torch.Tensor([c_loss])
c_loss.requires_grad_()
c_loss = c_loss.cuda()
#------------ tree loss (loss_s) --------------------#
# For every sampled tree, look up the training tree whose id is the part of
# real_DT[i] before the first '_' and take the APTED edit distance between
# the two trees.
s_len = len(samples_tree)
_loss = []    # one edit distance per sample
_losses = 0   # running sum of the edit distances
for i in range(s_len): # 32 (presumably the batch size -- TODO confirm)
    _uid = real_DT[i].split('_')[0]
    # list.index is a linear search and raises ValueError if _uid is missing.
    tt1_i = train_DT_id.index(_uid)
    tt1 = train_DT[tt1_i]
    tree1 = Tree.from_text(tt1)
    tt2 = samples_tree[i]
    tree2 = Tree.from_text(tt2)
    _apted = APTED(tree1, tree2, Config())
    ted = _apted.compute_edit_distance()
    _loss.append(ted)
    _losses += ted
# Mean edit distance over the samples, as a GPU tensor.
# NOTE(review): as with c_loss, this tensor is built from Python numbers.
t_loss = torch.mean(torch.Tensor([_loss]))
t_loss.requires_grad_()
t_loss = t_loss.cuda()
#-------------------------------------------#
# Construct the input to the generator: prepend a (BATCH_SIZE, 1) column of
# zeros to the samples and delete the last column.
zeros = torch.zeros((BATCH_SIZE, 1)).type(torch.LongTensor)
if samples.is_cuda:
    zeros = zeros.cuda()
inputs = Variable(torch.cat([zeros, samples.data], dim = 1)[:, :-1].contiguous())
def __eq__(self, that):
    """Return True when that is an NTree whose APTED edit distance to self is 0.

    Any object that is not an NTree compares unequal.
    """
    if not isinstance(that, NTree):
        return False
    return APTED(self, that).compute_edit_distance() == 0