Example #1
0
 def evaluate(self, pred, true):
     ''' Computes the TEDS score between the prediction and the ground truth
         of a given sample.

         Both arguments are HTML markup handed to ``html.fromstring``. Only
         the first ``body/table`` element of each document is compared, after
         stripping the tags listed in ``self.ignore_nodes`` (if any).

         Returns 0.0 when either input is empty or has no ``body/table``
         element; otherwise ``1 - distance / n_nodes`` where ``n_nodes`` is
         the larger descendant count of the two tables.
     '''
     if (not pred) or (not true):
         return 0.0
     parser = html.HTMLParser(remove_comments=True, encoding='utf-8')
     pred_tables = html.fromstring(pred, parser=parser).xpath('body/table')
     true_tables = html.fromstring(true, parser=parser).xpath('body/table')
     if not (pred_tables and true_tables):
         return 0.0
     pred, true = pred_tables[0], true_tables[0]
     if self.ignore_nodes:
         etree.strip_tags(pred, *self.ignore_nodes)
         etree.strip_tags(true, *self.ignore_nodes)
     n_nodes = max(len(pred.xpath(".//*")), len(true.xpath(".//*")))
     tree_pred = self.load_html_tree(pred)
     tree_true = self.load_html_tree(true)
     distance = APTED(tree_pred, tree_true,
                      CustomConfig()).compute_edit_distance()
     if n_nodes == 0:
         # Neither table has any descendant element, so there is nothing to
         # normalise by; avoid dividing by zero and score on the roots alone.
         return 1.0 if distance == 0 else 0.0
     return 1.0 - (float(distance) / n_nodes)
Example #2
0
def compute_edit_distance(src_file, para_file):
    """Compute per-sentence tree edit distances between two CoNLL-U files.

    Both files are read with ``load_conllu`` and must contain the same number
    of entries. For every key of the source data, the ``head``/``upos``
    fields of the source and paraphrase sentences are turned into trees,
    serialised into a brace-wrapped string, parsed by ``aptedTree.from_text``
    and compared with APTED.

    Returns the list of edit distances in source iteration order.
    """
    def _bracketed(root):
        # Serialise a tree and wrap it in one outer pair of braces.
        pieces = []
        treeToString(root, pieces)
        return ''.join(['{'] + pieces + ['}'])

    src_data = load_conllu(src_file)
    para_data = load_conllu(para_file)
    assert len(src_data) == len(para_data)

    edit_distances = []
    for key in tqdm(src_data.keys(), total=len(src_data)):
        src_sent, para_sent = src_data[key], para_data[key]
        src_root, _ = head_to_tree(src_sent['head'], src_sent['upos'])
        para_root, _ = head_to_tree(para_sent['head'], para_sent['upos'])

        src_text = _bracketed(src_root)
        para_text = _bracketed(para_root)

        comparison = APTED(aptedTree.from_text(src_text),
                           aptedTree.from_text(para_text))
        edit_distances.append(comparison.compute_edit_distance())

    return edit_distances
Example #3
0
def create_mapping(root1, root2):
    """Creates mapping between trees rooted at root1 and root2

    The APTED edit mapping between the two trees is walked pair by pair:
    nodes present on one side only are shallow-copied and registered in the
    map of that side, while matched pairs are handed to ``merge``.

    Returns:
    -- new root (a synthetic "<diff>" node when the two roots did not end up
       as the same resulting node, otherwise the shared resulting root)
    -- map from node index 1 to resulting node
    -- map from node index 2 to resulting node
    """
    apted = APTED(root1, root2, CONFIG)
    mapping = apted.compute_edit_mapping()

    # Only used if a synthetic root has to be built below.
    combined_duration = copy(root1.duration)
    combined_duration.update(root2.duration)
    trial_ids = root1.trial_ids + root2.trial_ids
    id_to_node1 = {}
    id_to_node2 = {}

    for node1, node2 in mapping:
        if node1 is None:
            # Node exists only in the second tree.
            node = id_to_node2[node2.index] = copy(node2)
            node.children1 = []
            node.children2 = node2.children
            node.original1 = None
            node.original2 = node2.index

        elif node2 is None:
            # Node exists only in the first tree.
            node = id_to_node1[node1.index] = copy(node1)
            node.children1 = node1.children
            node.children2 = []
            node.original1 = node1.index
            node.original2 = None
        else:
            # Node is matched in both trees.
            if node1.name != node2.name:
                print("Warning. Mismatch?", node1.name, node2.name)
            # NOTE(review): merge() presumably registers the merged node in
            # both id maps; the root lookup below relies on it -- confirm.
            merge(node1, node2, id_to_node1, id_to_node2)
            # Note that it overrides node1 attributes

    # The roots were not merged into one node: wrap both under a new root.
    if id_to_node1[root1.index] is not id_to_node2[root2.index]:
        root = Node(
            index=0,
            parent_index=0,
            name="<diff>",
            caller_id=0,
            original1=None,
            original2=None,
            children1=[root1],
            children2=[root2],
            activations=[],
            duration=combined_duration,
            full_tooltip=True,
            tooltip={x: "Diff" for x in trial_ids},
            children_index=-1,
            trial_ids=trial_ids
        )
    else:
        root = id_to_node1[root1.index]

    return root, id_to_node1, id_to_node2
Example #4
0
def apted_code_distance(code_a, code_b):
    """Return the APTED tree edit distance between two pieces of code.

    Each argument is converted with ``gen_apted_tree`` before the trees are
    compared with the default APTED configuration.
    """
    trees = [gen_apted_tree(code_a), gen_apted_tree(code_b)]

    from apted import APTED

    return APTED(*trees).compute_edit_distance()
def apted(tree1, tree2):
    """Return the APTED tree edit distance between ``tree1`` and ``tree2``.

    Each tree is rendered with ``apted_tree_format``; the outermost bracket
    pair and surrounding whitespace are dropped before the text is parsed
    with ``helpers.Tree.from_text``.
    """
    # strip whitespace, drop the outer brackets, strip again
    texts = [
        apted_tree_format(tree).strip()[1:-1].strip()
        for tree in (tree1, tree2)
    ]

    # build the apted trees from their textual form
    left, right = [helpers.Tree.from_text(text) for text in texts]

    return APTED(left, right).compute_edit_distance()
Example #6
0
def compute_distance_matrix(test_imgs, codebook_trees, cmdist):
    """Build the matrix of tree edit distances between images and codebook.

    Entry ``[i, j]`` is the APTED edit distance between
    ``test_imgs[i].tree.root`` and ``codebook_trees[j].tree.root``, computed
    with a ``my_distance(cmdist)`` configuration. The index of each processed
    test image is printed as a progress indicator.

    Returns a float64 array of shape
    ``(len(test_imgs), len(codebook_trees))``.
    """
    # np.float_ was removed in NumPy 2.0; np.float64 is the same dtype.
    M = np.zeros((len(test_imgs), len(codebook_trees)), np.float64)
    for i, t1 in enumerate(test_imgs):
        print(i)
        M[i] = [
            APTED(t1.tree.root, t2.tree.root,
                  my_distance(cmdist)).compute_edit_distance()
            for t2 in codebook_trees
        ]

    return M
Example #7
0
def count_distance(a_ast, b_ast):
    """
       Return the tree edit distance between two ast trees.

       Both inputs are first converted with ``build_tree``; the comparison
       uses a ``CustomConfig`` APTED configuration.
    """
    tree_a, tree_b = build_tree(a_ast), build_tree(b_ast)
    return APTED(tree_a, tree_b, CustomConfig()).compute_edit_distance()
Example #8
0
def get_tree_edit_distance(tree1, tree2):
    """Return the APTED edit distance between ``tree1`` and ``tree2``.

    Nodes are compared by their ``value`` attribute (a rename costs 1 when
    the values differ, 0 otherwise) and traversed through ``children``.
    """
    class TreeEditDistanceConfig(Config):
        def __init__(self):
            # Deliberately does not call the base initialiser.
            pass

        def rename(self, node1, node2):
            if node1.value != node2.value:
                return 1
            return 0

        def children(self, node):
            return list(node.children)

    config = TreeEditDistanceConfig()
    return APTED(tree1, tree2, config).compute_edit_distance()
Example #9
0
 def evaluate(self, pred, true):
     ''' Computes the TEDS score between the prediction and the ground truth
         of a given sample.

         Both arguments are HTML markup handed to ``html.fromstring``; only
         the first ``body/table`` element of each document is compared.

         Returns 0.0 when either input is empty or has no ``body/table``
         element; otherwise ``1 - distance / n_nodes`` where ``n_nodes`` is
         the larger descendant count of the two tables.
     '''
     if (not pred) or (not true):
         return 0.0
     pred_tables = html.fromstring(pred).xpath('body/table')
     true_tables = html.fromstring(true).xpath('body/table')
     if not (pred_tables and true_tables):
         return 0.0
     pred, true = pred_tables[0], true_tables[0]
     n_nodes = max(len(pred.xpath(".//*")), len(true.xpath(".//*")))
     tree_pred = self.load_html_tree(pred)
     tree_true = self.load_html_tree(true)
     distance = APTED(tree_pred, tree_true, CustomConfig()).compute_edit_distance()
     if n_nodes == 0:
         # Neither table has any descendant element, so there is nothing to
         # normalise by; avoid dividing by zero and score on the roots alone.
         return 1.0 if distance == 0 else 0.0
     return 1.0 - (float(distance) / n_nodes)
Example #10
0
def diff(tree_before: Node, tree_after: Node) -> (int, dict):
    """
    Compare two QEP trees and report how they differ.

    :param tree_before: The 'before tree'.
    :param tree_after: The 'after tree'.
    :return:
        distance: The structural edit distance between the two trees, as
            computed by APTED with an ``APTEDConfig`` configuration.
        delta: A dict describing the edit mapping, with 3 keys:
            - deleted: nodes of tree_before that have no counterpart
            - inserted: nodes of tree_after that have no counterpart
            - stayed: nodes matched in both trees, as two parallel lists:

                - before: the matched nodes in tree_before
                - after : the matched nodes in tree_after

                Matched nodes may still differ in attributes other than
                algorithm and operation.
    """
    apted = APTED(tree_before, tree_after, APTEDConfig())
    distance = apted.compute_edit_distance()

    deleted, inserted = [], []
    kept_before, kept_after = [], []
    for pair in apted.compute_edit_mapping():
        before, after = pair[0], pair[1]
        # The checks are independent on purpose: each list is filled by its
        # own condition, exactly one list-building rule per output.
        if after is None:
            deleted.append(before)
        if before is None:
            inserted.append(after)
        if before is not None and after is not None:
            kept_before.append(before)
            kept_after.append(after)

    delta = {
        "deleted": deleted,
        "inserted": inserted,
        "stayed": {"before": kept_before, "after": kept_after},
    }
    return distance, delta
Example #11
0
def treeDistance(tree1, tree2):
    """Return the APTED edit distance between two trees.

    Both arguments are converted with ``treeToTree`` before the comparison.
    """
    converted = [treeToTree(tree) for tree in (tree1, tree2)]
    return APTED(*converted).compute_edit_distance()
Example #12
0
def calc_dist_trees(t1, t2, clusters_dist):
    """Print both tree names and return the APTED distance between them.

    The roots ``t1.tree.root`` and ``t2.tree.root`` are compared using an
    ``ImageTreeDistance(clusters_dist)`` configuration.
    """
    left, right = t1.tree, t2.tree
    print(left.name, right.name)
    config = ImageTreeDistance(clusters_dist)
    return APTED(left.root, right.root, config).compute_edit_distance()
Example #13
0
def _ts_result_key(res_map):
    """Build the ``"<image path>:: <caption>"`` key identifying one result."""
    # Temp Fix done to replace Tomcat IP. Needs to be handled in the IndexEngine.
    image_path = res_map['fileURL'].replace(constants.TOMCAT_OLD_IP_ADDR,
                                            constants.TOMCAT_IP_ADDR)
    return image_path + ":: " + res_map['caption']


def _ts_rank_by_ted(results, query_tree, tree_field, label):
    """Score results by tree edit distance to the query, closest first.

    ``tree_field`` names the result entry holding the textual tree to compare
    against ``query_tree``; ``label`` is only used in the timing message.
    Returns a list of ``(key, distance)`` pairs sorted by ascending distance.
    """
    ranks = {}
    for res_map in results:
        # for auditing TED time
        ted_started = datetime.datetime.now()

        ted = APTED(apth.Tree.from_text(query_tree),
                    apth.Tree.from_text(res_map[tree_field]),
                    PerEditOperationConfig(1, 1, 1)).compute_edit_distance()
        print("Caption ::", res_map['caption'], " :: TED :: ", ted)
        print("Time taken to compute " + label + " TED :: ",
              (datetime.datetime.now() - ted_started))

        ranks[_ts_result_key(res_map)] = ted

    return sorted(ranks.items(), key=lambda kv: kv[1])


def text_search(request):
    """Handle the text search view.

    On a valid POST, the submitted query is sent to the backend at
    ``constants.SOLR_QUERY_URL`` and the returned results are ranked by tree
    edit distance on the parse tree or the dependency tree, depending on the
    form's ``ranking_function``; any other value leaves the results unranked
    (every entry gets rank 1). The results page is rendered with the ranked
    ``(key, rank)`` pairs.

    As a side effect the module-level ``queryParseTree`` and ``queryDepTree``
    are updated with the trees of the latest query.

    On a non-POST request the caption generator is initialised; in that case,
    and for an invalid POST, the home page is rendered with a fresh form.
    """
    # Imported locally so the block does not depend on a file-level import.
    from urllib.parse import quote

    global queryParseTree
    global queryDepTree

    if request.method == 'POST':
        form = TextSearchForm(request.POST)

        if form.is_valid():
            # Stays an empty dict when the backend returns no result set.
            ranked_results = {}

            # Fetching the checkbox selections.
            ranking_func = form.cleaned_data['ranking_function']

            # QIK Search -- Start ##
            # Noting the time taken for further auditing.
            started = datetime.datetime.now()

            # Getting the captions.
            query = form.cleaned_data['query']
            print("Caption :: ", query)

            # Querying the backend to fetch the list of images and captions.
            # The query is user input: percent-encode it so characters such
            # as '&', '#', '?' or '+' cannot alter the structure of the URL.
            req = constants.SOLR_QUERY_URL + quote(query, safe='')
            res = json.loads(requests.get(req).text)
            print("Response :: ", res)
            print("QIK Fetch Execution time :: ",
                  (datetime.datetime.now() - started))

            # Forming the return image set.
            if res is not None:
                # Generating the parse tree for the input query.
                queryParseTree = parse_show_tree.parseSentence(query)

                # Generating the dependency tree for the input query.
                queryDepTree = parse_show_tree.dependencyParser(query)

                if ranking_func == 'Parse Tree':
                    # Performing TED based ranking on the parse tree.
                    ranked_results = _ts_rank_by_ted(
                        res, queryParseTree, 'parseTree', 'Parse Tree')
                    print(ranked_results)

                elif ranking_func == 'Dependency Tree':
                    # Performing TED based ranking on the dependency tree.
                    ranked_results = _ts_rank_by_ted(
                        res, queryDepTree, 'depTree', 'Dependency Tree')
                    print(ranked_results)

                else:
                    # Forming the return image set (without ranking).
                    unranked = {_ts_result_key(res_map): 1 for res_map in res}
                    print(unranked)

                    # All ranks are equal, so insertion order is the order.
                    ranked_results = list(unranked.items())
                    print("sortedCaptionRanksDict :: ", ranked_results)

            # Auditing the QIK execution time.
            print("QIK Execution time :: ",
                  (datetime.datetime.now() - started))
            ## QIK Search -- End ##

            # Returning the fetched images.
            return render(request, 'webapp/results.html', {
                'form': form,
                'images': ranked_results
            })

    else:
        # Initial loading
        caption_generator.init()

    form = TextSearchForm()
    return render(request, 'webapp/home.html', {'form': form})
Example #14
0
def calc_dist_trees(t1, t2, apted_dist):
    """Return the APTED distance between the roots of ``t1`` and ``t2``.

    ``apted_dist`` is passed to APTED as its configuration object.
    """
    root_a, root_b = t1.tree.root, t2.tree.root
    comparison = APTED(root_a, root_b, apted_dist)
    return comparison.compute_edit_distance()
Example #15
0
def calculate_edit_distance(tree1, tree2):
    """Return the APTED edit distance between ``tree1`` and ``tree2``.

    The comparison uses a ``TreeEditDistanceConfig`` configuration.
    """
    config = TreeEditDistanceConfig()
    return APTED(tree1, tree2, config).compute_edit_distance()
Example #16
0
 def apted(self, t1: Tree, t2: Tree):
     """Return the APTED edit mapping between ``t1`` and ``t2``.

     The comparison is configured with ``AptedConfig(self.adapter)``.
     """
     config = AptedConfig(self.adapter)
     return APTED(t1, t2, config).compute_edit_mapping()
Example #17
0
def _qik_result_key(res_map):
    """Build the ``"<image path>:: <caption>"`` key identifying one result."""
    # Temp Fix done to replace Tomcat IP. Needs to be handled in the IndexEngine.
    image_path = res_map['fileURL'].replace(constants.TOMCAT_OLD_IP_ADDR,
                                            constants.TOMCAT_IP_ADDR)
    return image_path + ":: " + res_map['caption']


def _qik_rank_by_ted(results, query_tree, tree_field):
    """Return ``(key, distance)`` pairs sorted by ascending tree edit distance.

    ``tree_field`` names the result entry holding the textual tree that is
    compared against ``query_tree``.
    """
    ranks = {}
    for res_map in results:
        ranks[_qik_result_key(res_map)] = APTED(
            apth.Tree.from_text(query_tree),
            apth.Tree.from_text(res_map[tree_field]),
            PerEditOperationConfig(1, 1, 1)).compute_edit_distance()
    return sorted(ranks.items(), key=lambda kv: kv[1])


def _qik_unranked(results):
    """Return ``(key, 1)`` pairs for the results, in first-seen order."""
    ranks = {_qik_result_key(res_map): 1 for res_map in results}
    print(ranks)
    # All ranks are equal, so insertion order is the resulting order.
    return list(ranks.items())


def _qik_limit(ranked, fetch_count):
    """Trim ``ranked`` to ``fetch_count`` entries (if given) and log it."""
    if ranked and fetch_count is not None:
        ranked = ranked[:fetch_count]
    print("sortedCaptionRanksDict :: ", ranked)
    return ranked


def qik_search(query_image,
               ranking_func=None,
               obj_det_enabled=False,
               pure_objects_search=False,
               fetch_count=None):
    """Search for images related to ``query_image``.

    Args:
        query_image: image passed to the object detector and the caption
            generator.
        ranking_func: ``'Parse Tree'`` or ``'Dependency Tree'`` to rank the
            results by tree edit distance on that tree; any other value
            leaves the results unranked (every entry gets rank 1).
        obj_det_enabled: when true, detect objects in the image and query
            the backend at ``constants.DETECT_OBJECTS_URL`` with them.
        pure_objects_search: when true, return only the object-based results
            and skip caption generation entirely.
        fetch_count: optional maximum number of results to return.

    Returns:
        A ``(query, results, similar_images)`` tuple. ``query`` is the
        generated caption (or the literal ``"Query Image"`` for a pure
        objects search), ``results`` is a list of ``(key, rank)`` pairs (an
        empty dict when nothing was fetched) and ``similar_images`` is the
        value of ``get_similar_images(query)`` or ``None``.
    """
    obj_res = None
    similar_images = None

    # Stays an empty dict when no result set is available.
    ranked_results = {}

    # Noting the time taken for further auditing.
    started = datetime.datetime.now()

    if obj_det_enabled:
        # Initial Loading of the object detection model.
        detect_objects.init()

        # Detecting objects.
        json_data = {
            'objects': detect_objects.get_detected_objects(
                query_image, constants.OBJECT_DETECTED_THRESHOLD)
        }
        print("qik_search :: qik_search :: objects :: ", json_data['objects'])

        # Querying the backend to fetch the list of images and captions based on the objects detected.
        obj_req = constants.DETECT_OBJECTS_URL + urllib.parse.quote(
            str(json_data))
        obj_res = json.loads(requests.get(obj_req).text)
        print("qik_search :: qik_search :: obj_res :: ", obj_res)

    if pure_objects_search:
        if obj_res is None:
            return "Query Image", ranked_results, None

        # Forming the return image set.
        ranked_results = _qik_unranked(obj_res)

        # Auditing the QIK execution time.
        print("QIK Execution time :: ", (datetime.datetime.now() - started))

        return "Query Image", _qik_limit(ranked_results, fetch_count), None

    # Initial Loading of the caption generator model.
    caption_generator.init()

    # Generating the captions.
    query = caption_generator.get_caption(query_image, True)

    # Handling the fullstops in captions. endswith() also copes with an
    # empty caption, where indexing the last character would raise.
    if query.endswith('.'):
        query = query[:-1].strip()
    print("Caption Generated :: ", query)

    # Querying the backend to fetch the list of images and captions. The
    # caption is percent-encoded, like the object-detection request above,
    # so reserved characters cannot alter the structure of the URL.
    cap_req = constants.SOLR_QUERY_URL + urllib.parse.quote(query, safe='')
    cap_res = json.loads(requests.get(cap_req).text)
    print("QIK Captions Response :: ", cap_res)
    print("QIK Fetch Execution time :: ", (datetime.datetime.now() - started))

    # Merging the two responses.
    if obj_res is None:
        res = cap_res
    elif cap_res is None:
        res = obj_res
    else:
        res = obj_res + cap_res
    print("QIK Combined Response :: ", res)

    # Forming the return image set.
    if res is not None:
        # Generating the parse tree for the input query.
        queryParseTree = parse_show_tree.parseSentence(query)

        # Generating the dependency tree for the input query.
        queryDepTree = parse_show_tree.dependencyParser(query)

        if ranking_func == 'Parse Tree':
            # Performing TED based ranking on the parse tree.
            ranked_results = _qik_rank_by_ted(res, queryParseTree,
                                              'parseTree')
        elif ranking_func == 'Dependency Tree':
            # Performing TED based ranking on the dependency tree.
            ranked_results = _qik_rank_by_ted(res, queryDepTree, 'depTree')
        else:
            # Forming the return image set (without ranking).
            ranked_results = _qik_unranked(res)

        similar_images = get_similar_images(query)
        print("qik_search :: qik_search :: similar_images :: ", similar_images)

    # Auditing the QIK execution time.
    print("QIK Execution time :: ", (datetime.datetime.now() - started))

    return query, _qik_limit(ranked_results, fetch_count), similar_images
Example #18
0
def tree_edit_dist(tr1, tr2):
    """Return the APTED edit distance between ``tr1`` and ``tr2``."""
    comparison = APTED(tr1, tr2)
    return comparison.compute_edit_distance()
Example #19
0
def mid_tree_edit_dist(tactr, kdx1, kdx2):
    """Return the APTED edit distance between two trees built by ``mid2tr``.

    The trees are ``mid2tr(tactr, kdx1)`` and ``mid2tr(tactr, kdx2)``.
    """
    first = mid2tr(tactr, kdx1)
    second = mid2tr(tactr, kdx2)
    return APTED(first, second).compute_edit_distance()
Example #20
0
def kern_tree_edit_dist(tactr, kdx1, kdx2):
    """Return the APTED edit distance between two trees built by ``kern2tr``.

    The trees are ``kern2tr(tactr, kdx1)`` and ``kern2tr(tactr, kdx2)``.
    """
    first = kern2tr(tactr, kdx1)
    second = kern2tr(tactr, kdx2)
    return APTED(first, second).compute_edit_distance()
Example #21
0
     c_loss = get_cluster_score(x_embs1, x_info_ids1)
     c_loss = torch.Tensor([c_loss])
     c_loss.requires_grad_()
     c_loss = c_loss.cuda()                        
     #------------loss_s--------------------#
     s_len = len(samples_tree)
     _loss = []
     _losses = 0
     for i in range(s_len): # 32
         _uid = real_DT[i].split('_')[0]
         tt1_i = train_DT_id.index(_uid)
         tt1 = train_DT[tt1_i]
         tree1 = Tree.from_text(tt1)
         tt2 = samples_tree[i]
         tree2 = Tree.from_text(tt2)
         _apted = APTED(tree1, tree2, Config())
         ted = _apted.compute_edit_distance()
         _loss.append(ted)
         _losses += ted
     
     t_loss = torch.mean(torch.Tensor([_loss]))
     t_loss.requires_grad_()
     t_loss = t_loss.cuda()
     #-------------------------------------------# 
 
 # construct the input to the generator, add zeros before samples and delete the last column
 zeros = torch.zeros((BATCH_SIZE, 1)).type(torch.LongTensor)
 if samples.is_cuda:
     zeros = zeros.cuda()
 
 inputs = Variable(torch.cat([zeros, samples.data], dim = 1)[:, :-1].contiguous())                    
Example #22
0
    def __eq__(self, that):
        """Return True when ``that`` is an ``NTree`` at edit distance zero.

        Any object that is not an ``NTree`` compares unequal.
        """
        if not isinstance(that, NTree):
            return False

        distance = APTED(self, that).compute_edit_distance()
        return distance == 0