Exemple #1
0
    def __init__(self, name, expression, mathml=None, initRetrievalTime='undefined'):
        """
        Set up a query from a MathML source or, when none is given, from an SLT string.

        :param name: identifier stored on the query
        :param expression: SLT string; parsed only when mathml is None
        :param mathml: optional MathML source; when given, the tree is built from it
                       and the expression string is regenerated from that tree
        :param initRetrievalTime: stored as-is in self.initRetrievalTime
        """
        self.name = name
        self.mathml = mathml

        # both containers start out empty
        self.results, self.documents = {}, {}

        if mathml is None:
            # no mathml information available: parse the SLT string and keep it verbatim
            self.tree = SymbolTree.parse_from_slt(expression)
            self.expression = expression
        else:
            # mathml available: build the tree from it and derive the expression from the tree
            self.tree = MathExtractor.convert_and_link_mathml(mathml)
            self.expression = self.tree.tostring()

        # default constraints are derived from the tree built above
        self.constraints = Query.create_default_constraints(self.tree)

        # sorted views are initialised to None
        self.sorted_results = None
        self.sorted_result_index = None
        self.sorted_abs_ranks = None
        self.sorted_documents = None
        self.sorted_document_index = None
        self.elapsed_time = 0.0

        # RZ: tuple-based retrieval time and other measures
        self.initRetrievalTime = initRetrievalTime
        self.postings = None
        self.matchedFormulae = None
        self.matchedDocs = None

        # cache for html query blocks, initially empty
        self.html_queryblock = {}
    def __init__(self, query, expression, original_ranking, original_score, mathml=None):
        """
        Create a result entry for a query and check that its tree round-trips.

        The tree is built from mathml when it is given, otherwise from the SLT
        string in expression. In both cases the string form of the tree must be
        equal to expression; if it is not, a message is printed and the process
        terminates with exit status 1.

        :param query: query this result belongs to (its name is used in the error message)
        :param expression: SLT string of the result
        :param original_ranking: stored as-is in self.original_ranking
        :param original_score: stored as-is in self.original_score
        :param mathml: optional MathML source for the result
        :raises SystemExit: when the tree string differs from expression
        """
        self.query = query
        self.original_ranking = original_ranking
        self.original_score = original_score
        self.mathml = mathml
        self.new_scores = [0.0]

        if mathml is not None:
            # parse from mathml (additional information extracted)
            self.tree = MathExtractor.convert_and_link_mathml(mathml)
            tree_string = self.tree.tostring()
            self.expression = tree_string

            # dump the converted tree to a scratch file; the with-block guarantees
            # the handle is closed even if the write raises
            with open("probando.txt", 'w', encoding='utf-8') as out_file:
                out_file.write(tree_string)
        else:
            # parse from SLT string (no mathml information available)
            self.tree = SymbolTree.parse_from_slt(expression)
            tree_string = self.tree.tostring()
            self.expression = expression

        if tree_string != expression:
            print("Bad conversion for result for query " + query.name + ": " + expression + " -> " + tree_string)
            # raise SystemExit directly instead of calling exit(), which is only
            # available when the site module has been loaded
            raise SystemExit(1)

        self.locations = []
        self.matched_elements = []
        self.unified_elements = {}
        self.wildcard_matches = {}
        self.all_unified = []
        self.times_rendered = 0
Exemple #3
0
    def create_default_constraints(query_tree, default_value="U"):
        """
        Build the default constraint tree for a query tree.

        :param query_tree: tree whose root is passed to Query.duplicate_structure
        :param default_value: value handed to Query.duplicate_structure ("U" by default)
        :return: a SymbolTree wrapping the converted duplicate root
        """
        # copy of the query structure, created with the default value
        constraint_root = Query.duplicate_structure(query_tree.root, default_value)

        # turn the copied nodes into constraint nodes (works on the copy in place)
        Query.convert_to_constraint_tree(constraint_root)

        return SymbolTree(constraint_root)
def eval_similarity(query_data):
    """
    Score a batch of candidate expressions against one query.

    :param query_data: tuple (query, start_idx, expressions) where query is a
                       (name, SLT expression) pair, start_idx is the absolute index
                       of the first candidate, and expressions is a sequence of
                       tab-separated lines read as: expression, doc id, location
    :return: list of (scores, absolute_index) tuples; candidates whose similarity
             computation raised an exception are reported on stdout and left out
    """
    query, start_idx, expressions = query_data

    csv_reader = csv.reader(expressions,
                            delimiter='\t',
                            lineterminator='\n',
                            quoting=csv.QUOTE_NONE,
                            escapechar="\\")

    end_idx = start_idx + len(expressions) - 1

    # create query slt (the query name is not needed here)
    _, query_expression = query
    query_tree = SymbolTree.parse_from_slt(query_expression)
    query_constraints = Query.create_default_constraints(query_tree)

    results = []
    for idx, parts in enumerate(csv_reader):
        expression = parts[0]
        doc_id = parts[1]
        location = parts[2]

        candidate_tree = SymbolTree.parse_from_slt(expression)

        try:
            data = SIM_FUNCTION(query_tree, candidate_tree, query_constraints)
            scores = data[0]
        except Exception:
            # best effort: report the failing pair and move on. Only Exception is
            # caught so that KeyboardInterrupt / SystemExit still stop the run.
            print("Error processing: ")
            print(query_expression, flush=True)
            print(expression, flush=True)
            print("Doc: " + doc_id, flush=True)
            print("Loc: " + location, flush=True)
            continue

        # the index is only returned because some expressions might be absent in case of errors
        results.append((scores, start_idx + idx))

    print("Processed: " + str(start_idx) + " to " + str(end_idx) + " finished",
          flush=True)

    return results
Exemple #5
0
    def set_constraints(self, slt_string):
        """
        Replace the query constraints with the tree described by slt_string.

        The string is parsed as an SLT and its text labels are converted into
        constraints. The new tree is stored only if Query.equal_subtree_structure
        accepts it against the query tree; otherwise a warning is printed and the
        current constraints are left unchanged.

        :param slt_string: SLT string holding the constraint labels
        """
        candidate = SymbolTree.parse_from_slt(slt_string)
        Query.convert_to_constraint_tree(candidate.root)

        if Query.equal_subtree_structure(self.tree.root, candidate.root):
            self.constraints = candidate
            return

        print("Warning: Invalid constraint tree specified for " + self.name)
Exemple #6
0
    def set_constraints(self, slt_string):
        """
        Install a user-supplied constraint tree for this query.

        :param slt_string: SLT string whose labels are converted to constraints.
                           The resulting tree is kept only when
                           Query.equal_subtree_structure returns a true value for
                           it against the query tree; otherwise a warning is
                           printed and self.constraints is not modified.
        """
        # start from a tree carrying the original text labels ...
        new_constraints = SymbolTree.parse_from_slt(slt_string)
        # ... and convert those labels into constraints in place
        Query.convert_to_constraint_tree(new_constraints.root)

        same_structure = Query.equal_subtree_structure(self.tree.root, new_constraints.root)

        if same_structure:
            self.constraints = new_constraints
        else:
            print("Warning: Invalid constraint tree specified for " + self.name)
def eval_similarity(query_data):
    """
    Score a batch of candidate expressions against one query using similarity_v04.

    :param query_data: tuple (query, start_idx, expressions) where query is a
                       (name, SLT expression) pair, start_idx is the absolute index
                       of the first candidate, and expressions is a sequence of
                       tab-separated lines read as: expression, doc id, location
    :return: list of (scores, absolute_index) tuples; candidates whose similarity
             computation raised an exception are reported on stdout and left out
    """
    query, start_idx, expressions = query_data

    end_idx = start_idx + len(expressions) - 1

    # create query slt (the query name is not needed here)
    _, query_expression = query
    query_tree = SymbolTree.parse_from_slt(query_expression)
    query_constraints = Query.create_default_constraints(query_tree)

    results = []
    for idx, expression_info in enumerate(expressions):
        parts = expression_info.strip().split("\t")
        expression = parts[0]
        doc_id = parts[1]
        location = parts[2]

        candidate_tree = SymbolTree.parse_from_slt(expression)

        try:
            # only the scores are used; the remaining three values are discarded
            scores, _matched_q, _matched_c, _unified_c = similarity_v04(query_tree, candidate_tree, query_constraints)
        except Exception:
            # best effort: report the failing pair and move on. Only Exception is
            # caught so that KeyboardInterrupt / SystemExit still stop the run.
            print("Error processing: ")
            print(query_expression, flush=True)
            print(expression, flush=True)
            print("Doc: " + doc_id, flush=True)
            print("Loc: " + location, flush=True)
            continue

        # the index is only returned because some expressions might be absent in case of errors
        results.append((scores, start_idx + idx))

    print("Processed: " + str(start_idx) + " to " + str(end_idx) + " finished", flush=True)

    return results
Exemple #8
0
    def __init__(self,
                 name,
                 expression,
                 mathml=None,
                 initRetrievalTime='undefined',
                 max_results=0):
        """
        Set up a query from a MathML source or, when none is given, from an SLT string.

        :param name: identifier stored on the query
        :param expression: SLT string; parsed only when mathml is None
        :param mathml: optional MathML source; when given, the tree is built from it
                       and the expression string is regenerated from that tree
        :param initRetrievalTime: stored as-is in self.initRetrievalTime
        :param max_results: stored in self.max_results (re-rank at most K results)
        """
        self.name = name
        self.mathml = mathml

        # result and document containers start out empty
        self.results = {}
        self.documents = {}

        if mathml is None:
            # no mathml information available: parse the SLT string directly
            self.tree = SymbolTree.parse_from_slt(expression)
            self.expression = expression
        else:
            # mathml given: the tree comes from it and the expression from the tree
            self.tree = MathExtractor.convert_and_link_mathml(mathml)
            self.expression = self.tree.tostring()

        self.constraints = Query.create_default_constraints(self.tree)

        # sorted views are initialised to None
        self.sorted_results = None
        self.sorted_result_index = None
        self.sorted_abs_ranks = None
        self.sorted_documents = None
        self.sorted_document_index = None
        self.elapsed_time = 0.0

        # RZ: tuple-based retrieval time and other measures
        self.initRetrievalTime = initRetrievalTime
        self.postings = None
        self.matchedFormulae = None
        self.matchedDocs = None

        # Re-rank at most K results
        self.max_results = max_results

        # cache for html query blocks, initially empty
        self.html_queryblock = {}
Exemple #9
0
    def __init__(self, query, expression, original_ranking, original_score, mathml=None):
        """
        Create a result entry for a query.

        :param query: query this result belongs to
        :param expression: SLT string of the result; parsed only when mathml is None
        :param original_ranking: stored as-is in self.original_ranking
        :param original_score: stored as-is in self.original_score
        :param mathml: optional MathML source; when given, the tree is built from it
                       and the expression string is regenerated from that tree
        """
        self.query = query
        self.original_ranking = original_ranking
        self.original_score = original_score
        self.mathml = mathml
        self.new_scores = [0.0]

        if mathml is None:
            # no mathml information available: parse the SLT string and keep it verbatim
            self.tree = SymbolTree.parse_from_slt(expression)
            self.expression = expression
        else:
            # mathml given: the tree comes from it and the expression from the tree
            self.tree = MathExtractor.convert_and_link_mathml(mathml)
            self.expression = self.tree.tostring()

        # match bookkeeping starts out empty
        self.locations = []
        self.matched_elements = []
        self.unified_elements = []
        self.times_rendered = 0