def eval_similarity(query_data):
    # Evaluate the query against every candidate expression in this chunk;
    # query_data is a (query, start_idx, expressions) tuple.
    query, start_idx, expressions = query_data

    csv_reader = csv.reader(expressions,
                            delimiter='\t',
                            lineterminator='\n',
                            quoting=csv.QUOTE_NONE,
                            escapechar="\\")

    end_idx = start_idx + len(expressions) - 1

    # build the query SLT (symbol layout tree) and its default constraints
    query_name, query_expression = query
    query_tree = SymbolTree.parse_from_slt(query_expression)
    query_constraints = Query.create_default_constraints(query_tree)

    results = []
    for idx, parts in enumerate(csv_reader):
        expression = parts[0]
        doc_id = parts[1]
        location = parts[2]

        candidate_tree = SymbolTree.parse_from_slt(expression)

        try:
            # the similarity function returns scores first; other components are matched node sets
            data = SIM_FUNCTION(query_tree, candidate_tree, query_constraints)
            scores = data[0]
        except Exception:
            print("Error processing:", flush=True)
            print(query_expression, flush=True)
            print(expression, flush=True)
            print("Doc: " + doc_id, flush=True)
            print("Loc: " + location, flush=True)
            continue

        # the global index is returned because some expressions may be skipped when errors occur
        results.append((scores, start_idx + idx))

    print("Processed: " + str(start_idx) + " to " + str(end_idx) + " finished",
          flush=True)

    return results
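

# Illustrative sketch only (not part of the original code): one way eval_similarity
# might be driven in parallel, since query_data carries a chunk of candidate lines
# together with its starting index. The helper name, worker count, and chunk size
# below are hypothetical assumptions.
def parallel_eval_similarity(query, expression_lines, workers=4, chunk_size=1000):
    from multiprocessing import Pool

    # each line is expected to be a tab-separated "expression\tdoc_id\tlocation" record;
    # split the candidates into (query, start_idx, chunk) work items
    chunks = [(query, start, expression_lines[start:start + chunk_size])
              for start in range(0, len(expression_lines), chunk_size)]
    with Pool(workers) as pool:
        # each worker returns a list of (scores, global_index) pairs
        partial_results = pool.map(eval_similarity, chunks)
    return [item for part in partial_results for item in part]

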
def eval_similarity(query_data):
    # Evaluate the query against every candidate expression in this chunk;
    # query_data is a (query, start_idx, expressions) tuple.
    query, start_idx, expressions = query_data

    end_idx = start_idx + len(expressions) - 1

    # build the query SLT (symbol layout tree) and its default constraints
    query_name, query_expression = query
    query_tree = SymbolTree.parse_from_slt(query_expression)
    query_constraints = Query.create_default_constraints(query_tree)

    results = []
    for idx, expression_info in enumerate(expressions):
        parts = expression_info.strip().split("\t")
        expression = parts[0]
        doc_id = parts[1]
        location = parts[2]

        candidate_tree = SymbolTree.parse_from_slt(expression)

        try:
            scores, matched_q, matched_c, unified_c = similarity_v04(query_tree, candidate_tree, query_constraints)
        except Exception:
            print("Error processing:", flush=True)
            print(query_expression, flush=True)
            print(expression, flush=True)
            print("Doc: " + doc_id, flush=True)
            print("Loc: " + location, flush=True)
            continue

        # the global index is returned because some expressions may be skipped when errors occur
        results.append((scores, start_idx + idx))

    print("Processed: " + str(start_idx) + " to " + str(end_idx) + " finished", flush=True)

    return results
    def pivot_by_docs(self, how):
        """
        Pivot all query results (keyword and formula matches) by document.

        how = "core"                => use core value ranks (original scores) directly
              "MSS"                 => rerank with similarity_v06 (MSS scores)
              "v09" / "v10" / "v11" => rerank with the corresponding similarity version

        Documents are always joined on document name rather than doc_id.
        """
        self.by_document = {}

        if self.tquery:
            for doc_id, (docname, score, positions) in self.tquery.results.items():
                # add document if first time seen
                # join on docname, not doc_id
                try:
                    doc = self.by_document[docname]
                except KeyError:
                    doc = CompQueryResult(doc_id, docname)
                    self.by_document[docname] = doc
                # add score of keyword match to current document
                doc.set_tscore(score)
                doc.set_tpos(positions)
        if self.mqueries:
            for qexprnum, query in enumerate(self.mqueries):
                # keep scores for all existing formulas over all documents
                for result in query.results.values():
                    # N.B. only one Result structure per matched formula expression
                    #print("Candidate: " + result.tree.tostring(), flush=True)

                    if how == "MSS":  # compute the MSS score if requested
                        sim_res = similarity_v06(
                            query.tree, result.tree,
                            Query.create_default_constraints(query.tree))
                        # scores are the first component; the others are matched node sets
                        result.new_scores = sim_res[0]
                    elif how == "v09":
                        sim_res = similarity_v09(
                            query.tree, result.tree,
                            Query.create_default_constraints(query.tree))
                        result.new_scores = sim_res[0]  # only use scores
                    elif how == "v10":
                        sim_res = similarity_v10(
                            query.tree, result.tree,
                            Query.create_default_constraints(query.tree))
                        result.new_scores = sim_res[0]  # only use scores
                    elif how == "v11":
                        sim_res = similarity_v11(
                            query.tree, result.tree,
                            Query.create_default_constraints(query.tree))
                        result.new_scores = sim_res[0]  # only use scores
                    else:
                        # otherwise, just use the original retrieval score
                        result.new_scores = [result.original_score]

                    for doc_id, offset in result.locations:
                        title = query.documents[doc_id]
                        title = os.path.basename(title)  # just last part (KMD)

                        joiner = title  # always join on document name
                        # add document if first time seen
                        try:
                            doc = self.by_document[joiner]
                            doc.doc_id = doc_id  # prefer using math ids (to match positions later)
                        except KeyError:
                            doc = CompQueryResult(doc_id, title)
                            self.by_document[joiner] = doc
                        # add current result to current document
                        doc.add_mscore(qexprnum, result)
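
    # Illustrative sketch only (not part of the original class): the same
    # mode-to-similarity-function selection used in pivot_by_docs above, factored
    # into a lookup table. The method name is hypothetical; it assumes the
    # similarity_* functions are available at module level, as they are above.
    @staticmethod
    def _rerank_scores(how, query, result):
        rerankers = {"MSS": similarity_v06,
                     "v09": similarity_v09,
                     "v10": similarity_v10,
                     "v11": similarity_v11}
        reranker = rerankers.get(how)
        if reranker is None:
            # no reranking requested: keep the original retrieval score
            return [result.original_score]
        sim_res = reranker(query.tree, result.tree,
                           Query.create_default_constraints(query.tree))
        return sim_res[0]  # scores first; remaining components are node sets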