def find_substructures(expressions_data):
    """Re-score the candidates of one rank and group them by structure.

    Args:
        expressions_data: A pair ``(query_expression, candidates_data)``.
            ``query_expression`` is the query string. ``candidates_data`` is
            a sequence of records where index 0 is the candidate expression
            string, index 1 is the rank (anything ``int()`` accepts) and
            indices 2-4 are the three scores (anything ``float()`` accepts).

    Returns:
        A tuple ``(rank, scores, structures)``.

        * With a single candidate, nothing is re-evaluated: ``rank`` and
          ``scores`` are parsed from the record and ``structures`` is
          ``[[candidate_exp]]``.
        * With several candidates, each one is added to a ``Query`` and
          scored with ``similarity_v04``. ``rank`` is taken from the last
          record, ``scores`` from the last candidate that was scored
          successfully (``[-1.0, 0, 0]`` if none was), and ``structures``
          holds one list per subgroup of ``query.sorted_results[0]``.

    Raises:
        IndexError: If ``candidates_data`` is empty.
    """
    query_expression, candidates_data = expressions_data

    if len(candidates_data) > 1:
        # create query tree ....
        query = Query("query", query_expression)

        rank = -1
        # Placeholder returned if no candidate can be scored.
        scores = [-1.0, 0, 0]
        for candidate_data in candidates_data:
            candidate_exp = candidate_data[0]
            rank = int(candidate_data[1])

            query.add_result(0, "", 0, candidate_exp, 0.0)

            result = query.results[candidate_exp]
            candidate_tree = result.tree

            try:
                scores, _matched_q, matched_c, unified_c = similarity_v04(
                    query.tree, candidate_tree, query.constraints)
            except Exception:
                # Best effort: report the failing pair and move on to the
                # next candidate. ``Exception`` (rather than a bare except)
                # lets KeyboardInterrupt / SystemExit stop the process.
                print("Error processing: ")
                print("Q: " + query_expression, flush=True)
                print("C: " + candidate_exp, flush=True)
                continue

            result.set_unified_elements(unified_c)
            result.set_matched_elements(matched_c)
            result.new_scores = scores

        query.sort_results()

        # Only the first group of the sorted results is used; each of its
        # subgroups becomes one structure (copied into a plain list).
        group = query.sorted_results[0]
        structures = [list(subgroup) for subgroup in group]
    else:
        # just one expression in rank, no need to re-evaluate score ...
        candidate_data = candidates_data[0]
        candidate_exp = candidate_data[0]
        rank = int(candidate_data[1])
        scores = [float(part) for part in candidate_data[2:5]]

        # the list of structures only contains one structure with the
        # single candidate expression
        structures = [[candidate_exp]]

    return (rank, scores, structures)
Beispiel #2
0
def find_substructures(expressions_data):
    """Re-score the candidates of one rank and group them by structure.

    Args:
        expressions_data: A pair ``(query_expression, candidates_data)``.
            ``query_expression`` is the query string. ``candidates_data`` is
            a sequence of records where index 0 is the candidate expression
            string, index 1 is the rank (anything ``int()`` accepts) and the
            next ``N_SCORES`` entries are scores (anything ``float()``
            accepts).

    Returns:
        A tuple ``(rank, scores, structures)``.

        * With a single candidate, nothing is re-evaluated: ``rank`` and
          ``scores`` are parsed from the record and ``structures`` is
          ``[[candidate_exp]]``.
        * With several candidates, each one is added to a ``Query`` and
          scored with ``SIM_FUNCTION``. ``rank`` is taken from the last
          record, ``scores`` from the last candidate that was scored
          successfully (``None`` if none was), and ``structures`` holds one
          list per subgroup of ``query.sorted_results[0]``.

    Raises:
        IndexError: If ``candidates_data`` is empty.
    """
    query_expression, candidates_data = expressions_data

    if len(candidates_data) > 1:
        # create query tree ....
        query = Query("query", query_expression)

        rank = -1
        scores = None
        prev_scores = None

        for candidate_data in candidates_data:
            candidate_exp = candidate_data[0]
            rank = int(candidate_data[1])

            query.add_result(0, "", 0, candidate_exp, 0.0)

            result = query.results[candidate_exp]

            try:
                sim_res = SIM_FUNCTION(query.tree, result.tree, query.constraints)
                scores, _matched_q, matched_c, unified_c, wildcard_c, _unified = sim_res
            except Exception:
                # Best effort: report the failing pair and move on to the
                # next candidate. ``Exception`` (rather than a bare except)
                # lets KeyboardInterrupt / SystemExit stop the process.
                print("Error processing: ")
                print("Q: " + query_expression, flush=True)
                print("C: " + candidate_exp, flush=True)
                continue

            result.set_unified_elements(unified_c)
            result.set_matched_elements(matched_c)
            result.set_wildcard_matches(wildcard_c)
            result.new_scores = scores

            # Sanity check: the code reports any candidate whose scores
            # differ from the previously scored one.
            if prev_scores is None:
                prev_scores = scores
            elif prev_scores != scores:
                print("Error: Scores changed!")
                print(prev_scores)
                print(scores)
                prev_scores = scores

        query.sort_results()

        if len(query.sorted_results) > 1:
            print("Error: Did not expect More than 1 group")
            print("-> " + str(len(query.sorted_results)))

        # Only the first group of the sorted results is used; each of its
        # subgroups becomes one structure (copied into a plain list).
        group = query.sorted_results[0]
        structures = [list(subgroup) for subgroup in group]

    else:
        # just one expression in rank, no need to re-evaluate score ...
        candidate_data = candidates_data[0]
        candidate_exp = candidate_data[0]
        rank = int(candidate_data[1])
        scores = [float(part) for part in candidate_data[2:(2 + N_SCORES)]]

        # the list of structures only contains one structure with the
        # single candidate expression
        structures = [[candidate_exp]]

    return (rank, scores, structures)