def _build_req_dict_for_map(trace_link_candidates, dataset: Dataset, reverse_compare=False):
    """
    Group the similarities of all trace link candidates by requirement.

    trace_link_candidates: Iterable of hashable TraceLink-objects; duplicates are dropped,
        keeping the first occurrence.
    dataset: Dataset that supplies the original req/code file names, the expected number
        of links and the solution matrix.
    reverse_compare: If truthy, dummy links are created with similarity 1 instead of 0
        (presumably because a smaller value then means "more similar" -- confirm with caller).

    If fewer candidates than dataset.num_original_links() are given, a dummy link is appended
    for every (req, code) combination that is not yet among the candidates.

    Returns a dict of the form
        req_dict["req_name"] = [(sim_to_code_1: float, relevant: bool), (sim_to_code_2, relevant), ...]
    where relevant is True if the pair is contained in the dataset's solution matrix.

    Raises AssertionError if the number of links after filling differs from
    dataset.num_original_links().
    """
    # Deterministic duplicate removal (set is not stable)
    trace_link_candidates = list(dict.fromkeys(trace_link_candidates))

    if len(trace_link_candidates) < dataset.num_original_links():
        # trace_link_candidates does not contain all possible links between reqs and code
        # -> Add missing links as dummy links with "no similarity"
        code_filenames = dataset.all_original_code_file_names(True)
        req_filenames = dataset.all_original_req_file_names(True)
        no_similarity = 1 if reverse_compare else 0
        # Mirror the list in a set so each membership test is O(1) instead of a list scan.
        # The links are already required to be hashable by dict.fromkeys above; this relies
        # on TraceLink.__hash__ being consistent with TraceLink.__eq__.
        known_links = set(trace_link_candidates)
        for req_name in req_filenames:
            for code_name in code_filenames:
                dummy_trace_link = TraceLink(MockEmbedding(req_name), MockEmbedding(code_name), no_similarity)
                if dummy_trace_link not in known_links:
                    known_links.add(dummy_trace_link)
                    trace_link_candidates.append(dummy_trace_link)

    assert len(trace_link_candidates) == dataset.num_original_links(), \
        f"{len(trace_link_candidates)} != {dataset.num_original_links()}"

    req_dict = {}
    # Use copy to track false negatives and avoid duplicate trace links
    sol_matrix_copy = Util.deep_copy(dataset.solution_matrix())
    keys_with_extension = dataset.keys_with_extension()
    for trace_link in trace_link_candidates:
        req_name = trace_link.get_req_key(keys_with_extension)
        code_name = trace_link.get_code_key(keys_with_extension)
        relevant = False
        if sol_matrix_copy.contains_req_code_pair(req_name, code_name):
            relevant = True
            # Remove the pair so that a second link for it is not counted as relevant again
            sol_matrix_copy.remove_trace_pair(req_name, code_name)
        req_dict.setdefault(req_name, []).append((trace_link.similarity, relevant))

    if PRINT_FALSE_NEGATIVES:
        # Pairs still left in the copy were not matched by any candidate
        _print_false_negatives(sol_matrix_copy)
    return req_dict
def evaluateMAPRecall(trace_link_candidates, dataset: Dataset, reverse_compare=False):
    """
    Evaluate MAP and recall (via Util.calculate_mean_average_precision_and_recall) for
    every cut-off k from 1 up to the number of original code files of the dataset.

    trace_link_candidates: List of TraceLink-objects
    dataset: Dataset supplying the code file names, the number of reqs and the solution matrix
    reverse_compare: Passed on unchanged to the req dict construction and the MAP/recall calculation

    Returns the tuple (0, 0) if there are no candidates. Otherwise returns a dict that maps
    the recall at k to the MAP at k; cut-offs yielding the same recall share one entry,
    which holds the MAP of the largest such k. Each (k, recall, MAP) triple is also printed.
    """
    if not trace_link_candidates:
        log.info("No Trace Link candidates!")
        return 0, 0

    similarities_by_req = _build_req_dict_for_map(trace_link_candidates, dataset, reverse_compare)
    highest_k = len(dataset.all_original_code_file_names())

    map_by_recall = {}
    for k in range(1, highest_k + 1):
        num_reqs = dataset.num_reqs()
        num_solution_links = dataset.solution_matrix()._number_of_trace_links
        map_k, rec_k = Util.calculate_mean_average_precision_and_recall(
            similarities_by_req, k, num_reqs, num_solution_links, reverse_compare)
        map_by_recall[rec_k] = map_k
        print(f"{k}: {rec_k}, {map_k}")
    return map_by_recall