def _compute_match(self, actual_params, formal_params):
    """Count the actual parameters whose type name resembles the formal one.

    Actual and formal parameters are compared position by position, on
    lower-cased names, using ``su.pairwise_simil``.

    Args:
        actual_params: Sized iterable of parameter type names; each one is
            passed through ``je.clean_java_name`` and its first result is
            used for the comparison.
        formal_params: Iterable collection exposing a no-argument
            ``count()`` and yielding objects that carry a
            ``type_simple_name`` attribute.

    Returns:
        The number of positions whose similarity reaches
        ``self.PARAM_SIMILARITY_THRESHOLD``. The result is 0 when there are
        no actual parameters, when the two collections differ in size, or
        when the ratio of matched positions is below
        ``self.PARAM_SIZE_THRESHOLD``.
    """
    size = len(actual_params)

    # An empty parameter list has nothing to match. Returning here also
    # keeps the ratio below from dividing by zero.
    if size == 0 or size != formal_params.count():
        return 0

    actuals = [
        je.clean_java_name(actual_param)[0] for actual_param in actual_params
    ]
    formals = [formal_param.type_simple_name for formal_param in formal_params]

    matches = 0
    for actual, formal in zip(actuals, formals):
        similarity = su.pairwise_simil(actual.lower(), formal.lower())
        if similarity >= self.PARAM_SIMILARITY_THRESHOLD:
            matches += 1

    # We don't want to favor half-matched methods because this is too
    # fragile.
    if float(matches) / float(size) < self.PARAM_SIZE_THRESHOLD:
        return 0

    return matches
def _get_potentials_by_similarity(self, potentials, fqn_container):
    """Keep the potentials whose container name resembles ``fqn_container``.

    Each potential's container name (from ``get_container(potential).fqn``)
    is cleaned with ``je.clean_java_name`` and scored against the cleaned
    ``fqn_container`` in two ways: the common-token ratio of their
    lower-cased tokens, and the pairwise similarity of their lower-cased
    simple names. A potential is discarded outright when the token ratio is
    0.0 or the pairwise similarity is below ``self.PAIRWISE_THRESHOLD``;
    otherwise its score is the larger of the two measures.

    Args:
        potentials: Iterable of candidate elements.
        fqn_container: Container name the candidates are compared against.

    Returns:
        A new list, in input order, of the candidates whose score reaches
        the cutoff. The cutoff is the best score seen, relaxed by
        ``self.DIFFERENCE_THRESHOLD`` when that best score is below
        ``self.HIGH_SIMILARITY``.
    """
    reference_simple, _ignored = je.clean_java_name(fqn_container)
    reference_tokens = [part.lower() for part in su.tokenize(reference_simple)]
    reference_lower = reference_simple.lower()

    scored = []
    best_score = 0.0
    for candidate in potentials:
        candidate_simple, _ignored = je.clean_java_name(
            get_container(candidate).fqn)
        candidate_tokens = [
            part.lower() for part in su.tokenize(candidate_simple)
        ]
        candidate_lower = candidate_simple.lower()

        token_ratio = self._get_common_token_ratio(
            reference_tokens, candidate_tokens)
        pairwise = su.pairwise_simil(reference_lower, candidate_lower)

        # This is the minimum required by this filter.
        if token_ratio == 0.0 or pairwise < self.PAIRWISE_THRESHOLD:
            continue

        score = max(token_ratio, pairwise)
        best_score = max(best_score, score)
        scored.append((candidate, score))

    # Only an excellent best score demands an exact tie; otherwise accept
    # candidates that are fuzzily near the best score.
    if best_score < self.HIGH_SIMILARITY:
        cutoff = best_score - self.DIFFERENCE_THRESHOLD
    else:
        cutoff = best_score

    return [candidate for candidate, score in scored if score >= cutoff]
def _get_potentials_by_similarity(self, potentials, fqn_container):
    """Select the potentials whose container is named like ``fqn_container``.

    The container name of every potential
    (``get_container(potential).fqn``) and ``fqn_container`` are both
    cleaned with ``je.clean_java_name``. Two measures are then taken on the
    lower-cased names: the ratio of common tokens and the pairwise string
    similarity. A potential survives the first pass only if the token ratio
    is not 0.0 and the pairwise similarity is at least
    ``self.PAIRWISE_THRESHOLD``; its score is the maximum of both measures.

    Args:
        potentials: Iterable of candidate elements.
        fqn_container: Container name used as the comparison reference.

    Returns:
        A new list preserving input order with the candidates scoring at
        least the cutoff: the highest score, lowered by
        ``self.DIFFERENCE_THRESHOLD`` if that highest score is under
        ``self.HIGH_SIMILARITY``.
    """
    target_simple, _rest = je.clean_java_name(fqn_container)
    target_tokens = [tok.lower() for tok in su.tokenize(target_simple)]
    target_lower = target_simple.lower()

    survivors = []
    top_score = 0.0
    for element in potentials:
        element_simple, _rest = je.clean_java_name(get_container(element).fqn)
        element_tokens = [tok.lower() for tok in su.tokenize(element_simple)]
        element_lower = element_simple.lower()

        shared_ratio = self._get_common_token_ratio(
            target_tokens, element_tokens)
        string_similarity = su.pairwise_simil(target_lower, element_lower)

        # This is the minimum required by this filter.
        if shared_ratio == 0.0 or string_similarity < self.PAIRWISE_THRESHOLD:
            continue

        element_score = max(shared_ratio, string_similarity)
        top_score = max(top_score, element_score)
        survivors.append((element, element_score))

    # Keep exact ties with a high best score; below that, also accept
    # elements that are fuzzily near the best score.
    cutoff = (top_score - self.DIFFERENCE_THRESHOLD
              if top_score < self.HIGH_SIMILARITY else top_score)

    return [
        element for element, element_score in survivors
        if element_score >= cutoff
    ]
def _compute_match(self, actual_params, formal_params):
    """Return how many actual parameters match their formal counterpart.

    The i-th actual parameter is compared with the i-th formal parameter:
    both names are lower-cased and scored with ``su.pairwise_simil``.

    Args:
        actual_params: Sized iterable of parameter type names; the first
            element returned by ``je.clean_java_name`` for each name is the
            value that gets compared.
        formal_params: Iterable collection with a no-argument ``count()``
            whose items expose ``type_simple_name``.

    Returns:
        The count of positions scoring at least
        ``self.PARAM_SIMILARITY_THRESHOLD``, or 0 if there are no actual
        parameters, if the sizes differ, or if the matched fraction is
        lower than ``self.PARAM_SIZE_THRESHOLD``.
    """
    size = len(actual_params)

    # With no parameters there is nothing to compare, and the matched
    # fraction below would otherwise divide by zero.
    if size == 0 or size != formal_params.count():
        return 0

    actuals = [je.clean_java_name(actual_param)[0]
               for actual_param in actual_params]
    formals = [formal_param.type_simple_name
               for formal_param in formal_params]

    matches = sum(
        1 for actual, formal in zip(actuals, formals)
        if su.pairwise_simil(actual.lower(), formal.lower())
        >= self.PARAM_SIMILARITY_THRESHOLD
    )

    # We don't want to favor half-matched methods because this is too
    # fragile.
    if float(matches) / float(size) < self.PARAM_SIZE_THRESHOLD:
        return 0

    return matches
def get_code_element(self, scode_reference, code_elements, simple, fqn, log,
                     insensitive=False):
    """Choose the code element that best fits a reference.

    The selection runs in two stages. First a preliminary pick is made
    from ``code_elements``: the only element if there is one, the first
    element if ``fqn`` carries no usable package information, otherwise the
    element whose ``fqn`` is most similar to ``fqn`` according to
    ``su.pairwise_simil``. Then every filter in ``self.class_filters`` is
    applied in turn to the remaining potentials, and the first potential
    left (if any) becomes the final pick.

    Args:
        scode_reference: Reference being resolved; its ``content`` is
            printed and it is forwarded to the filters and the log.
        code_elements: List of candidates exposing ``fqn``, or ``None``,
            which is handled like an empty list. The list is not modified.
        simple: Simple name of the reference.
        fqn: Qualified name of the reference.
        log: Log object; ``reset_variables`` is called first, then one of
            ``one``, ``arbitrary``, ``insensitive`` or ``sensitive`` is set
            depending on the path taken, ``custom_filtered`` is set, and
            ``log_type`` is called with the outcome.
        insensitive: When true, names are lower-cased before the
            similarity comparison.

    Returns:
        A ``(code_element, potentials)`` tuple. ``code_element`` is
        ``None`` when there were no candidates and the filters produced
        none.
    """
    log.reset_variables()
    return_code_element = None

    # A missing candidate collection is handled like an empty one so the
    # loops and filters below never receive None.
    if code_elements is None:
        code_elements = []
    size = len(code_elements)

    # Work on a copy: promoting the best match to the front must not
    # reorder the list owned by the caller.
    potentials = list(code_elements)

    # DEBUG
    # NOTE(review): debug output kept as-is; consider a logger instead.
    print('DEBUG for {0}'.format(scode_reference.content))
    for code_element in code_elements:
        print(code_element.fqn)

    if size == 1:
        # There is only one code element.
        return_code_element = code_elements[0]
        potentials = [return_code_element]
        log.one = True
    elif size > 1:
        if (fqn == simple or UNKNOWN_PACKAGE in fqn
                or SNIPPET_PACKAGE in fqn):
            # Many elements and fqn is unknown.
            return_code_element = code_elements[0]
            log.arbitrary = True
        else:
            if insensitive:
                # Do an insensitive comparison on the fqn.
                fqn_lower = fqn.lower()
                sims = [
                    su.pairwise_simil(fqn_lower, code_element.fqn.lower())
                    for code_element in code_elements
                ]
            else:
                # Do a case sensitive comparison on the fqn.
                sims = [
                    su.pairwise_simil(fqn, code_element.fqn)
                    for code_element in code_elements
                ]

            max_sim = max(sims)
            index = sims.index(max_sim)
            return_code_element = code_elements[index]

            # Move the best match to the front of the potentials.
            del potentials[index]
            potentials.insert(0, return_code_element)

            # A sufficiently close match rules out every other candidate.
            if max_sim >= FQN_SIMILARITY_THRESHOLD:
                potentials = [return_code_element]

            if insensitive:
                log.insensitive = True
            else:
                log.sensitive = True

    # Each filter sees the potentials left by the previous one together
    # with the results accumulated so far.
    filter_results = []
    for afilter in self.class_filters:
        finput = filters.FilterInput(scode_reference, potentials, fqn, log,
                                     None, None, filter_results)
        result = afilter.filter(finput)
        potentials = result.potentials
        filter_results.append(result)

    if len(potentials) > 0:
        return_code_element = potentials[0]

    log.custom_filtered = filters.custom_filtered(filter_results)

    # Logging
    log.log_type(simple, fqn, scode_reference, return_code_element,
                 potentials, size)

    return (return_code_element, potentials)
def get_code_element(self, scode_reference, code_elements, simple, fqn, log,
                     insensitive=False):
    """Resolve a reference to one code element plus the remaining potentials.

    A preliminary choice is made among ``code_elements``: the sole element
    when only one exists; the first element when ``fqn`` equals ``simple``
    or contains ``UNKNOWN_PACKAGE`` or ``SNIPPET_PACKAGE``; otherwise the
    element whose ``fqn`` scores highest with ``su.pairwise_simil``. The
    potentials are then passed through each filter of
    ``self.class_filters`` and the first potential remaining, if any,
    replaces the preliminary choice.

    Args:
        scode_reference: Reference being resolved; its ``content`` is
            printed and it is handed to the filters and to the log.
        code_elements: List of candidates exposing ``fqn``. ``None`` is
            accepted and treated as no candidates. The list itself is left
            untouched.
        simple: Simple name of the reference.
        fqn: Qualified name of the reference.
        log: Log object. ``reset_variables`` is invoked up front; the flag
            ``one``, ``arbitrary``, ``insensitive`` or ``sensitive`` is set
            according to the path taken; ``custom_filtered`` is assigned
            and ``log_type`` is called before returning.
        insensitive: Compare lower-cased names when true.

    Returns:
        The tuple ``(code_element, potentials)``; ``code_element`` is
        ``None`` if nothing was selected.
    """
    log.reset_variables()
    return_code_element = None

    # None means "no candidates": normalise it so that iteration and the
    # filters below work on a real list.
    if code_elements is None:
        code_elements = []
    size = len(code_elements)

    # Copy before reordering so the caller's list keeps its order.
    potentials = list(code_elements)

    # DEBUG
    # NOTE(review): debug output kept as-is; consider a logger instead.
    print('DEBUG for {0}'.format(scode_reference.content))
    for code_element in code_elements:
        print(code_element.fqn)

    unknown_fqn = size > 1 and (fqn == simple
                                or UNKNOWN_PACKAGE in fqn
                                or SNIPPET_PACKAGE in fqn)

    if size == 1:
        # There is only one code element.
        return_code_element = code_elements[0]
        potentials = [return_code_element]
        log.one = True
    elif unknown_fqn:
        # Many elements and fqn is unknown.
        return_code_element = code_elements[0]
        log.arbitrary = True
    elif size > 1:
        # Compare on the fqn, lower-casing both sides when requested.
        if insensitive:
            fqn_lower = fqn.lower()
            sims = [su.pairwise_simil(fqn_lower, code_element.fqn.lower())
                    for code_element in code_elements]
        else:
            sims = [su.pairwise_simil(fqn, code_element.fqn)
                    for code_element in code_elements]

        max_sim = max(sims)
        index = sims.index(max_sim)
        return_code_element = code_elements[index]

        # Put the best match first among the potentials.
        del potentials[index]
        potentials.insert(0, return_code_element)

        # A close enough match leaves no room for the other candidates.
        if max_sim >= FQN_SIMILARITY_THRESHOLD:
            potentials = [return_code_element]

        if insensitive:
            log.insensitive = True
        else:
            log.sensitive = True

    # Chain the filters: each receives the potentials kept by the previous
    # one and the list of results produced so far.
    filter_results = []
    for afilter in self.class_filters:
        finput = filters.FilterInput(scode_reference, potentials, fqn, log,
                                     None, None, filter_results)
        result = afilter.filter(finput)
        potentials = result.potentials
        filter_results.append(result)

    potentials_size = len(potentials)
    if potentials_size > 0:
        return_code_element = potentials[0]

    log.custom_filtered = filters.custom_filtered(filter_results)

    # Logging
    log.log_type(simple, fqn, scode_reference, return_code_element,
                 potentials, size)

    return (return_code_element, potentials)