def get_descriptive_substitutions(syn_svc, list_desc):
    """Look up substitution synonyms for descriptive words.

    Args:
        syn_svc: Synonym service exposing ``get_all_substitutions_synonyms``.
        list_desc: Words to look up; they are sent to the service with
            ``words_are_distinctive=False``.

    Returns:
        The ``.data`` payload of the service response after it has been run
        through ``parse_dict_of_lists``.
    """
    response = syn_svc.get_all_substitutions_synonyms(
        words=list_desc,
        words_are_distinctive=False,
    )
    return parse_dict_of_lists(response.data)
def get_distinctive_substitutions(syn_svc, list_dist):
    """Look up substitution synonyms for distinctive words.

    Args:
        syn_svc: Synonym service exposing ``get_all_substitutions_synonyms``.
        list_dist: Words to look up; they are sent to the service with
            ``words_are_distinctive=True``.

    Returns:
        The ``.data`` payload of the service response after it has been run
        through ``parse_dict_of_lists``.
    """
    response = syn_svc.get_all_substitutions_synonyms(
        words=list_dist,
        words_are_distinctive=True,
    )
    return parse_dict_of_lists(response.data)
def _set_entity_type_end_designation(self):
    """Populate ``self._entity_type_end_designation``.

    Fetches every end designation from the synonym service, parses the
    payload with ``parse_dict_of_lists`` and asks the service which entity
    types correspond to ``self._designation_end_list``. The ``.data`` of that
    answer is stored on the instance; nothing is returned.
    """
    service = self.synonym_service
    end_designations_in_name = self._designation_end_list

    end_designations_by_entity = parse_dict_of_lists(
        service.get_all_end_designations().data
    )

    lookup = service.get_entity_type_end_designation(
        entity_end_designation_dict=end_designations_by_entity,
        all_designation_any_end_list=end_designations_in_name,
    )
    self._entity_type_end_designation = lookup.data
def _set_entity_type_any_designation(self):
    """Populate ``self._entity_type_any_designation``.

    Fetches the designations from the synonym service, parses the payload
    with ``parse_dict_of_lists`` and asks the service which entity types
    correspond to ``self._designation_any_list``. The ``.data`` of that
    answer is stored on the instance; nothing is returned.
    """
    service = self.synonym_service
    any_designations_in_name = self._designation_any_list

    # NOTE(review): the source of designations here is
    # get_all_end_designations(), the same call used for the "end" variant.
    # Confirm this is intended for the "any" designation lookup.
    designations_by_entity = parse_dict_of_lists(
        service.get_all_end_designations().data
    )

    lookup = service.get_entity_type_any_designation(
        entity_any_designation_dict=designations_by_entity,
        all_designation_any_end_list=any_designations_in_name,
    )
    self._entity_type_any_designation = lookup.data
def get_substitutions_distinctive(self, w_dist):
    """Return distinctive substitutions, each list including its own key.

    Args:
        w_dist: Words sent to the synonym service with
            ``words_are_distinctive=True``.

    Returns:
        The mapping produced by ``parse_dict_of_lists``; every value list is
        extended in place with its key when the key is not already a member.
    """
    response = self.synonym_service.get_all_substitutions_synonyms(
        words=w_dist,
        words_are_distinctive=True,
    )
    substitutions_by_word = parse_dict_of_lists(response.data)

    for word, substitutions in substitutions_by_word.items():
        if word in substitutions:
            continue
        # Make sure a word always counts as a substitution of itself.
        substitutions.append(word)

    return substitutions_by_word
def get_substitutions_descriptive(self, w_desc):
    """Return descriptive synonyms, each list including its own key.

    Args:
        w_desc: Iterable of descriptive words. Spaces are removed from each
            word before it is sent to the synonym service with
            ``words_are_distinctive=False``.

    Returns:
        The mapping produced by ``parse_dict_of_lists``; every value list is
        extended in place with its key when the key is not already a member.
    """
    # The service is queried with the space-less form of every word.
    compacted_words = [word.replace(" ", "") for word in w_desc]

    response = self.synonym_service.get_all_substitutions_synonyms(
        words=compacted_words,
        words_are_distinctive=False,
    )
    synonyms_by_word = parse_dict_of_lists(response.data)

    for word, synonyms in synonyms_by_word.items():
        if word in synonyms:
            continue
        # Make sure a word always counts as a synonym of itself.
        synonyms.append(word)

    return synonyms_by_word
def get_all_substitutions(syn_svc, list_dist, list_desc, list_name):
    """Collect the substitutions for every word of a name, in name order.

    Args:
        syn_svc: Synonym service exposing ``get_all_substitutions_synonyms``.
        list_dist: Words looked up with ``words_are_distinctive=True``.
        list_desc: Words looked up with ``words_are_distinctive=False``.
        list_name: Words of the name; their order drives the order of the
            combined result.

    Returns:
        A 3-tuple ``(all_substitutions, dist_substitutions,
        desc_substitutions)``. ``all_substitutions`` is an ``OrderedDict``
        keyed by the words of ``list_name`` found in either lookup (the
        distinctive lookup wins when a word is in both); its values are the
        lower-cased substitutions passed through ``porter.stem``. The other
        two items are the unmodified parsed lookups.
    """
    dist_lookup = parse_dict_of_lists(
        syn_svc.get_all_substitutions_synonyms(
            words=list_dist, words_are_distinctive=True
        ).data
    )
    desc_lookup = parse_dict_of_lists(
        syn_svc.get_all_substitutions_synonyms(
            words=list_desc, words_are_distinctive=False
        ).data
    )

    # First pass: pick the raw substitution list for each word of the name,
    # preferring the distinctive lookup over the descriptive one.
    raw_by_word = collections.OrderedDict()
    for word in list_name:
        for lookup in (dist_lookup, desc_lookup):
            if word in lookup:
                raw_by_word[word] = lookup[word]
                break

    # Second pass: normalise every substitution (lower-case, then stem)
    # without touching the lists held by the two lookups.
    stemmed_by_word = collections.OrderedDict(
        (word, [porter.stem(entry.lower()) for entry in entries])
        for word, entries in raw_by_word.items()
    )

    return stemmed_by_word, dist_lookup, desc_lookup
def search_conflicts(self, list_dist_words, list_desc_words, list_name, name):
    """Search for existing names that conflict with the analysed name.

    For each positional pair of distinctive/descriptive word groups, the
    substitution synonyms are fetched from the synonym service. Every
    distinctive substitution list is injected into a fresh general query,
    which is then combined with each descriptive synonym list in turn. All
    matches are accumulated and scored through ``get_most_similar_names``.

    Args:
        list_dist_words: Distinctive word groups, paired by position with
            ``list_desc_words`` (pairing stops at the shorter of the two).
        list_desc_words: Descriptive word groups.
        list_name: Forwarded to ``get_most_similar_names`` and echoed back in
            the result values.
        name: Forwarded to ``get_most_similar_names``.

    Returns:
        A ``ProcedureResult``. When ``prepare_response`` yields a non-empty
        response, ``is_valid`` is False, ``result_code`` is
        ``AnalysisIssueCodes.CORPORATE_CONFLICT`` and ``values`` carries the
        conflict details. Otherwise ``is_valid`` is True, ``result_code`` is
        ``AnalysisIssueCodes.CHECK_IS_VALID`` and ``values`` is an empty list.
    """
    syn_svc = self.synonym_service

    result = ProcedureResult()
    result.is_valid = False

    all_matches_list = []  # Contains all the conflicts from database
    most_similar_names = []
    dict_highest_counter = {}
    dict_highest_detail = {}
    response = {}

    for w_dist, w_desc in zip(list_dist_words, list_desc_words):
        dist_substitution_dict = parse_dict_of_lists(
            syn_svc.get_all_substitutions_synonyms(
                words=w_dist, words_are_distinctive=True
            ).data
        )
        desc_synonym_dict = parse_dict_of_lists(
            syn_svc.get_all_substitutions_synonyms(
                words=w_desc, words_are_distinctive=False
            ).data
        )

        # Inject distinctive section in query
        for dist in dist_substitution_dict.values():
            criteria = Request.get_general_query()
            criteria = Request.get_query_distinctive_descriptive(
                dist, criteria, True)

            # Inject descriptive section into query, execute and add matches
            # to list
            for desc in desc_synonym_dict.values():
                matches = Request.get_query_distinctive_descriptive(
                    desc, criteria)
                all_matches_list.extend(matches)
                dict_highest_counter, dict_highest_detail = \
                    self.get_most_similar_names(
                        dict_highest_counter, dict_highest_detail, matches,
                        w_dist, w_desc, list_name, name)

    # NOTE(review): block nesting was reconstructed from a flattened source;
    # the ranking below is assumed to run once, after every word pair has
    # been processed - confirm against the original layout.
    #
    # Rank by highest counter first, then by shortest name. The ranked keys
    # are kept in a list: dict keys are already unique, and passing them
    # through a set would throw the ranking order away.
    ranked = sorted(
        dict_highest_counter.items(),
        key=lambda item: (-item[1], len(item[0])),
    )
    most_similar_names.extend(
        conflict_name for conflict_name, _ in ranked[:MAX_MATCHES_LIMIT]
    )

    if most_similar_names:
        response = self.prepare_response(
            all_matches_list, most_similar_names, dict_highest_detail)

    if response:
        result.is_valid = False
        result.result_code = AnalysisIssueCodes.CORPORATE_CONFLICT
        result.values = {
            'list_name': list_name,
            'list_dist': list_dist_words,
            'list_desc': list_desc_words,
            'list_conflicts': response['names'],
            'corp_num': response['corp_num'],
            'consumption_date': response['consumption_date'],
        }
    else:
        result.is_valid = True
        result.result_code = AnalysisIssueCodes.CHECK_IS_VALID
        result.values = []

    return result