def test_romanize_search_query(word):
    """Check that romanizing u'天海' yields Japanese and Chinese readings.

    Args:
        word: Not used by the test body.
    """
    romanizations = script_variant.romanize_search_query(u'天海')
    expected_readings = (
        u'AMAMI',     # One of two possible Japanese romanizations.
        u'TENKAI',    # The other possible Japanese romanization.
        u'Tian Hai',  # Chinese romanization.
    )
    for reading in expected_readings:
        assert reading in romanizations
def is_query_match(query_txt, romanized_values):
    """Check whether a search query matches a record.

    The query is expanded with script_variant.romanize_search_query into
    one or more candidate strings. The record matches when, for at least
    one candidate, every space-separated word of that candidate occurs
    (case-insensitively) somewhere in the space-joined romanized_values.

    Words are matched literally: regular-expression metacharacters in the
    query are escaped, so input such as "(" or "a.b" can neither raise
    re.error nor match unintended text.

    Intended to be called from the get_person_ids_from_results method.

    Args:
        query_txt: Search query. A falsy query matches every record.
        romanized_values: Iterable of field value strings for the record.

    Returns:
        True if the query matches the record, False otherwise.
    """
    # An empty query matches everything.
    if not query_txt:
        return True

    romanized_query_list = script_variant.romanize_search_query(query_txt)

    # The searchable text does not depend on the candidate or the word, so
    # build it once instead of once per word.
    record_text = " ".join(romanized_values)

    # A query matches a record if all words of any one candidate appear
    # in the record.
    for search_terms in romanized_query_list:
        if all(re.search(re.escape(word), record_text, re.I)
               for word in search_terms.split(" ")):
            return True
    return False
def make_or_regexp(query_txt):
    """Build a compiled, case-insensitive regular expression for OR search.

    The query is split on single spaces and each piece is expanded with
    script_variant.romanize_search_query. Every non-empty variant is
    regex-escaped and the results are joined with '|'.

    Args:
        query_txt: Search query.

    Returns:
        A compiled pattern of the form variant|variant|... (re.I flag set).
    """
    alternatives = [
        re.escape(variant)
        for token in query_txt.split(' ')
        for variant in script_variant.romanize_search_query(token)
        if variant
    ]
    return re.compile('|'.join(alternatives), re.I)
def create_romanized_query_txt(query_txt):
    """Build a query string from the romanized variants of each query word.

    The query is split on single spaces. Each piece is expanded with
    script_variant.romanize_search_query; its variants are each passed
    through enclose_in_double_quotes and joined with ' OR '. The per-word
    groups are then joined with ',' and the whole string is passed through
    enclose_in_parenthesis.

    Args:
        query_txt: Search query.

    Returns:
        The script-varianted query text.
    """
    or_groups = []
    for token in query_txt.split(' '):
        variants = script_variant.romanize_search_query(token)
        quoted_variants = [
            enclose_in_double_quotes(variant) for variant in variants
        ]
        or_groups.append(' OR '.join(quoted_variants))
    return enclose_in_parenthesis(','.join(or_groups))