コード例 #1
0
 def test_romanize_search_query(word):
     """Romanizing a kanji query yields Japanese and Chinese readings."""
     # NOTE(review): `word` is not used in the body; it is kept because it
     # may be supplied by a fixture or parametrization outside this view.
     romanized = script_variant.romanize_search_query(u'天海')
     expected_readings = (
         u'AMAMI',     # first Japanese romanization
         u'TENKAI',    # second Japanese romanization
         u'Tian Hai',  # Chinese romanization
     )
     for reading in expected_readings:
         assert reading in romanized
コード例 #2
0
def is_query_match(query_txt, romanized_values):
    """
    Checks if a query matches a record.

    The query is expanded with script_variant.romanize_search_query, and the
    record matches as soon as every space-separated word of any one of the
    returned variants occurs (case-insensitively, as a literal substring) in
    the record's values.

    It should be called in get_person_ids_from_results method.

    Args:
        query_txt: Search query
        romanized_values: field values (an iterable of strings)
    Returns:
        Boolean: True if the query is empty or at least one romanized
        variant of the query is fully contained in the record.
    """
    # An empty query matches everything.
    if not query_txt:
        return True

    romanized_query_list = script_variant.romanize_search_query(query_txt)

    # The searched text is the same for every word, so build it only once.
    record_text = " ".join(romanized_values)

    for search_terms in romanized_query_list:
        words = search_terms.split(" ")
        # Words are escaped so that characters such as "(" or "*" in the
        # query are matched literally instead of being read as regex syntax
        # (which could raise re.error or match unrelated text).
        if all(re.search(re.escape(word), record_text, re.I)
               for word in words):
            return True
    return False
コード例 #3
0
def is_query_match(query_txt, romanized_values):
    """
    Checks if a query matches a record.

    The query is passed through script_variant.romanize_search_query; each
    returned variant is split on spaces, and the record matches when all
    words of at least one variant are found in the record's values. Words are
    compared case-insensitively and literally (regex metacharacters in the
    query have no special meaning).

    It should be called in get_person_ids_from_results method.

    Args:
        query_txt: Search query
        romanized_values: field values (an iterable of strings)
    Returns:
        Boolean: True for an empty query or when some romanized variant of
        the query is fully contained in the record, False otherwise.
    """
    # An empty query matches everything.
    if not query_txt:
        return True

    romanized_query_list = script_variant.romanize_search_query(query_txt)

    # Joined once up front: the haystack does not depend on the word.
    record_text = " ".join(romanized_values)

    for search_terms in romanized_query_list:
        for word in search_terms.split(" "):
            # re.escape keeps user input such as "(" or "+" from being
            # interpreted as a pattern, which could raise re.error or
            # produce false matches.
            if not re.search(re.escape(word), record_text, re.I):
                break
        else:
            # No word of this variant was missing from the record.
            return True
    return False
コード例 #4
0
def make_or_regexp(query_txt):
    """
    Builds the compiled regular expression used for OR search.

    Every space-separated word of the query is expanded with
    script_variant.romanize_search_query; all non-empty results are escaped
    and joined into a single case-insensitive alternation.

    Args:
        query_txt: Search query

    Returns:
        Compiled pattern of the form
        query_word | query_word | ...
        (an empty pattern when no non-empty word is produced).
    """
    romanized_variants = []
    for token in query_txt.split(' '):
        romanized_variants += script_variant.romanize_search_query(token)

    escaped_alternatives = []
    for variant in romanized_variants:
        # Empty variants are skipped so they add no empty alternative.
        if variant:
            escaped_alternatives.append(re.escape(variant))

    pattern = '|'.join(escaped_alternatives)
    return re.compile(pattern, re.I)
コード例 #5
0
def make_or_regexp(query_txt):
    """
    Compiles a case-insensitive OR pattern for a search query.

    The query is split on spaces, each piece is romanized through
    script_variant.romanize_search_query, and the non-empty results are
    escaped and combined with "|".

    Args:
        query_txt: Search query

    Returns:
        Compiled regular expression equivalent to
        query_word | query_word | ...
    """
    all_variants = [
        variant
        for token in query_txt.split(' ')
        for variant in script_variant.romanize_search_query(token)
    ]
    # Falsy variants are dropped before escaping; the rest are matched
    # literally.
    alternation = '|'.join(
        re.escape(variant) for variant in all_variants if variant)
    return re.compile(alternation, re.I)
コード例 #6
0
def create_romanized_query_txt(query_txt):
    """
    Builds a query string from the romanizations of each query word.

    For every space-separated word, the results of
    script_variant.romanize_search_query are each passed through
    enclose_in_double_quotes and joined with ' OR '. The per-word clauses are
    then joined with ',' and the whole string is passed through
    enclose_in_parenthesis.

    Args:
        query_txt: Search query
    Returns:
        script varianted query_txt
    """
    per_word_clauses = []
    for token in query_txt.split(' '):
        variants = script_variant.romanize_search_query(token)
        quoted_variants = [
            enclose_in_double_quotes(variant) for variant in variants
        ]
        per_word_clauses.append(' OR '.join(quoted_variants))

    combined = ','.join(per_word_clauses)
    return enclose_in_parenthesis(combined)
コード例 #7
0
def create_romanized_query_txt(query_txt):
    """
    Expands a search query into its script-variant form.

    Each word of the query (split on single spaces) becomes one clause: its
    romanizations from script_variant.romanize_search_query, individually
    wrapped by enclose_in_double_quotes and separated by ' OR '. Clauses are
    comma-joined and the result is wrapped by enclose_in_parenthesis.

    Args:
        query_txt: Search query
    Returns:
        script varianted query_txt
    """
    clauses = [
        ' OR '.join(
            enclose_in_double_quotes(variant)
            for variant in script_variant.romanize_search_query(token))
        for token in query_txt.split(' ')
    ]
    return enclose_in_parenthesis(','.join(clauses))