def create_document(record_id, repo, **kwargs):
    """
    Builds the full text search document for a single record.
    It is meant to be called from the add_record_to_index method.

    Args:
        record_id: ID of the record. It is concatenated onto repo to form
            the document ID, so it must support string concatenation.
        repo: Name of the repository the record belongs to.
        **kwargs: Field values to index. Every entry is passed through
            script_variant.romanize_word and stored under its own key.
            The keys given_name, family_name, full_name, alternate_names,
            home_street, home_city, home_state, home_postal_code,
            home_neighborhood and home_country are also read directly, so
            a KeyError is raised when any of them is missing.

    Returns:
        An appengine_search.Document whose doc_id is '<repo>:<record_id>'.
    """
    doc_id = repo + ':' + record_id

    # Identifying fields always come first.
    fields = [
        appengine_search.TextField(name='repo', value=repo),
        appengine_search.TextField(name='record_id', value=record_id),
    ]

    # One field per keyword argument, holding its romanized value.
    fields += [
        appengine_search.TextField(
            name=key, value=script_variant.romanize_word(value))
        for key, value in kwargs.items()
    ]

    # Name fields produced by create_jp_name_fields.
    name_keys = ('given_name', 'family_name', 'full_name', 'alternate_names')
    fields.extend(create_jp_name_fields(
        **{key: kwargs[key] for key in name_keys}))

    # Location fields produced by create_jp_location_fields.
    location_keys = (
        'home_street', 'home_city', 'home_state',
        'home_postal_code', 'home_neighborhood', 'home_country')
    fields.extend(create_jp_location_fields(
        **{key: kwargs[key] for key in location_keys}))

    return appengine_search.Document(doc_id=doc_id, fields=fields)
def make_or_regexp(query_txt):
    """
    Builds a compiled, case-insensitive regular expression for OR search.

    Args:
        query_txt: Search query; words are separated by single spaces.

    Returns:
        A compiled pattern of the form query_word|query_word|..., where
        each alternative is a regexp-escaped item returned by
        script_variant.romanize_word for one of the query words. Empty
        items are left out. When nothing remains the pattern is the empty
        string, which matches at every position of any input.
    """
    # Flatten the romanizations of all words into a single list first.
    romanized_words = [
        romanized
        for query_word in query_txt.split(' ')
        for romanized in script_variant.romanize_word(query_word)
    ]
    # Escape each alternative so it is matched literally.
    alternatives = [re.escape(item) for item in romanized_words if item]
    return re.compile('|'.join(alternatives), re.I)
Esempio n. 3
0
def make_or_regexp(query_txt):
    """
    Compiles the search query into a regular expression that matches any
    of its romanized words.

    Args:
        query_txt: Search query; it is split on single space characters.

    Returns:
        Compiled regular expression (re.I flag set) in the shape
        query_word | query_word | ...
        Every alternative is passed through re.escape, and empty
        alternatives are skipped. If no alternative is left, the pattern
        is empty and therefore matches at every position of any string.
    """
    candidates = []
    for token in query_txt.split(' '):
        # romanize_word yields the variants to search for this token.
        candidates += script_variant.romanize_word(token)

    escaped = []
    for candidate in candidates:
        if not candidate:
            continue
        escaped.append(re.escape(candidate))

    pattern = '|'.join(escaped)
    return re.compile(pattern, re.I)
def create_romanized_query_txt(query_txt):
    """
    Rewrites the search query so that each word is replaced by its
    romanized variants.

    Args:
        query_txt: Search query; words are separated by single spaces.

    Returns:
        The result of enclose_in_parenthesis applied to the per-word
        clauses joined by ','. Each clause consists of the items returned
        by script_variant.romanize_word for that word, each passed through
        enclose_in_double_quotes and joined by ' OR '.
    """
    clauses = []
    for query_word in query_txt.split(' '):
        variants = script_variant.romanize_word(query_word)
        quoted_variants = [
            enclose_in_double_quotes(variant) for variant in variants]
        clauses.append(' OR '.join(quoted_variants))
    return enclose_in_parenthesis(','.join(clauses))
Esempio n. 5
0
def create_romanized_query_txt(query_txt):
    """
    Applies romanization to every word of the search query.

    Args:
        query_txt: Search query; it is split on single space characters.

    Returns:
        Script varianted query_txt: one clause per word, the clauses
        joined by ',' and the whole passed to enclose_in_parenthesis.
        A clause is the ' OR '-joined list of that word's romanized
        variants, each passed through enclose_in_double_quotes.
    """
    def or_clause(token):
        # All variants of one token, quoted and OR-ed together.
        quoted = [
            enclose_in_double_quotes(variant)
            for variant in script_variant.romanize_word(token)
        ]
        return ' OR '.join(quoted)

    per_word_clauses = [or_clause(token) for token in query_txt.split(' ')]
    joined_clauses = ','.join(per_word_clauses)
    return enclose_in_parenthesis(joined_clauses)