def _index_unicode(xdoc, value, prefix, language, termpos,
                   TRANSLATE_MAP=TRANSLATE_MAP):
    """Index the unicode text `value` into `xdoc` under `prefix`.

    Japanese ('ja') and Chinese ('zh') values are handed over to
    `_index_cjk`.  For any other language the accents are removed with
    `value.translate(TRANSLATE_MAP)` and the result is indexed through a
    `TermGenerator` (presumably Xapian's -- confirm against the imports).

    `TRANSLATE_MAP` is bound as a default argument at definition time;
    NOTE(review): this looks like a local-name lookup shortcut rather than
    a parameter callers are meant to pass -- confirm.

    Returns whatever `_index_cjk` returns for 'ja'/'zh'; otherwise the
    generator's term position after indexing, plus one.

    Raises TypeError if `value` is not a unicode string.
    """
    # Check type (subclasses of unicode are accepted as well)
    if not isinstance(value, unicode):
        msg = 'The value "%s", field "%s", is not a unicode'
        # Call form of raise: valid in both Python 2 and Python 3
        raise TypeError(msg % (value, prefix))

    # Case 1: Japanese or Chinese
    if language in ['ja', 'zh']:
        return _index_cjk(xdoc, value, prefix, termpos)

    # Case 2: Any other language
    tg = TermGenerator()
    tg.set_document(xdoc)
    # The generator is started one position before `termpos`; the "+ 1"
    # on return mirrors this offset.
    tg.set_termpos(termpos - 1)
    # Suppress the accents (FIXME This should be done by the stemmer)
    value = value.translate(TRANSLATE_MAP)
    # XXX With the stemmer, the words are saved twice:
    # with prefix and with Zprefix
    # tg.set_stemmer(stemmer)
    tg.index_text(value, 1, prefix)
    return tg.get_termpos() + 1
def _index_unicode(xdoc, value, prefix, language, termpos):
    """Index the text `value` into the document `xdoc` under `prefix`.

    When `language` is 'ja' or 'zh' the work is delegated to `_index_cjk`
    and its result is returned.  Otherwise the text is indexed with a
    `TermGenerator` and the generator's final term position plus one is
    returned.
    """
    if language not in ('ja', 'zh'):
        # Any other language
        generator = TermGenerator()
        generator.set_document(xdoc)
        # Start one position before `termpos`; compensated by the "+ 1"
        # in the return value below.
        generator.set_termpos(termpos - 1)
        # XXX The stemmer stays disabled: the words are saved twice,
        # with prefix and with Zprefix
        #generator.set_stemmer(stemmer)
        generator.index_text(value, 1, prefix)
        return generator.get_termpos() + 1

    # Japanese or Chinese
    return _index_cjk(xdoc, value, prefix, termpos)
def _index_unicode(xdoc, value, prefix, language, termpos):
    """Add the terms of `value` to `xdoc` under `prefix`.

    Japanese ("ja") and Chinese ("zh") text goes through `_index_cjk`,
    whose return value is passed back unchanged.  Every other language is
    indexed by a `TermGenerator`; in that case the return value is the
    generator's term position after indexing, plus one.
    """
    is_cjk = language in ("ja", "zh")
    if is_cjk:
        return _index_cjk(xdoc, value, prefix, termpos)

    # Any other language
    term_generator = TermGenerator()
    term_generator.set_document(xdoc)
    # The generator is positioned one step before `termpos`.
    start_position = termpos - 1
    term_generator.set_termpos(start_position)
    # XXX Stemming is left off: the words are saved twice, with prefix and
    # with Zprefix
    # term_generator.set_stemmer(stemmer)
    term_generator.index_text(value, 1, prefix)
    last_position = term_generator.get_termpos()
    return last_position + 1