Example #1
def _tokenize(string):
    words = []
    for word in tokenize(string):
        try:
            words.append(normalize(word))
        except StopWord:
            continue
    return words
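
The helpers this function relies on (tokenize, normalize, StopWord) come from the indexer module; the following is a minimal, self-contained sketch for illustration only, with an assumed stop-word list and splitting rule, showing how _tokenize drops stop words.

import re

class StopWord(Exception):
    """Raised by normalize() for words that should not be indexed."""

_STOP_WORDS = {'the', 'a', 'an', 'of'}      # assumed list, for illustration only

def tokenize(string):
    # split on non-word characters and drop empty tokens
    return [tok for tok in re.split(r'\W+', string) if tok]

def normalize(word):
    word = word.lower()
    if word in _STOP_WORDS:
        raise StopWord()
    return word

def _tokenize(string):
    words = []
    for word in tokenize(string):
        try:
            words.append(normalize(word))
        except StopWord:
            continue
    return words

print(_tokenize('The quick brown fox'))     # ['quick', 'brown', 'fox']
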
Example #2
 def get_words(self):
     """Return words to be indexed (a word is an unicode string).
     """
     for attr in self.__indexable_attributes:
         value = getattr(self, attr, None)
         if value is None:
             continue
         for word in tokenize(value):
             yield word
Example #3
 def execute(self, querystr, cursor=None):
     """Execute a full text query and return a list of 2-uple (rating, uid).
     """
     if isinstance(querystr, str):
         querystr = unicode(querystr, self.encoding)
     words = normalize_words(tokenize(querystr))
     cursor = cursor or self._cnx.cursor()
     cursor.execute(
         'SELECT 1, uid FROM appears '
         'WHERE MATCH (words) AGAINST (%(words)s IN BOOLEAN MODE)',
         {'words': ' '.join(words)})
     return cursor.fetchall()
Example #4
 def restriction_sql(self, tablename, querystr, jointo=None, not_=False):
     """Execute a full text query and return a list of 2-uple (rating, uid).
     """
     if isinstance(querystr, str):
         querystr = unicode(querystr, self.encoding)
     words = normalize_words(tokenize(querystr))
     sql = "MATCH (%s.words) AGAINST ('%s' IN BOOLEAN MODE)" % (
         tablename, ' '.join(words))
     if not_:
         sql = 'NOT (%s)' % sql
     if jointo is None:
         return sql
     return "%s AND %s.uid=%s" % (sql, tablename, jointo)
Example #5
 def execute(self, querystr, cursor=None):
     """Execute a full text query and return a list of 2-uple (rating, uid).
     """
     if isinstance(querystr, str):
         querystr = unicode(querystr, self.encoding)
     words = normalize_words(tokenize(querystr))
     cursor = cursor or self._cnx.cursor()
     cursor.execute(
         'SELECT 1, uid FROM appears '
         "WHERE words @@ to_tsquery(%(config)s, %(words)s)", {
             'config': self.config,
             'words': '&'.join(words)
         })
     return cursor.fetchall()
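
The only substantive difference from the MySQL execute() in Example #3 is how the normalized words are joined; a small sketch with assumed words shows the search expression each backend receives.

words = ['quick', 'brown', 'fox']   # normalize_words(tokenize(querystr))
print(' '.join(words))              # 'quick brown fox'  -> MySQL AGAINST(... IN BOOLEAN MODE)
print('&'.join(words))              # 'quick&brown&fox'  -> PostgreSQL to_tsquery(); '&' requires every term
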
Example #6
 def restriction_sql(self, tablename, querystr, jointo=None, not_=False):
     """Execute a full text query and return a list of 2-uple (rating, uid).
     """
     if isinstance(querystr, str):
         querystr = unicode(querystr, self.encoding)
     words = normalize_words(tokenize(querystr))
     # XXX strip '%' since it makes tsearch fail; the reason is not yet
     # understood and this should be fixed properly
     searched = '&'.join(words).replace('%', '')
     sql = "%s.words @@ to_tsquery('%s', '%s')" % (tablename, self.config,
                                                   searched)
     if not_:
         sql = 'NOT (%s)' % sql
     if jointo is None:
         return sql
     return "%s AND %s.uid=%s" % (sql, tablename, jointo)
Example #7
 def restriction_sql(self, tablename, querystr, jointo=None, not_=False):
     if isinstance(querystr, str):
         querystr = unicode(querystr, self.encoding)
     words = []
     for word in tokenize(querystr):
         try:
             words.append("'%s'" % normalize(word))
         except StopWord:
             continue
     sql = '%s.word_id IN (SELECT word_id FROM word WHERE word in (%s))' % (
         tablename, ', '.join(words))
     if not_:
         sql = 'NOT (%s)' % sql
     if jointo is None:
         return sql
     return '%s AND %s.uid=%s' % (sql, tablename, jointo)
Example #8
 def get_words(self):
     return tokenize(u'gïnco-jpl blâ blîp blôp blàp')
Example #9
 def _get_words(self, buffer):
     """ extract word from a plain text buffer """
     for line in buffer.xreadlines():
         for word in tokenize(unicode(line, self.encoding)):
             yield word
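
xreadlines() and unicode() are Python 2 only; a hedged Python 3 sketch of the same generator, assuming buffer is opened in binary mode and reusing the surrounding tokenize and self.encoding:

def _get_words(self, buffer):
    """Extract words from a plain text buffer opened in binary mode."""
    for line in buffer:                       # binary file objects iterate line by line
        for word in tokenize(line.decode(self.encoding)):
            yield word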