Exemple #1
0
class XapianSearcher(object):
    """Paginated free-text search over the ``text`` field of a database.

    Wraps a ``SearchConnection`` opened on a database directory.
    """

    def __init__(self, dirname):
        """Open a search connection on the database directory *dirname*."""
        self.dbPath = os.path.abspath(dirname)
        self.conn = SearchConnection(self.dbPath)

    def reopen(self):
        """Reopen the underlying search connection."""
        self.conn.reopen()

    def search(self, query, offset=0, page_size=10, summary_len=300):
        """Return one page of results for the free-text *query*.

        The query is spell-corrected, stop words are dropped, and the
        remaining words are OR-ed together against the ``text`` field.

        :param query: raw query string.
        :param offset: zero-based page index (not a document rank).
        :param page_size: number of results per page.
        :param summary_len: ``maxlen`` passed to the result summariser.
        :returns: list of dicts with the keys ``_id``, ``title``,
            ``content`` and ``weight``.
        """
        corrected = self.conn.spell_correct(query)
        words = [w for w in corrected.split() if w not in STOPWORDS]
        q = self.conn.query_field('text', ' OR '.join(words))

        # SearchConnection.search() takes a [startrank, endrank) range, not
        # (start, count): the end rank must move along with the page,
        # otherwise every page after the first comes back empty/truncated.
        start_rank = offset * page_size
        end_rank = start_rank + page_size
        res = self.conn.search(q, start_rank, end_rank)

        # Build a real list so the results stay usable after close(),
        # even where map() would be lazy.
        return [
            {'_id': r.id,
             'title': r.data['title'][0],
             'content': r.summarise('text', maxlen=summary_len),
             'weight': r.weight}
            for r in res
        ]

    def close(self):
        """Close the underlying search connection."""
        self.conn.close()
Exemple #2
0
class XapianSearcher(object):
    """Paginated free-text search over the ``text`` field of a database.

    Wraps a ``SearchConnection`` opened on a database directory.
    """

    def __init__(self, dirname):
        """Open a search connection on the database directory *dirname*."""
        self.dbPath = os.path.abspath(dirname)
        self.conn = SearchConnection(self.dbPath)

    def reopen(self):
        """Reopen the underlying search connection."""
        self.conn.reopen()

    def search(self, query, offset=0, page_size=10, summary_len=300):
        """Return one page of results for the free-text *query*.

        The query is spell-corrected, stop words are dropped, and the
        remaining words are OR-ed together against the ``text`` field.

        :param query: raw query string.
        :param offset: zero-based page index (not a document rank).
        :param page_size: number of results per page.
        :param summary_len: ``maxlen`` passed to the result summariser.
        :returns: list of dicts with the keys ``_id``, ``title``,
            ``content`` and ``weight``.
        """
        corrected = self.conn.spell_correct(query)
        words = [w for w in corrected.split() if w not in STOPWORDS]
        q = self.conn.query_field('text', ' OR '.join(words))

        # SearchConnection.search() takes a [startrank, endrank) range, not
        # (start, count): the end rank must move along with the page,
        # otherwise every page after the first comes back empty/truncated.
        start_rank = offset * page_size
        end_rank = start_rank + page_size
        res = self.conn.search(q, start_rank, end_rank)

        # Build a real list so the results stay usable after close(),
        # even where map() would be lazy.
        return [
            {
                '_id': r.id,
                'title': r.data['title'][0],
                'content': r.summarise('text', maxlen=summary_len),
                'weight': r.weight,
            }
            for r in res
        ]

    def close(self):
        """Close the underlying search connection."""
        self.conn.close()
Exemple #3
0
    def _check_cache_results(self, indexpath, cachepath, cacheid, expected_results, num_results=10):
        """Check search results with and without the cached query merged in.

        Opens a cache manager on ``cachepath`` (with id ``cacheid``) and a
        search connection on ``indexpath``, then queries the field
        ``"field"`` for ``"term_a"``.

        :param expected_results: pair ``(non_cached, cached)`` of document-id
            lists expected from the plain query and from the query merged
            with the cached one, respectively.
        :param num_results: end rank passed to each search (start rank is 0).

        Both the connection and the cache manager are closed even when an
        assertion fails, so a failing check does not leak open handles.
        """
        # set cache manager
        cm = XapianCacheManager(cachepath, id=cacheid)
        try:
            search_conn = SearchConnection(indexpath)
            try:
                search_conn.set_cache_manager(cm)

                query_id, query_term = (1, "term_a")
                # obtain query_id from the cache
                cache_query_id = cm.get_queryid(query_term)
                self.assertEqual(query_id, cache_query_id)

                non_cached, cached = expected_results

                query = search_conn.query_field("field", query_term)
                base_result = [r.id for r in query.search(0, num_results)]
                # see if the results without merging the query are ok
                self.assertEqual(non_cached, base_result)

                cached_query = query.merge_with_cached(query_id)
                cached_result = [r.id for r in cached_query.search(0, num_results)]
                # test the merged query result
                self.assertEqual(cached, cached_result)
            finally:
                # Same close order as before: connection first, then cache.
                search_conn.close()
        finally:
            cm.close()
Exemple #4
0
def get_connection(path, indexer=False, callback=None):
    """Get a connection to the database.

    This is a generator that yields a single connection and closes it when
    resumed or torn down (presumably it is wrapped with
    ``contextlib.contextmanager`` outside this block -- TODO confirm).

    :param path: database path handed to the connection constructor.
    :param indexer: when true, use the shared module-level
        ``IndexerConnection``; otherwise use the ``SearchConnection`` cached
        for the current thread.
    :param callback: optional callable invoked with the connection before
        it is yielded.

    Opening is attempted up to four times, sleeping half a second after each
    ``DatabaseOpeningError`` / ``DatabaseLockError``.

    NOTE: if every attempt fails, ``None`` is passed to ``callback`` and
    yielded; this mirrors the previous behaviour.
    """
    global _index_connection, _search_connections

    max_attempts = 4
    retry_delay = 0.5  # seconds

    connection = None
    is_new = False
    # Search connections are cached per thread; indexer connections are not.
    thread = None if indexer else get_current_thread()

    try:
        for _attempt in range(max_attempts):
            try:
                if indexer:
                    if _index_connection is None:
                        is_new = True
                        _index_connection = IndexerConnection(path)
                    connection = _index_connection
                elif thread not in _search_connections:
                    is_new = True
                    connection = SearchConnection(path)
                    _search_connections[thread] = connection
                else:
                    connection = _search_connections[thread]
            except (xapian.DatabaseOpeningError, xapian.DatabaseLockError):
                time.sleep(retry_delay)
            else:
                break

        if callback:
            callback(connection)

        # A connection picked up from the cache may be out of date.
        if not is_new:
            connection.reopen()
        yield connection
    finally:
        if connection is not None:
            connection.close()
            # Drop the closed connection from whichever cache holds it, so a
            # later call opens a fresh one instead of reusing a closed
            # object.  Only the slot that actually refers to this connection
            # is cleared.
            if connection is _index_connection:
                _index_connection = None
            if (not indexer and thread in _search_connections
                    and _search_connections[thread] is connection):
                del _search_connections[thread]
Exemple #5
0
 def __init__(self, dirname):
     """Open a search connection on the database directory *dirname*.

     The absolute path is kept on ``self.dbPath`` and the connection on
     ``self.conn``.
     """
     absolute_path = os.path.abspath(dirname)
     self.dbPath = absolute_path
     self.conn = SearchConnection(absolute_path)
Exemple #6
0
 def __init__(self, dirname):
     """Open a search connection on the database directory *dirname*.

     The absolute path is kept on ``self.dbPath`` and the connection on
     ``self.conn``.
     """
     absolute_path = os.path.abspath(dirname)
     self.dbPath = absolute_path
     self.conn = SearchConnection(absolute_path)