class XapianSearcher(object):
    """Thin search front-end over a xappy ``SearchConnection``."""

    def __init__(self, dirname):
        """Open a search connection on the database directory *dirname*."""
        self.dbPath = os.path.abspath(dirname)
        self.conn = SearchConnection(self.dbPath)
        # can use 'reopen()' to open the db again

    def reopen(self):
        """Re-open the connection so newly indexed documents become visible."""
        self.conn.reopen()

    def search(self, query, offset=0, page_size=10, summary_len=300):
        """Search the 'text' field and return one page of result dicts.

        The query is spell-corrected, stripped of stopwords, and its
        remaining terms are OR-ed together.

        :param query: raw user query string.
        :param offset: zero-based page number (multiplied by *page_size*).
        :param page_size: number of hits per page.
        :param summary_len: maximum length of the generated summary.
        :return: list of dicts with keys '_id', 'title', 'content', 'weight'.
        """
        query = self.conn.spell_correct(query)
        words = [w for w in query.split() if w not in STOPWORDS]
        query = ' OR '.join(words)
        q = self.conn.query_field('text', query)
        res = self.conn.search(q, offset * page_size, page_size)
        # BUG FIX: the original returned ``map(transform, res)``, which under
        # Python 3 is a lazy iterator — the results would only be read after
        # the caller may already have closed the connection.  Materialize the
        # page eagerly while ``res`` is known to be valid.
        return [
            {
                '_id': r.id,
                'title': r.data['title'][0],
                'content': r.summarise('text', maxlen=summary_len),
                'weight': r.weight,
            }
            for r in res
        ]

    def close(self):
        """Close the underlying search connection."""
        self.conn.close()
# NOTE(review): this class is a duplicate of the XapianSearcher defined
# earlier in this file; at import time this later definition wins.  Consider
# removing one of the two copies.
class XapianSearcher(object):
    """Search front-end wrapping a xappy ``SearchConnection``."""

    def __init__(self, dirname):
        """Store the absolute database path and open a connection to it."""
        self.dbPath = os.path.abspath(dirname)
        self.conn = SearchConnection(self.dbPath)
        # 'reopen()' may be used to open the db again

    def reopen(self):
        """Refresh the connection to pick up index changes."""
        self.conn.reopen()

    def search(self, query, offset=0, page_size=10, summary_len=300):
        """Spell-correct *query*, OR its non-stopword terms together, run
        them against the 'text' field and return one page of result dicts."""
        corrected = self.conn.spell_correct(query)
        terms = [term for term in corrected.split() if term not in STOPWORDS]
        or_query = ' OR '.join(terms)
        parsed = self.conn.query_field('text', or_query)
        hits = self.conn.search(parsed, offset * page_size, page_size)

        def as_doc(hit):
            # Flatten a single xappy result into a plain dict.
            return {
                '_id': hit.id,
                'title': hit.data['title'][0],
                'content': hit.summarise('text', maxlen=summary_len),
                'weight': hit.weight,
            }

        return map(as_doc, hits)

    def close(self):
        """Release the underlying connection."""
        self.conn.close()
def _check_cache_results(self, indexpath, cachepath, cacheid,
                         expected_results, num_results=10):
    """Assert that a cached query id resolves and merges correctly.

    Opens the index at *indexpath* with the cache manager at *cachepath*
    (id *cacheid*), then checks, for the fixed query term "term_a":
    the cached query id, the plain (non-merged) result ids, and the
    result ids after merging with the cached query.

    :param expected_results: (non_cached_ids, cached_ids) pair.
    :param num_results: page size to request from each search.
    """
    expected_plain, expected_merged = expected_results

    manager = XapianCacheManager(cachepath, id=cacheid)
    conn = SearchConnection(indexpath)
    conn.set_cache_manager(manager)

    query_id, query_term = 1, "term_a"
    # The cache must already know this query string under the expected id.
    self.assertEqual(query_id, manager.get_queryid(query_term))

    field_query = conn.query_field("field", query_term)
    plain_ids = [hit.id for hit in field_query.search(0, num_results)]
    # The un-merged query must return the baseline result ids.
    self.assertEqual(expected_plain, plain_ids)

    merged_query = field_query.merge_with_cached(query_id)
    merged_ids = [hit.id for hit in merged_query.search(0, num_results)]
    # Merging with the cached query changes the ordering as expected.
    self.assertEqual(expected_merged, merged_ids)

    conn.close()
    manager.close()
def get_connection(path, indexer=False, callback=None):
    """Yield a connection to the database at *path*, reusing cached ones.

    Context-manager style generator: yields the shared module-level
    ``IndexerConnection`` when *indexer* is true, otherwise a per-thread
    ``SearchConnection``.  Opening is retried a few times when the
    database is locked or cannot be opened yet.

    :param path: filesystem path of the xapian database.
    :param indexer: when True use/create the shared indexer connection.
    :param callback: optional callable invoked with the connection before
        it is (re)opened and yielded.
    :raises xapian.DatabaseOpeningError, xapian.DatabaseLockError:
        re-raised when the database stays unavailable after all retries.
    """
    global _index_connection, _search_connections
    connection = None
    try:
        is_new = False
        attempts = 0  # typo fix: was '_connection_attemts'
        last_error = None
        while attempts <= 3:
            try:
                if indexer:
                    if _index_connection is None:
                        is_new = True
                        _index_connection = IndexerConnection(path)
                    connection = _index_connection
                else:
                    thread = get_current_thread()
                    if thread not in _search_connections:
                        is_new = True
                        _search_connections[
                            thread] = connection = SearchConnection(path)
                    else:
                        connection = _search_connections[thread]
            except (xapian.DatabaseOpeningError,
                    xapian.DatabaseLockError) as err:
                # Database busy or not yet available: back off and retry.
                last_error = err
                time.sleep(0.5)
                attempts += 1
            else:
                break
        if connection is None:
            # BUG FIX: previously the code fell through after exhausting all
            # retries and crashed with AttributeError on 'None.reopen()';
            # surface the real xapian error instead.
            raise last_error
        if callback:
            callback(connection)
        if not is_new:
            # A cached connection was closed by a previous 'finally' below;
            # reopen it so it is usable and sees the latest index state.
            connection.reopen()
        yield connection
    finally:
        if connection is not None:
            connection.close()
        # BUG FIX: only drop the cached indexer connection when this call
        # actually used it; previously the global was reset even after a
        # search-connection use, discarding a live IndexerConnection
        # without closing it.
        if indexer:
            _index_connection = None
def __init__(self, dirname):
    """Resolve *dirname* to an absolute path and open a search connection."""
    db_path = os.path.abspath(dirname)
    self.dbPath = db_path
    self.conn = SearchConnection(db_path)