class IndexStore(object):
    """Index metadata and provide rich query facilities on it."""

    def __init__(self):
        self._database = None
        self._flush_timeout = None
        self._pending_writes = 0
        root_path = layoutmanager.get_instance().get_root_path()
        self._index_updated_path = os.path.join(root_path, 'index_updated')
        self._std_index_path = layoutmanager.get_instance().get_index_path()
        self._index_path = self._std_index_path

    def open_index(self, temp_path=False):
        # Callers of open_index() must be able to handle an exception --
        # usually caused by IO errors such as ENOSPC -- and retry, putting
        # the index on a temp_path.
        if temp_path:
            try:
                # Mark the on-disk index stale.
                self._set_index_updated(False)
            except Exception:
                pass
            self._index_path = temp_path
        else:
            self._index_path = self._std_index_path

        try:
            self._database = WritableDatabase(self._index_path,
                                              xapian.DB_CREATE_OR_OPEN)
        except Exception:
            logging.error('Exception opening database')
            raise

    def close_index(self):
        """Close the index database if it is open."""
        if not self._database:
            return

        self._flush(True)
        try:
            # Does Xapian write in its destructors?
            self._database = None
        except Exception:
            logging.error('Exception tearing down database')
            raise

    def remove_index(self):
        if not os.path.exists(self._index_path):
            return
        for f in os.listdir(self._index_path):
            os.remove(os.path.join(self._index_path, f))

    def contains(self, uid):
        postings = self._database.postlist(_PREFIX_FULL_VALUE +
                                           _PREFIX_UID + uid)
        try:
            next(postings)
        except StopIteration:
            return False
        return True

    def store(self, uid, properties):
        document = Document()
        document.add_value(_VALUE_UID, uid)
        term_generator = TermGenerator()
        term_generator.index_document(document, properties)
        if not self.contains(uid):
            self._database.add_document(document)
        else:
            self._database.replace_document(_PREFIX_FULL_VALUE +
                                            _PREFIX_UID + uid, document)
        self._flush(True)

    def find(self, query):
        offset = query.pop('offset', 0)
        limit = query.pop('limit', MAX_QUERY_LIMIT)
        order_by = query.pop('order_by', [])
        query_string = query.pop('query', None)

        query_parser = QueryParser()
        query_parser.set_database(self._database)
        enquire = Enquire(self._database)
        enquire.set_query(query_parser.parse_query(query, query_string))

        # This will assure that the results count is exact.
        check_at_least = offset + limit + 1

        if not order_by:
            order_by = '+timestamp'
        else:
            order_by = order_by[0]

        if order_by == '+timestamp':
            enquire.set_sort_by_value(_VALUE_TIMESTAMP, True)
        elif order_by == '-timestamp':
            enquire.set_sort_by_value(_VALUE_TIMESTAMP, False)
        elif order_by == '+title':
            enquire.set_sort_by_value(_VALUE_TITLE, True)
        elif order_by == '-title':
            enquire.set_sort_by_value(_VALUE_TITLE, False)
        elif order_by == '+filesize':
            enquire.set_sort_by_value(_VALUE_FILESIZE, True)
        elif order_by == '-filesize':
            enquire.set_sort_by_value(_VALUE_FILESIZE, False)
        elif order_by == '+creation_time':
            enquire.set_sort_by_value(_VALUE_CREATION_TIME, True)
        elif order_by == '-creation_time':
            enquire.set_sort_by_value(_VALUE_CREATION_TIME, False)
        else:
            logging.warning('Unsupported property for sorting: %s', order_by)

        query_result = enquire.get_mset(offset, limit, check_at_least)
        total_count = query_result.get_matches_estimated()

        uids = []
        for hit in query_result:
            uids.append(hit.document.get_value(_VALUE_UID))

        return (uids, total_count)

    def delete(self, uid):
        self._database.delete_document(_PREFIX_FULL_VALUE + _PREFIX_UID + uid)
        self._flush(True)

    def get_activities(self):
        activities = []
        prefix = _PREFIX_FULL_VALUE + _PREFIX_ACTIVITY
        for term in self._database.allterms(prefix):
            activities.append(term.term[len(prefix):])
        return activities

    def flush(self):
        self._flush(True)

    def get_index_updated(self):
        return os.path.exists(self._index_updated_path)

    index_updated = property(get_index_updated)

    def _set_index_updated(self, index_updated):
        if self._std_index_path != self._index_path:
            # Operating from tmpfs; the on-disk flag does not apply.
            return True
        if index_updated != self.index_updated:
            if index_updated:
                index_updated_file = open(self._index_updated_path, 'w')
                # index_updated = True will happen at most every
                # _FLUSH_TIMEOUT seconds, so it is ok to fsync.
                os.fsync(index_updated_file.fileno())
                index_updated_file.close()
            else:
                os.remove(self._index_updated_path)

    def _flush_timeout_cb(self):
        self._flush(True)
        return False

    def _flush(self, force=False):
        """Called after any database mutation."""
        logging.debug('IndexStore.flush: force=%r _pending_writes=%r',
                      force, self._pending_writes)

        self._set_index_updated(False)

        if self._flush_timeout is not None:
            GLib.source_remove(self._flush_timeout)
            self._flush_timeout = None

        self._pending_writes += 1
        if force or self._pending_writes > _FLUSH_THRESHOLD:
            try:
                logging.debug('Start database flush')
                self._database.flush()
                logging.debug('Completed database flush')
            except Exception as e:
                logging.exception(e)
                logging.error('Exception during database.flush()')
                # Bail out to trigger a reindex.
                sys.exit(1)
            self._pending_writes = 0
            self._set_index_updated(True)
        else:
            # Below the flush threshold: defer the flush so that bursts of
            # writes are batched into a single commit.
            self._flush_timeout = GLib.timeout_add_seconds(
                _FLUSH_TIMEOUT, self._flush_timeout_cb)
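
# A minimal usage sketch of the class above, assuming the rest of the
# datastore module is set up (layoutmanager pointing at a valid profile
# directory, the _VALUE_*/_PREFIX_* constants and the custom TermGenerator
# and QueryParser defined at module level). The uid and properties below
# are hypothetical example values, not part of this module:
#
#     store = IndexStore()
#     store.open_index()
#     store.store('uid-1234', {'title': 'My entry',
#                              'timestamp': '1700000000'})
#     uids, total_count = store.find({'query': 'entry', 'limit': 10,
#                                     'order_by': ['-timestamp']})
#     store.close_index()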