class IndexStore(object):
    """Index metadata and provide rich query facilities on it."""

    def __init__(self):
        self._database = None
        self._flush_timeout = None
        self._pending_writes = 0
        root_path = layoutmanager.get_instance().get_root_path()
        self._index_updated_path = os.path.join(root_path, 'index_updated')
        self._std_index_path = layoutmanager.get_instance().get_index_path()
        self._index_path = self._std_index_path

    def open_index(self, temp_path=False):
        # Callers of open_index() must be able to handle an exception --
        # usually caused by IO errors such as ENOSPC -- and retry, putting
        # the index on a temp_path.
        if temp_path:
            try:
                # Mark the on-disk index stale.
                self._set_index_updated(False)
            except Exception:
                pass
            self._index_path = temp_path
        else:
            self._index_path = self._std_index_path

        try:
            self._database = WritableDatabase(self._index_path,
                                              xapian.DB_CREATE_OR_OPEN)
        except Exception:
            logging.error('Exception opening database')
            raise

    def close_index(self):
        """Close the index database if it is open."""
        if not self._database:
            return

        self._flush(True)
        try:
            # Does Xapian write in its destructors?
            self._database = None
        except Exception:
            logging.error('Exception tearing down database')
            raise

    def remove_index(self):
        if not os.path.exists(self._index_path):
            return
        for f in os.listdir(self._index_path):
            os.remove(os.path.join(self._index_path, f))

    def contains(self, uid):
        postings = self._database.postlist(_PREFIX_FULL_VALUE +
                                           _PREFIX_UID + uid)
        try:
            next(postings)
        except StopIteration:
            return False
        return True

    def store(self, uid, properties):
        document = Document()
        document.add_value(_VALUE_UID, uid)
        term_generator = TermGenerator()
        term_generator.index_document(document, properties)
        if not self.contains(uid):
            self._database.add_document(document)
        else:
            self._database.replace_document(_PREFIX_FULL_VALUE +
                                            _PREFIX_UID + uid, document)
        self._flush(True)

    def find(self, query):
        offset = query.pop('offset', 0)
        limit = query.pop('limit', MAX_QUERY_LIMIT)
        order_by = query.pop('order_by', [])
        query_string = query.pop('query', None)

        query_parser = QueryParser()
        query_parser.set_database(self._database)
        enquire = Enquire(self._database)
        enquire.set_query(query_parser.parse_query(query, query_string))

        # This will assure that the results count is exact.
        check_at_least = offset + limit + 1

        if not order_by:
            order_by = '+timestamp'
        else:
            order_by = order_by[0]

        if order_by == '+timestamp':
            enquire.set_sort_by_value(_VALUE_TIMESTAMP, True)
        elif order_by == '-timestamp':
            enquire.set_sort_by_value(_VALUE_TIMESTAMP, False)
        elif order_by == '+title':
            enquire.set_sort_by_value(_VALUE_TITLE, True)
        elif order_by == '-title':
            enquire.set_sort_by_value(_VALUE_TITLE, False)
        elif order_by == '+filesize':
            enquire.set_sort_by_value(_VALUE_FILESIZE, True)
        elif order_by == '-filesize':
            enquire.set_sort_by_value(_VALUE_FILESIZE, False)
        elif order_by == '+creation_time':
            enquire.set_sort_by_value(_VALUE_CREATION_TIME, True)
        elif order_by == '-creation_time':
            enquire.set_sort_by_value(_VALUE_CREATION_TIME, False)
        else:
            logging.warning('Unsupported property for sorting: %s', order_by)

        query_result = enquire.get_mset(offset, limit, check_at_least)
        total_count = query_result.get_matches_estimated()

        uids = []
        for hit in query_result:
            uids.append(hit.document.get_value(_VALUE_UID))

        return (uids, total_count)

    def delete(self, uid):
        self._database.delete_document(_PREFIX_FULL_VALUE + _PREFIX_UID + uid)
        self._flush(True)

    def get_activities(self):
        activities = []
        prefix = _PREFIX_FULL_VALUE + _PREFIX_ACTIVITY
        for term in self._database.allterms(prefix):
            activities.append(term.term[len(prefix):])
        return activities

    def flush(self):
        self._flush(True)

    def get_index_updated(self):
        return os.path.exists(self._index_updated_path)

    index_updated = property(get_index_updated)

    def _set_index_updated(self, index_updated):
        if self._std_index_path != self._index_path:
            # Operating from tmpfs; the on-disk flag does not apply.
            return True
        if index_updated != self.index_updated:
            if index_updated:
                index_updated_file = open(self._index_updated_path, 'w')
                # index_updated = True will happen at most every
                # _FLUSH_TIMEOUT seconds, so it is ok to fsync.
                os.fsync(index_updated_file.fileno())
                index_updated_file.close()
            else:
                os.remove(self._index_updated_path)

    def _flush_timeout_cb(self):
        self._flush(True)
        return False

    def _flush(self, force=False):
        """Called after any database mutation."""
        logging.debug('IndexStore.flush: force=%r _pending_writes=%r',
                      force, self._pending_writes)

        self._set_index_updated(False)

        if self._flush_timeout is not None:
            GLib.source_remove(self._flush_timeout)
            self._flush_timeout = None

        self._pending_writes += 1
        if force or self._pending_writes > _FLUSH_THRESHOLD:
            try:
                logging.debug('Start database flush')
                self._database.flush()
                logging.debug('Completed database flush')
            except Exception as e:
                logging.exception(e)
                logging.error('Exception during database.flush()')
                # Bail out to trigger a reindex.
                sys.exit(1)
            self._pending_writes = 0
            self._set_index_updated(True)
        else:
            # Below the flush threshold: defer the flush so that bursts of
            # writes are batched into a single commit.
            self._flush_timeout = GLib.timeout_add_seconds(
                _FLUSH_TIMEOUT, self._flush_timeout_cb)
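
# A minimal usage sketch of the class above, assuming the rest of the
# datastore module is set up (layoutmanager pointing at a valid profile
# directory, the _VALUE_*/_PREFIX_* constants and the custom TermGenerator
# and QueryParser defined at module level). The uid and properties below
# are hypothetical example values, not part of this module:
#
#     store = IndexStore()
#     store.open_index()
#     store.store('uid-1234', {'title': 'My entry',
#                              'timestamp': '1700000000'})
#     uids, total_count = store.find({'query': 'entry', 'limit': 10,
#                                     'order_by': ['-timestamp']})
#     store.close_index()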