def set_metadata(self, key, value):
    """Store a metadata item in the database behind this connection.

    A later call to get_metadata() with the same `key` returns the `value`
    stored here.

    Keys beginning with an underscore are reserved for internal use; avoid
    them unless you really know what you are doing.

    The value is written to the database, but readers (ie, search
    connections) will not see it until after the next flush.

    The key is limited to about 200 characters (the same limit that applies
    to a term).  The value can be several megabytes in size.

    To remove an item of metadata, call this with `value` set to an empty
    string.

    Raises errors.IndexerError if the connection has been closed, or if the
    underlying database object has no `set_metadata` method.

    """
    index = self._index
    if index is None:
        raise errors.IndexerError("IndexerConnection has been closed")

    supports_metadata = hasattr(index, 'set_metadata')
    if not supports_metadata:
        raise errors.IndexerError("Version of xapian in use does not support metadata")

    log(index.set_metadata, key, value)
def _act_index_freetext(fieldname, doc, value, context, weight=1,
                        language=None, stop=None, spell=False,
                        nopos=False,
                        allow_field_specific=True,
                        search_by_default=True):
    """Perform the INDEX_FREETEXT action.

    Runs `value` through a xapian TermGenerator and adds the resulting terms
    to the xapian document held in `doc._doc`.

     - `fieldname`: used to look up the term prefix in `doc._fieldmappings`.
     - `context`: supplies `index` (used for spelling data) and
       `current_position`, which is read as the starting term position and
       updated before returning.
     - `weight`: passed through to the term generator's indexing call.
     - `language`: if not None, a stemmer for this language is installed.
     - `stop`: if not None, an iterable of stopwords to install.
     - `spell`: if true, spelling data is generated into `context.index`.
     - `nopos`: if true, terms are indexed without positional information.
     - `allow_field_specific`: if true, a prefixed copy of the terms is
       stored (only when the field has a non-empty prefix).
     - `search_by_default`: if true, an unprefixed copy of the terms is
       stored.

    """
    generator = log(xapian.TermGenerator)

    if language is not None:
        stemmer = log(xapian.Stem, language)
        generator.set_stemmer(stemmer)

    if stop is not None:
        stopwords = log(xapian.SimpleStopper)
        for word in stop:
            stopwords.add(word)
        generator.set_stopper(stopwords)

    if spell:
        generator.set_database(context.index)
        generator.set_flags(generator.FLAG_SPELLING)

    generator.set_document(doc._doc)

    # Both copies of the text are indexed the same way, so pick the indexing
    # method once.
    if nopos:
        index_text = generator.index_text_without_positions
    else:
        index_text = generator.index_text

    if search_by_default:
        # Unprefixed copy of the field, used by non-field-specific searches.
        generator.set_termpos(context.current_position)
        index_text(value, weight, '')

    if allow_field_specific:
        # Prefixed copy of the field, used by field-specific searches.  It
        # starts from the same position as the unprefixed copy.
        prefix = doc._fieldmappings.get_prefix(fieldname)
        if len(prefix) != 0:
            generator.set_termpos(context.current_position)
            index_text(value, weight, prefix)

    # Leave a gap after each field instance, so that phrase searches don't
    # match across instances.
    generator.increase_termpos(10)
    context.current_position = generator.get_termpos()
def _act_index_freetext(fieldname, doc, value, context, weight=1,
                        language=None, stop=None, spell=False,
                        nopos=False,
                        allow_field_specific=True,
                        search_by_default=True):
    """Perform the INDEX_FREETEXT action.

    Runs `value` through a xapian TermGenerator and adds the resulting terms
    to the xapian document held in `doc._doc`.

     - `fieldname`: used to look up the term prefix in `doc._fieldmappings`.
     - `context`: supplies `index` (used for spelling data) and
       `current_position`, which is read as the starting term position and
       updated before returning.
     - `weight`: passed through to the term generator's indexing call.
     - `language`: if not None, a stemmer for this language is installed.
     - `stop`: if not None, an iterable of stopwords to install.
     - `spell`: if true, spelling data is generated into `context.index`.
     - `nopos`: if true, terms are indexed without positional information.
     - `allow_field_specific`: if true, a prefixed copy of the terms is
       stored (only when the field has a non-empty prefix).
     - `search_by_default`: if true, an unprefixed copy of the terms is
       stored.

    """
    generator = log(xapian.TermGenerator)

    if language is not None:
        stemmer = log(xapian.Stem, language)
        generator.set_stemmer(stemmer)

    if stop is not None:
        stopwords = log(xapian.SimpleStopper)
        for word in stop:
            stopwords.add(word)
        generator.set_stopper(stopwords)

    if spell:
        generator.set_database(context.index)
        generator.set_flags(generator.FLAG_SPELLING)

    generator.set_document(doc._doc)

    # Both copies of the text are indexed the same way, so pick the indexing
    # method once.
    if nopos:
        index_text = generator.index_text_without_positions
    else:
        index_text = generator.index_text

    if search_by_default:
        # Unprefixed copy of the field, used by non-field-specific searches.
        generator.set_termpos(context.current_position)
        index_text(value, weight, '')

    if allow_field_specific:
        # Prefixed copy of the field, used by field-specific searches.  It
        # starts from the same position as the unprefixed copy.
        prefix = doc._fieldmappings.get_prefix(fieldname)
        if len(prefix) != 0:
            generator.set_termpos(context.current_position)
            index_text(value, weight, prefix)

    # Leave a gap after each field instance, so that phrase searches don't
    # match across instances.
    generator.increase_termpos(10)
    context.current_position = generator.get_termpos()
def __init__(self, indexpath):
    """Open a writable connection to the index stored at `indexpath`.

    Only one indexer connection may be open on a given database at a time,
    so if another connection to the database is already open this raises
    xapian.DatabaseLockError.

    The database is created if it does not already exist.

    """
    # Open (or create) the database first: if this fails, nothing else is
    # initialised.
    database = log(xapian.WritableDatabase, indexpath,
                   xapian.DB_CREATE_OR_OPEN)
    self._index = database
    self._indexpath = indexpath

    # Start from an empty configuration; _load_config() then replaces these
    # defaults with whatever is stored in the database.
    self._field_actions = {}
    self._field_mappings = fieldmappings.FieldMappings()
    self._facet_hierarchy, self._facet_query_table = {}, {}
    self._next_docid = 0
    self._config_modified = False
    self._load_config()

    # Manage the memory used by buffered changes ourselves.
    # This can be removed once Xapian implements this itself.
    self._mem_buffered = 0
    self.set_max_mem_use()
def _store_config(self):
    """Store the configuration for the database.

    The field actions, the serialised field mappings, the facet hierarchy,
    the facet query table and the next document ID are pickled together
    (pickle protocol 2) and written to the database's metadata under the
    key '_xappy_config'.

    Clears the `_config_modified` flag once the configuration has been
    handed to the database.

    """
    assert self._index is not None

    # The order of these items is the storage format: it must match the
    # order in which the configuration is unpacked when loaded.
    config = (
        self._field_actions,
        self._field_mappings.serialise(),
        self._facet_hierarchy,
        self._facet_query_table,
        self._next_docid,
    )
    pickled = cPickle.dumps(config, 2)
    log(self._index.set_metadata, '_xappy_config', pickled)

    self._config_modified = False
def get_metadata(self, key):
    """Fetch a metadata item stored in the database behind this connection.

    Returns the value stored by an earlier call to set_metadata() with the
    same `key`.  If no value is found, the empty string is returned.

    Raises errors.IndexerError if the connection has been closed, or if the
    underlying database object has no `get_metadata` method.

    """
    index = self._index
    if index is None:
        raise errors.IndexerError("IndexerConnection has been closed")

    supports_metadata = hasattr(index, 'get_metadata')
    if not supports_metadata:
        raise errors.IndexerError("Version of xapian in use does not support metadata")

    return log(index.get_metadata, key)
def get_metadata(self, key):
    """Fetch a metadata item stored in the database behind this connection.

    Returns the value stored by an earlier call to set_metadata() with the
    same `key`.  If no value is found, the empty string is returned.

    Raises errors.IndexerError if the connection has been closed, or if the
    underlying database object has no `get_metadata` method.

    """
    index = self._index
    if index is None:
        raise errors.IndexerError("IndexerConnection has been closed")

    supports_metadata = hasattr(index, 'get_metadata')
    if not supports_metadata:
        raise errors.IndexerError("Version of xapian in use does not support metadata")

    return log(index.get_metadata, key)
def _act_facet(fieldname, doc, value, context, type=None):
    """Perform the FACET action.

    For any `type` other than None or 'string', the value is converted by the
    marshalling function that SortableMarshaller supplies for that type, and
    stored in the document's 'facet' value for the field.

    For string facets (`type` None or 'string'), the value is lowercased,
    added to the document as a term, and appended to the string list held in
    the document's 'facet' value for the field.

    `context` is accepted but not used by this action.

    """
    if type is not None and type != 'string':
        # Non-string facet: store the marshalled value and stop.
        converter = SortableMarshaller().get_marshall_function(fieldname, type)
        doc.add_value(fieldname, converter(fieldname, value), 'facet')
        return

    # String facet: add the lowercased value as a term, then extend the
    # serialised list of values already stored for this field.
    lowered = value.lower()
    doc.add_term(fieldname, lowered, 0)
    existing = doc.get_value(fieldname, 'facet')
    values = log(xapian.StringListSerialiser, existing)
    values.append(lowered)
    doc.add_value(fieldname, values.get(), 'facet')
def set_metadata(self, key, value):
    """Store a metadata item in the database behind this connection.

    A later call to get_metadata() with the same `key` returns the `value`
    stored here.

    Keys beginning with an underscore are reserved for internal use; avoid
    them unless you really know what you are doing.

    The value is written to the database, but readers (ie, search
    connections) will not see it until after the next flush.

    The key is limited to about 200 characters (the same limit that applies
    to a term).  The value can be several megabytes in size.

    To remove an item of metadata, call this with `value` set to an empty
    string.

    Raises errors.IndexerError if the connection has been closed, or if the
    underlying database object has no `set_metadata` method.

    """
    index = self._index
    if index is None:
        raise errors.IndexerError("IndexerConnection has been closed")

    supports_metadata = hasattr(index, 'set_metadata')
    if not supports_metadata:
        raise errors.IndexerError("Version of xapian in use does not support metadata")

    log(index.set_metadata, key, value)
def __init__(self, fieldmappings, xapdoc=None):
    """Create a ProcessedDocument.

    `fieldmappings` is the configuration from a database connection; it is
    used to look up how each field should be stored.

    `xapdoc`, if supplied, is the Xapian document to wrap.  When it is None,
    a new Xapian document is created instead.

    """
    # Wrap the caller's document when given; otherwise start from a new one.
    self._doc = log(xapian.Document) if xapdoc is None else xapdoc
    self._fieldmappings = fieldmappings
    self._data = None
def _load_config(self):
    """Load the configuration for the database.

    Reads the pickled configuration stored in the database metadata under
    the key '_xappy_config' and installs it on this connection.

    If nothing is stored (the metadata value is empty), the connection's
    current settings are left untouched and `_config_modified` is not
    changed.

    Two stored formats are accepted:

     - the current 5-item tuple: field actions, serialised field mappings,
       facet hierarchy, facet query table, next document ID;
     - the older 3-item tuple: field actions, serialised field mappings,
       next document ID.  The facet hierarchy and facet query table are
       then reset to empty dicts.

    Raises ValueError if the stored tuple has any other length.

    """
    assert self._index is not None

    config_str = log(self._index.get_metadata, '_xappy_config')
    if len(config_str) == 0:
        return

    # NOTE: unpickling can execute arbitrary code, so this trusts the
    # contents of the database.  Only open databases from trusted sources.
    #
    # Unpickle once and choose the format by length.  Catching ValueError
    # around the load would also swallow errors raised by the unpickling
    # itself and would unpickle legacy configurations twice.
    config = cPickle.loads(config_str)
    if len(config) == 5:
        (self._field_actions, mappings, self._facet_hierarchy,
         self._facet_query_table, self._next_docid) = config
    else:
        # Backwards compatibility - configuration used to lack
        # _facet_hierarchy and _facet_query_table.  A tuple which is not
        # of length 3 raises ValueError from this unpack.
        (self._field_actions, mappings, self._next_docid) = config
        self._facet_hierarchy = {}
        self._facet_query_table = {}

    self._field_mappings = fieldmappings.FieldMappings(mappings)
    self._config_modified = False