def get_xdoc_from_document(self, doc_values):
    """Return (abspath, term, xdoc) built from the given catalog values.

    ``doc_values`` must be a plain dict mapping field names to values and
    must contain the 'abspath' key.  A value may itself be a dict mapping
    a language to a value (multilingual field).  ``None`` values are
    neither stored nor indexed.

    The result is a tuple:

    - ``abspath``: the value of ``doc_values['abspath']``
    - ``term``: the 'Q'-prefixed key term added to the document (``None``
      if 'abspath' is not a declared field)
    - ``xdoc``: the new xapian document

    Fields that are not declared in ``self._fields`` are reported with
    ``warn_not_indexed_nor_stored`` and skipped.

    Raises NotImplementedError if ``doc_values`` is not a dict, and
    KeyError if it has no 'abspath' entry.

    Side effect: the info of every field (or field/language pair) seen for
    the first time is added to ``self._metadata``; when that happens the
    metadata is saved in the database.
    """
    # Check the input
    if type(doc_values) is not dict:
        raise NotImplementedError('Deprecated: doc_values should be a dict')
    metadata = self._metadata
    fields = self._fields
    abspath = doc_values['abspath']
    term = None

    # Make the xapian document
    metadata_modified = False
    xdoc = Document()
    for name, value in doc_values.iteritems():
        if name not in fields:
            # An undeclared field can be neither indexed nor stored: warn
            # and skip it, rather than failing on fields[name] just below
            warn_not_indexed_nor_stored(name)
            continue
        field_cls = fields[name]

        # New field ?
        if name not in metadata:
            info = metadata[name] = self._get_info(field_cls, name)
            metadata_modified = True
        else:
            info = metadata[name]

        # The key field is always abspath.  It is added as a term with the
        # prefix 'Q', on top of the regular indexing done below, because
        # "_encode != _index" (the regular indexing may split the value).
        if name == 'abspath':
            key_value = _reduce_size(_encode(field_cls, value))
            term = 'Q' + key_value
            xdoc.add_term(term)

        # A multilingual value?
        if isinstance(value, dict):
            for language, lang_value in value.iteritems():
                lang_name = name + '_' + language

                # New field ?
                if lang_name not in metadata:
                    lang_info = self._get_info(field_cls, lang_name)
                    lang_info['from'] = name
                    metadata[lang_name] = lang_info
                    metadata_modified = True
                else:
                    lang_info = metadata[lang_name]

                # The value can be None
                if lang_value is None:
                    continue

                # Is stored ?
                if 'value' in lang_info:
                    xdoc.add_value(lang_info['value'],
                                   _encode(field_cls, lang_value))

                # Is indexed ?
                if 'prefix' in lang_info:
                    # Index twice: with the prefix of the field itself and
                    # with the prefix of the field/language pair
                    _index(xdoc, field_cls, lang_value, info['prefix'],
                           language)
                    _index(xdoc, field_cls, lang_value,
                           lang_info['prefix'], language)

        # The value can be None
        elif value is not None:
            # Is stored ?
            if 'value' in info:
                xdoc.add_value(info['value'], _encode(field_cls, value))

            # Is indexed ?
            if 'prefix' in info:
                # By default language='en'
                _index(xdoc, field_cls, value, info['prefix'], 'en')

    # Store metadata ?
    if metadata_modified:
        self._db.set_metadata('metadata', dumps(metadata))

    # Ok
    return abspath, term, xdoc
def index_document(self, document):
    """Add a new document.

    ``document`` is either a dict of catalog values (field name to value)
    or an object whose ``get_catalog_values()`` method returns them.  A
    value may itself be a dict mapping a language to a value (multilingual
    field).  ``None`` values are neither stored nor indexed.

    Fields that are not declared in ``self._fields`` are reported with
    ``warn_not_indexed_nor_stored`` and skipped.

    Side effects: the new xapian document is added to the database.  The
    info of every field (or field/language pair) seen for the first time
    is added to ``self._metadata``; when that happens the metadata is
    saved in the database too.
    """
    db = self._db
    metadata = self._metadata
    fields = self._fields

    # Check the input
    if type(document) is dict:
        doc_values = document
    else:
        doc_values = document.get_catalog_values()

    # Make the xapian document
    metadata_modified = False
    xdoc = Document()
    for name, value in doc_values.iteritems():
        if name not in fields:
            # An undeclared field can be neither indexed nor stored: warn
            # and skip it, rather than failing on fields[name] just below
            warn_not_indexed_nor_stored(name)
            continue
        field_cls = fields[name]

        # New field ?
        if name not in metadata:
            info = metadata[name] = self._get_info(field_cls, name)
            metadata_modified = True
        else:
            info = metadata[name]

        # The key field is always abspath.  It is added as a term with the
        # prefix 'Q', on top of the regular indexing done below, because
        # "_encode != _index" (the regular indexing may split the value).
        if name == 'abspath':
            key_value = _reduce_size(_encode(field_cls, value))
            xdoc.add_term('Q' + key_value)

        # A multilingual value?
        if isinstance(value, dict):
            for language, lang_value in value.iteritems():
                lang_name = name + '_' + language

                # New field ?
                if lang_name not in metadata:
                    lang_info = self._get_info(field_cls, lang_name)
                    lang_info['from'] = name
                    metadata[lang_name] = lang_info
                    metadata_modified = True
                else:
                    lang_info = metadata[lang_name]

                # The value can be None
                if lang_value is None:
                    continue

                # Is stored ?
                if 'value' in lang_info:
                    xdoc.add_value(lang_info['value'],
                                   _encode(field_cls, lang_value))

                # Is indexed ?
                if 'prefix' in lang_info:
                    # Index twice: with the prefix of the field itself and
                    # with the prefix of the field/language pair
                    _index(xdoc, field_cls, lang_value, info['prefix'],
                           language)
                    _index(xdoc, field_cls, lang_value,
                           lang_info['prefix'], language)

        # The value can be None
        elif value is not None:
            # Is stored ?
            if 'value' in info:
                xdoc.add_value(info['value'], _encode(field_cls, value))

            # Is indexed ?
            if 'prefix' in info:
                # By default language='en'
                _index(xdoc, field_cls, value, info['prefix'], 'en')

    # TODO: Don't store two documents with the same key field!
    # Save the doc
    db.add_document(xdoc)

    # Store metadata ?
    if metadata_modified:
        db.set_metadata('metadata', dumps(metadata))
def index_document(self, document):
    """Add a new document.

    ``document`` is either a dict of catalog values (field name to value)
    or a ``CatalogAware`` object, whose ``get_catalog_values()`` method
    returns them.  A value may itself be a dict mapping a language to a
    value (multilingual field).  ``None`` values are neither stored nor
    indexed.

    The values must provide a non-None value for ``self._key_field``; it
    is added to the document as a term with the prefix 'Q'.

    Raises ValueError if ``document`` is neither a dict nor a
    ``CatalogAware`` object, or if the key field value is missing.  The
    key field is checked before anything is built, so a rejected document
    leaves ``self._metadata`` untouched.

    Side effects: the new xapian document is added to the database.  The
    info of every field (or field/language pair) seen for the first time
    is added to ``self._metadata``; when that happens the metadata is
    saved in the database too.
    """
    db = self._db
    metadata = self._metadata
    fields = self._fields

    # Check the input
    if type(document) is dict:
        doc_values = document
    elif isinstance(document, CatalogAware):
        doc_values = document.get_catalog_values()
    else:
        raise ValueError('the document must be a CatalogAware object')

    # The key field is compulsory.  Fail here, before the metadata is
    # modified, so that a bad document has no side effect at all.
    key_field = self._key_field
    if (key_field is None or key_field not in doc_values
            or doc_values[key_field] is None):
        raise ValueError('the "key_field" value is compulsory')

    # Make the xapian document
    metadata_modified = False
    xdoc = Document()
    for name, value in doc_values.iteritems():
        field_cls = fields[name]

        # New field ?
        if name not in metadata:
            info = metadata[name] = self._get_info(field_cls, name)
            metadata_modified = True
        else:
            info = metadata[name]

        # A multilingual value ?
        if isinstance(value, dict):
            for language, lang_value in value.iteritems():
                lang_name = name + '_' + language

                # New field ?
                if lang_name not in metadata:
                    lang_info = self._get_info(field_cls, lang_name)
                    lang_info['from'] = name
                    metadata[lang_name] = lang_info
                    metadata_modified = True
                else:
                    lang_info = metadata[lang_name]

                # The value can be None
                if lang_value is None:
                    continue

                # Is stored ?
                if 'value' in lang_info:
                    xdoc.add_value(lang_info['value'],
                                   _encode(field_cls, lang_value))

                # Is indexed ?
                if 'prefix' in lang_info:
                    # Index twice: with the prefix of the field itself and
                    # with the prefix of the field/language pair
                    _index(xdoc, field_cls, lang_value, info['prefix'],
                           language)
                    _index(xdoc, field_cls, lang_value,
                           lang_info['prefix'], language)

        # The value can be None
        elif value is not None:
            # Is stored ?
            if 'value' in info:
                xdoc.add_value(info['value'], _encode(field_cls, value))

            # Is indexed ?
            if 'prefix' in info:
                # By default language='en'
                _index(xdoc, field_cls, value, info['prefix'], 'en')

    # Store the key field with the prefix 'Q'
    # Comment: the key field is indexed twice, but we must do it
    #          one => to index (as the others)
    #          two => to index without split
    #          the problem is that "_encode != _index"
    data = _reduce_size(_encode(fields[key_field], doc_values[key_field]))
    xdoc.add_term('Q' + data)

    # TODO: Don't store two documents with the same key field!
    # Save the doc
    db.add_document(xdoc)

    # Store metadata ?
    if metadata_modified:
        db.set_metadata('metadata', dumps(metadata))