def testCorrectAdapter(self):
    """Check that the TXNG3 adapter copes with the Plone Indexer wrapper.

    A ``PortalContent`` instance is wrapped in plone.indexer's
    ``IndexableObjectWrapper`` and then adapted to ``IIndexableContent``.
    A trivial ``SearchableText`` indexer is registered afterwards and the
    adapter is asked for the indexable content of that field.  The test
    passes as long as the final call does not raise.
    """
    from zope.component import provideAdapter
    from Products.CMFCore.PortalContent import PortalContent
    from plone.indexer.interfaces import IIndexer as PIIIndexer
    from plone.indexer.wrapper import IndexableObjectWrapper \
        as PIIndexableObjectWrapper
    from zopyx.txng3.core.interfaces import IIndexableContent

    content = PortalContent()
    plone_wrapped = PIIndexableObjectWrapper(content, None)
    txng_adapter = IIndexableContent(plone_wrapped)

    def SearchableText():
        return ""

    def indexer_factory(a, b):
        # Multi-adapter factory: whatever pair is adapted, hand back the
        # dummy indexer callable defined above.
        return SearchableText

    provideAdapter(indexer_factory, (None, None),
                   PIIIndexer, name='SearchableText')

    # This can throw an exception if the adapter is not prepared for
    # the "transparent" Plone Indexer wrapper
    txng_adapter.indexableContent('SearchableText')
def SearchableText(self):
    """Build the text handed to the search index for this file.

    Normally this is only consulted when TextIndexNG3 is absent, but
    collective.solr calls it even when TextIndexNG3 is installed.

    Returns a UTF-8 encoded string:

    * if an ``IIndexableContent`` adapter exists, the space-joined
      ``SearchableText`` field contents it reports (this replaces the
      base proxy text);
    * otherwise, for ``text/*`` formats, the base proxy text followed by
      the decoded file data;
    * otherwise just the base proxy text.
    """
    text = BaseProxy.SearchableText(self)
    txng = IIndexableContent(self, None)

    if txng is not None:
        # We might get here if TextIndexNG3 is installed but the content
        # is being indexed by collective.solr.  In that case let
        # TextIndexNG3 extract the text, which also covers binary files.
        collector = txng.indexableContent(['SearchableText'])
        pieces = [entry['content'].encode('utf8')
                  for entry in collector.getFieldData('SearchableText')]
        return ' '.join(pieces)

    if not self.Format().startswith("text/"):
        return text

    raw = self.get_data()
    charset = chardet.detect(raw)["encoding"]
    if not charset:
        # No usable guess from chardet: fall back to ASCII and let the
        # 'ignore' error handler drop whatever does not fit.
        charset = 'ascii'
    return text + ' ' + raw.decode(charset, 'ignore').encode('utf8')
def SearchableText(self):
    """Return textual content of the file for the search index.

    This is usually only used if TextIndexNG3 is not installed.  But if
    collective.solr is used then this is used even with TextIndexNG3.

    Returns a UTF-8 encoded string.  When an ``IIndexableContent``
    adapter is available its ``SearchableText`` field data replaces the
    base proxy text; otherwise, for ``text/*`` formats, the decoded file
    data is appended to the base proxy text.
    """
    result = BaseProxy.SearchableText(self)
    indexable = IIndexableContent(self, None)
    if indexable is not None:
        # We might get here if TextIndexNG3 is installed but
        # the content is being indexed by collective.solr.
        # In this case, use TextIndexNG3 to obtain the text for
        # binary files.
        icc = indexable.indexableContent(['SearchableText'])
        result = ' '.join(info['content'].encode('utf8') for info in
                          icc.getFieldData('SearchableText'))
    elif self.Format().startswith("text/"):
        data = self.get_data()
        # chardet reports an encoding of None when it cannot make a guess
        # (for instance on empty data); decoding with None would raise
        # TypeError, so fall back to ASCII and rely on 'ignore' to drop
        # any bytes that do not fit.
        encoding = chardet.detect(data)["encoding"] or 'ascii'
        result += ' ' + data.decode(encoding, 'ignore').encode('utf8')
    return result
def extract_content(fields, obj, default_encoding=DEFAULT_ENCODING,
                    default_language=DEFAULT_LANGUAGE):
    """Extract indexable content for ``fields`` from ``obj``.

    Three strategies are tried in order:

    1. If ``obj`` can be adapted to ``IIndexableContent``, the adapter's
       ``indexableContent(fields)`` result is used (preferred API).
    2. If ``obj`` has a ``txng_get`` attribute, the deprecated hook is
       called and its ``(source, mimetype, encoding)`` triple is stored
       as binary content under the first entry of ``fields``.
    3. Otherwise each field name is looked up as an attribute of ``obj``;
       callables are called and the resulting string is collected.

    This helper contains no content-type specific extraction code; that
    belongs in the content-type implementation or in an adapter.

    :param fields: sequence of field names to extract.
    :param obj: the content object to inspect.
    :param default_encoding: encoding used to decode byte strings found
        through the attribute fallback (undecodable bytes are ignored).
    :param default_language: language passed along with collected content.
    :returns: the content collector, or ``None`` when the collector is
        falsy or the ``txng_get`` hook returned ``None``.
    :raises TypeError: if an attribute value (after calling it, when
        callable) is neither ``str`` nor ``unicode``.
    """
    adapter = IIndexableContent(obj, None)
    if adapter:
        # the official TXNG3 indexer API
        icc = adapter.indexableContent(fields)

    elif hasattr(obj, 'txng_get'):
        # old Zope behaviour for objects providing the txng_get() hook
        warnings.warn('Using the txng_get() hook for class %s is deprecated.'
                      ' Use IndexContentCollector implementation instead'
                      % obj.__class__.__name__,
                      DeprecationWarning,
                      stacklevel=2)

        result = obj.txng_get(fields)
        if result is None:
            return None

        # unpack result triple
        source, mimetype, encoding = result
        icc = IndexContentCollector()
        # the hook yields a single document, filed under the first field
        icc.addBinary(fields[0], source, mimetype, encoding, default_language)

    else:
        # old Zope 2 behaviour: look up value either as attribute of the
        # object or as method providing a return value as indexable content
        icc = IndexContentCollector()

        for f in fields:
            v = getattr(obj, f, None)
            if not v:
                # missing attribute or empty value: nothing to index
                continue
            if callable(v):
                v = v()

            # accept only a string/unicode string
            if not isinstance(v, basestring):
                raise TypeError('Value returned for field "%s" must be string or unicode (got: %s, %s)' % (f, repr(v), type(v)))

            if isinstance(v, str):
                v = unicode(v, default_encoding, 'ignore')

            icc.addContent(f, v, default_language)

    return icc or None
def extract_content(fields, obj, default_encoding=DEFAULT_ENCODING,
                    default_language=DEFAULT_LANGUAGE):
    """Extract indexable content for ``fields`` from ``obj``.

    Three strategies are tried in order:

    1. If ``obj`` can be adapted to ``IIndexableContent``, the adapter's
       ``indexableContent(fields)`` result is used (preferred API).
    2. If ``obj`` has a ``txng_get`` attribute, the deprecated hook is
       called and its ``(source, mimetype, encoding)`` triple is stored
       as binary content under the first entry of ``fields``.
    3. Otherwise each field name is looked up as an attribute of ``obj``;
       callables are called and the resulting string is collected.

    This helper contains no content-type specific extraction code; that
    belongs in the content-type implementation or in an adapter.

    :param fields: sequence of field names to extract.
    :param obj: the content object to inspect.
    :param default_encoding: encoding used to decode byte strings found
        through the attribute fallback (undecodable bytes are ignored).
    :param default_language: language passed along with collected content.
    :returns: the content collector, or ``None`` when the collector is
        falsy or the ``txng_get`` hook returned ``None``.
    :raises TypeError: if an attribute value (after calling it, when
        callable) is neither ``str`` nor ``unicode``.
    """
    adapter = IIndexableContent(obj, None)
    if adapter:
        # the official TXNG3 indexer API
        icc = adapter.indexableContent(fields)

    elif hasattr(obj, 'txng_get'):
        # old Zope behaviour for objects providing the txng_get() hook
        warnings.warn('Using the txng_get() hook for class %s is deprecated.'
                      ' Use IndexContentCollector implementation instead'
                      % obj.__class__.__name__,
                      DeprecationWarning,
                      stacklevel=2)

        result = obj.txng_get(fields)
        if result is None:
            return None

        # unpack result triple
        source, mimetype, encoding = result
        icc = IndexContentCollector()
        # the hook yields a single document, filed under the first field
        icc.addBinary(fields[0], source, mimetype, encoding, default_language)

    else:
        # old Zope 2 behaviour: look up value either as attribute of the
        # object or as method providing a return value as indexable content
        icc = IndexContentCollector()

        for f in fields:
            v = getattr(obj, f, None)
            if not v:
                # missing attribute or empty value: nothing to index
                continue
            if callable(v):
                v = v()

            # accept only a string/unicode string
            if not isinstance(v, basestring):
                raise TypeError(
                    'Value returned for field "%s" must be string or unicode (got: %s, %s)' % (f, repr(v), type(v)))

            if isinstance(v, str):
                v = unicode(v, default_encoding, 'ignore')

            icc.addContent(f, v, default_language)

    return icc or None