def testCorrectAdapter(self):
    """Smoke-test the TXNG3 adapter against a plone.indexer wrapper.

    A bare ``PortalContent`` is wrapped in plone.indexer's
    ``IndexableObjectWrapper`` and that wrapper is adapted to
    ``IIndexableContent``.  A named ``IIndexer`` adapter returning an
    empty string is then registered, and the ``SearchableText`` field is
    requested.  There are no assertions: the test passes as long as no
    exception is raised.
    """
    from zope.component import provideAdapter

    from Products.CMFCore.PortalContent import PortalContent
    from plone.indexer.interfaces import IIndexer as PIIIndexer
    from plone.indexer.wrapper import (
        IndexableObjectWrapper as PIIndexableObjectWrapper,
    )

    from zopyx.txng3.core.interfaces import IIndexableContent

    content = PortalContent()
    plone_wrapper = PIIndexableObjectWrapper(content, None)
    txng_adapter = IIndexableContent(plone_wrapper)

    def empty_indexer():
        # Stand-in indexer: it takes no arguments and yields no text.
        return ""

    def indexer_factory(_first, _second):
        # Adapter factory for the two adapted objects; both are ignored.
        return empty_indexer

    provideAdapter(indexer_factory, (None, None), PIIIndexer,
                   name='SearchableText')

    # This can throw an exception if the adapter is not prepared for
    # the "transparent" Plone Indexer wrapper.
    txng_adapter.indexableContent('SearchableText')
Exemple #2
0
    def testCorrectAdapter(self):
        """Verify that indexing through the plone.indexer wrapper works.

        The test only checks that ``indexableContent`` can be called on
        the ``IIndexableContent`` adapter of a wrapped ``PortalContent``
        without raising; nothing is asserted about the returned value.
        """
        from zope.component import provideAdapter

        from Products.CMFCore.PortalContent import PortalContent
        from plone.indexer.interfaces import IIndexer
        from plone.indexer.wrapper import IndexableObjectWrapper

        from zopyx.txng3.core.interfaces import IIndexableContent

        portal_content = PortalContent()
        wrapped_content = IndexableObjectWrapper(portal_content, None)
        indexable = IIndexableContent(wrapped_content)

        def SearchableText():
            return ""

        # Register a named IIndexer adapter whose factory always hands
        # back the dummy indexer above, whatever objects are adapted.
        provideAdapter(
            lambda a, b: SearchableText,
            (None, None),
            IIndexer,
            name='SearchableText',
        )

        # This can throw an exception if the adapter is not prepared for
        # the "transparent" Plone Indexer wrapper.
        indexable.indexableContent('SearchableText')
Exemple #3
0
    def SearchableText(self):
        """Return the textual content of the file for the search index.

        This is usually only used if TextIndexNG3 is not installed, but
        with collective.solr it is used even when TextIndexNG3 is
        present.

        Three outcomes are possible:

        * an ``IIndexableContent`` adapter exists: the text it collects
          for ``SearchableText`` replaces the base text entirely;
        * otherwise, for ``text/*`` formats, the decoded file data is
          appended to the base text;
        * otherwise the base text is returned unchanged.
        """
        base_text = BaseProxy.SearchableText(self)

        txng_adapter = IIndexableContent(self, None)
        if txng_adapter is not None:
            # We might get here if TextIndexNG3 is installed but the
            # content is being indexed by collective.solr.  In this case
            # TextIndexNG3 is used to obtain the text for binary files.
            collector = txng_adapter.indexableContent(['SearchableText'])
            pieces = [entry['content'].encode('utf8')
                      for entry in collector.getFieldData('SearchableText')]
            return ' '.join(pieces)

        if not self.Format().startswith("text/"):
            return base_text

        raw = self.get_data()
        # Fall back to ASCII when chardet reports no encoding.
        charset = chardet.detect(raw)["encoding"] or 'ascii'
        suffix = ' ' + raw.decode(charset, 'ignore').encode('utf8')
        return base_text + suffix
Exemple #4
0
    def SearchableText(self):
        """Return textual content of the file for the search index.

        This is usually only used if TextIndexNG3 is not installed.
        But if collective.solr is used then this is used even with
        TextIndexNG3.

        When an ``IIndexableContent`` adapter is available, the text it
        collects for ``SearchableText`` replaces the base text.
        Otherwise, for ``text/*`` formats, the file data is decoded
        (undecodable bytes are ignored) and appended to the base text as
        UTF-8.
        """
        result = BaseProxy.SearchableText(self)

        indexable = IIndexableContent(self, None)
        if indexable is not None:
            # We might get here if TextIndexNG3 is installed but
            # the content is being indexed by collective.solr.
            # In this case, use TextIndexNG3 to obtain the text for
            # binary files.
            icc = indexable.indexableContent(['SearchableText'])
            result = ' '.join(info['content'].encode('utf8')
                              for info in icc.getFieldData('SearchableText'))
        elif self.Format().startswith("text/"):
            data = self.get_data()
            # chardet reports an encoding of None when it cannot make a
            # guess (e.g. for empty data); decode() would then raise a
            # TypeError, so fall back to ASCII in that case.
            encoding = chardet.detect(data)["encoding"] or 'ascii'
            result += ' ' + data.decode(encoding, 'ignore').encode('utf8')

        return result
def extract_content(fields, obj, default_encoding=DEFAULT_ENCODING, default_language=DEFAULT_LANGUAGE):
    """ Extract the indexable content of ``obj`` for the given ``fields``.

        Three strategies are tried, in this order:

        1. If ``obj`` can be adapted to ``IIndexableContent``, the
           adapter's ``indexableContent(fields)`` result is used. This
           is the preferred way to interface with TextIndexNG indexes.
        2. If ``obj`` has a ``txng_get`` attribute (deprecated hook), it
           is called with ``fields`` and must return ``None`` or a
           ``(source, mimetype, encoding)`` triple, which is stored as
           binary content under the first field name.
        3. Otherwise the standard Zope 2 behaviour applies: each field
           is looked up as an attribute of ``obj`` and called if it is
           callable.

        Please note that this function does not contain content-type
        specific extraction code. This should be handled in every case
        by the content-type implementation itself or through an adapter.

        ``default_encoding`` is used to decode byte strings found by
        strategy 3; ``default_language`` is passed to the collector by
        strategies 2 and 3.

        Returns the content collector, or ``None`` if the collector is
        falsy or ``txng_get`` returned ``None``.

        Raises ``TypeError`` if a field value found by strategy 3 is
        neither a byte string nor a unicode string.
    """

    adapter = IIndexableContent(obj, None)
    # Compare against the lookup default explicitly so that a registered
    # adapter is never skipped merely because it evaluates as false.
    if adapter is not None:
        # the official TXNG3 indexer API
        icc = adapter.indexableContent(fields)

    elif hasattr(obj, 'txng_get'):

        # old Zope behaviour for objects providing the txng_get() hook
        warnings.warn('Using the txng_get() hook for class %s is deprecated.'
                      ' Use IndexContentCollector implementation instead' % obj.__class__.__name__,
                      DeprecationWarning,
                      stacklevel=2)

        result = obj.txng_get(fields)
        if result is None:
            return None

        # unpack result triple
        source, mimetype, encoding = result
        icc = IndexContentCollector()
        icc.addBinary(fields[0], source, mimetype, encoding, default_language)

    else:

        # old Zope 2 behaviour: look up value either as attribute of the object
        # or as method providing a return value as indexable content
        icc = IndexContentCollector()

        for f in fields:

            v = getattr(obj, f, None)
            # missing or empty attributes contribute nothing
            if not v:
                continue
            if callable(v):
                v = v()

            # accept only a string/unicode string
            if not isinstance(v, basestring):
                raise TypeError('Value returned for field "%s" must be string or unicode (got: %s, %s)' % (f, repr(v), type(v)))

            # byte strings are converted to unicode, dropping undecodable bytes
            if isinstance(v, str):
                v = unicode(v, default_encoding, 'ignore')

            icc.addContent(f, v, default_language)

    return icc or None
Exemple #6
0
def _collect_from_attributes(fields, obj, default_encoding, default_language):
    """ Build a content collector from plain attributes/methods of ``obj``.

        Implements the old Zope 2 behaviour: every name in ``fields`` is
        looked up on ``obj``; callables are called and their return
        value is used. Missing or empty attributes are skipped. Byte
        strings are decoded with ``default_encoding`` (undecodable bytes
        are ignored).

        Raises ``TypeError`` if a value is neither a byte string nor a
        unicode string.
    """
    collector = IndexContentCollector()

    for name in fields:
        value = getattr(obj, name, None)
        if not value:
            continue
        if callable(value):
            value = value()

        # accept only a string/unicode string
        if not isinstance(value, basestring):
            raise TypeError(
                'Value returned for field "%s" must be string or unicode (got: %s, %s)'
                % (name, repr(value), type(value)))

        if isinstance(value, str):
            value = unicode(value, default_encoding, 'ignore')

        collector.addContent(name, value, default_language)

    return collector


def extract_content(fields,
                    obj,
                    default_encoding=DEFAULT_ENCODING,
                    default_language=DEFAULT_LANGUAGE):
    """ This helper function tries to extract indexable content from a
        content object in different ways. First we try to adapt the
        object to IIndexableContent, which is the preferred way to
        interface with TextIndexNG indexes. Next we honour the
        deprecated txng_get() hook. Otherwise we fall back to the
        standard Zope 2 behaviour and try to get the content by looking
        at the corresponding attributes or methods directly.
        Please note that this function does not contain content-type
        specific extraction code. This should be handled in every case
        by the content-type implementation itself or through an adapter.

        Returns the content collector, or None if the collector is
        falsy or txng_get() returned None. Raises TypeError if an
        attribute/method value is not a string or unicode string.
    """

    adapter = IIndexableContent(obj, None)

    # Test against the lookup default itself: a registered adapter must
    # be used even if the adapter object happens to evaluate as false.
    if adapter is not None:
        # the official TXNG3 indexer API
        icc = adapter.indexableContent(fields)

    elif hasattr(obj, 'txng_get'):
        # old Zope behaviour for objects providing the txng_get() hook
        # (the warning stays here so that stacklevel=2 keeps pointing at
        # the caller of extract_content)
        warnings.warn('Using the txng_get() hook for class %s is deprecated.'
                      ' Use IndexContentCollector implementation instead' %
                      obj.__class__.__name__,
                      DeprecationWarning,
                      stacklevel=2)

        result = obj.txng_get(fields)
        if result is None:
            return None

        # unpack result triple
        source, mimetype, encoding = result
        icc = IndexContentCollector()
        icc.addBinary(fields[0], source, mimetype, encoding, default_language)

    else:
        # old Zope 2 behaviour: look up value either as attribute of the
        # object or as method providing a return value as indexable content
        icc = _collect_from_attributes(fields, obj, default_encoding,
                                       default_language)

    return icc or None