Example #1
    def register(self, id, instance=None):
        """ map an id to an instance """
        if self.data.has_key(id):
            # duplicate ids are currently tolerated silently;
            # re-enable the exception below to make them an error
            pass
            #raise RegistryException, '"%s" already registered' % id

        # verify the instance (or, for old-style instances, its class)
        # against every interface required by this registry
        for iface in self.ifaces:
            try:
                if isinstance(instance, InstanceType):
                    verifyClass(iface, instance.__class__)
                else:
                    verifyClass(iface, instance)
            except:
                # log the broken interface, then re-raise for the caller
                LOG('TextIndexNG', WARNING,
                    'interface broken for %s' % str(instance),
                    error=sys.exc_info())
                raise
    
        self.data[id] = instance
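
The register method above maps an id to an instance, but only after the instance (or, for old-style instances, its class) has been checked against every interface the registry requires. Below is a minimal standalone sketch of that pattern with a duck-typed check standing in for verifyClass; SimpleRegistry, DummySplitter and required_methods are illustrative names, not part of TextIndexNG:

class SimpleRegistry:
    """ illustrative registry: maps ids to instances after a duck-typed check """

    def __init__(self, required_methods=()):
        self.required_methods = required_methods  # stand-in for self.ifaces
        self.data = {}

    def register(self, id, instance=None):
        if id in self.data:
            raise ValueError('"%s" already registered' % id)
        # stand-in for verifyClass(): require the expected methods to be present
        for name in self.required_methods:
            if not callable(getattr(instance, name, None)):
                raise TypeError('%r does not provide %r' % (instance, name))
        self.data[id] = instance

class DummySplitter:
    def split(self, text):
        return text.split()

registry = SimpleRegistry(required_methods=('split',))
registry.register('TXNGSplitter', DummySplitter())
assert registry.data['TXNGSplitter'].split('some text') == ['some', 'text']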
Example #2
    def __init__(self, id, extra=None, caller=None):

        def _get(o, k, default):
            """ return a value for a given key of a dict/record 'o' """
            if isinstance(o, dict):
                return o.get(k, default)
            else:
                return getattr(o, k, default)
        
        self.id = id

        # check parameters
        if extra:
            for k in extra.keys():
                if not k in self._all_options:
                    raise TXNGError, 'unknown parameter "%s"' % k

        if caller is not None:
            self.catalog_path = '/'.join(caller.getPhysicalPath())
        else:
            self.catalog_path = None

        # indexed attributes
        self._indexed_fields = _get(extra, 'indexed_fields', '').split(',')
        self._indexed_fields = [ attr.strip() for attr in self._indexed_fields if attr ]
        if not self._indexed_fields:
            self._indexed_fields = [ self.id ]

        # splitter to be used
        self.use_splitter = _get(extra, 'use_splitter', 'TXNGSplitter')

        # maximum length of split words
        self.splitter_max_len = _get(extra, 'splitter_max_len', 64)

        # allow single characters
        self.splitter_single_chars = _get(extra,'splitter_single_chars',0)

        # valid word separators
        self.splitter_separators = _get(extra, 'splitter_separators','.+-_@')

        # case folding (normalize term case while splitting)
        self.splitter_casefolding = _get(extra,'splitter_casefolding',1)

        # left truncation
        self.truncate_left = _get(extra, 'truncate_left', 0)

        # Term autoexpansion
        self.autoexpand = _get(extra, 'autoexpand', 0)
        self.autoexpand_limit = _get(extra, 'autoexpand_limit', 4)

        # maximum number of hits
        self.numhits = _get(extra, 'numhits', 999999999)

        # default maximum distance for words with near search
        self.near_distance = _get(extra,'near_distance', 5)

        # Stopwords: either filename or StopWord object
        self.use_stopwords = _get(extra, 'use_stopwords', None) or None
        if self.use_stopwords:
            verifyClass(StopwordsInterface, self.use_stopwords.__class__)
     
        # Normalizer
        self.use_normalizer = _get(extra,'use_normalizer', None) or None

        # use converters from the ConvertersRegistry
        self.use_converters = _get(extra,'use_converters',0) 

        # Storage to be used
        self.use_storage = _get(extra,'use_storage', 'StandardStorage') 

        # encoding
        self.default_encoding = _get(extra,'default_encoding', 'iso-8859-15') 

        # check Parser
        self.use_parser = _get(extra, 'use_parser','PyQueryParser')
        
        # Thesaurus
        self.use_thesaurus = _get(extra, 'use_thesaurus', None)
        self.thesaurus_mode = _get(extra, 'thesaurus_mode', None)

        self.use_lexicon = 'StandardLexicon'
        self.clear()
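
The constructor above funnels every option through the local _get helper, so the extra argument may be either a plain dict or a record-style object exposing the options as attributes. Below is a minimal standalone sketch of that lookup pattern; the Extra class and the option values are illustrative, not taken from TextIndexNG:

def _get(o, k, default):
    """ return o[k] for a mapping, o.k for a record-style object """
    if isinstance(o, dict):
        return o.get(k, default)
    return getattr(o, k, default)

class Extra:
    """ record-style container, similar to the 'extra' record Zope passes to index constructors """
    indexed_fields = 'title, description'
    splitter_casefolding = 0

# dict-style configuration
opts = {'indexed_fields': 'title, description', 'splitter_casefolding': 0}
assert _get(opts, 'indexed_fields', '') == 'title, description'
assert _get(opts, 'use_splitter', 'TXNGSplitter') == 'TXNGSplitter'  # missing key falls back to the default

# record-style configuration behaves identically
rec = Extra()
assert _get(rec, 'splitter_casefolding', 1) == 0
assert _get(rec, 'near_distance', 5) == 5

# the indexed_fields string is then split and stripped exactly as in __init__ above
fields = [f.strip() for f in _get(opts, 'indexed_fields', '').split(',') if f]
assert fields == ['title', 'description']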