def __init__(self, unit=UNIT_WORD, **property_names):
        """
        Construct a SemcorTokenizer that works at the given level of
        the document hierarchy.

        The chosen unit is recorded on the instance, the SGML string
        parser is installed as the parse method, and any remaining
        keyword arguments are handed on to C{AbstractTokenizer.__init__}.

        @param unit: one of 'word', 'sentence' or 'paragraph';
            selects the level of hierarchy to be processed.
        @type unit: C{String}
        @param property_names: keyword arguments passed through
            unchanged to C{AbstractTokenizer.__init__}.
        @raise AssertionError: if C{unit} is not one of the three
            C{SemcorTokenizer.UNIT_*} constants (only while assertions
            are enabled).
        """
        assert unit in (
            SemcorTokenizer.UNIT_WORD,
            SemcorTokenizer.UNIT_SENTENCE,
            SemcorTokenizer.UNIT_PARAGRAPH,
        )
        self._unit = unit
        # The input is not valid XML, so the SGML-tolerant parser is
        # used; xml.dom.minidom.parseString would be the alternative
        # if the data were well-formed XML.
        self._parse_method = _parseSGMLString
        AbstractTokenizer.__init__(self, **property_names)
 def __init__(self, buffer_size=1024, **property_names):
     """
     Initialise the SAX content-handler base, set up this object's
     own state, and then initialise the tokenizer base.

     @param buffer_size: value stored as C{self._buffer_size}
         (default 1024).  NOTE(review): how it is used is not visible
         in this constructor -- confirm against the rest of the class.
     @param property_names: keyword arguments passed through
         unchanged to C{AbstractTokenizer.__init__}.
     """
     xml.sax.ContentHandler.__init__(self)
     # Both attributes are assigned before reset() is called, in the
     # same order as before (left to right).
     self._lemma, self._buffer_size = '', buffer_size
     self.reset()
     AbstractTokenizer.__init__(self, **property_names)
 def __init__(self, **property_names):
     """
     Initialise the tokenizer by delegating to
     C{AbstractTokenizer.__init__}.

     @param property_names: keyword arguments passed through
         unchanged to C{AbstractTokenizer.__init__}.
     """
     AbstractTokenizer.__init__(self, **property_names)