Example #1
 def __init__(self, session, config, parent):
     SimpleTokenizer.__init__(self, session, config, parent)
     # The 'tokenizer' setting names a tokenizer class in the lucene module.
     tknr = self.get_setting(session, 'tokenizer')
     if tknr[-9:] == 'Tokenizer' and hasattr(lucene, tknr):
         # Resolve the configured name to the actual PyLucene class.
         self.tokenizer = getattr(lucene, tknr)
     else:
         raise ConfigFileException("Unknown Lucene Tokenizer")
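Examples 1 and 2 are the same pattern (they differ only in quote style): the 'tokenizer' setting names a class exposed by the flat PyLucene namespace, and the constructor resolves it with hasattr/getattr, rejecting anything else as a configuration error. A minimal standalone sketch of that lookup, assuming PyLucene is installed (a plain ValueError stands in for ConfigFileException, and the class name at the end is purely illustrative):

 import lucene

 def resolve_lucene_tokenizer(name):
     # Accept only names ending in 'Tokenizer' that the lucene module exposes.
     if name.endswith('Tokenizer') and hasattr(lucene, name):
         return getattr(lucene, name)
     raise ValueError("Unknown Lucene Tokenizer: %r" % name)

 cls = resolve_lucene_tokenizer('WhitespaceTokenizer')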
Example #2
 def __init__(self, session, config, parent):
     SimpleTokenizer.__init__(self, session, config, parent)
     tknr = self.get_setting(session, "tokenizer")
     if tknr[-9:] == "Tokenizer" and hasattr(lucene, tknr):
         self.tokenizer = getattr(lucene, tknr)
     else:
         raise ConfigFileException("Unknown Lucene Tokenizer")
Example #3
 def __init__(self, session, config, parent):
     SimpleTokenizer.__init__(self, session, config, parent)
     self.structure = self.get_setting(session, 'structuredOutput', 0)
     self.stem = self.get_setting(session, 'useStem', 0)
     self.pos = self.get_setting(session, 'pos', 0)
     self.justPos = self.get_setting(session, 'justPos', 0)
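The third argument to get_setting is used as the default when the setting is absent from the object's configuration, so all four flags here are off unless the config turns them on. A hypothetical stand-in over a plain dict, just to make that behaviour concrete:

 def get_setting(settings, name, default=None):
     # Hypothetical equivalent: configured value, else the supplied default.
     return settings.get(name, default)

 flags = {'useStem': 1}
 print(get_setting(flags, 'useStem', 0))  # 1
 print(get_setting(flags, 'pos', 0))      # 0, not configured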
Example #4
 def __init__(self, session, config, parent):
     SimpleTokenizer.__init__(self, session, config, parent)
     self.punkt = nltk.data.load('tokenizers/punkt/english.pickle')
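nltk.data.load('tokenizers/punkt/english.pickle') returns a pretrained English Punkt sentence splitter; the 'punkt' resource has to be downloaded beforehand (for example with nltk.download('punkt')). A brief usage sketch of the object stored in self.punkt:

 import nltk

 punkt = nltk.data.load('tokenizers/punkt/english.pickle')
 # tokenize() splits running text into a list of sentence strings.
 sentences = punkt.tokenize("First sentence here. And a second one.")
 # -> ['First sentence here.', 'And a second one.']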
Example #5
 def __init__(self, session, config, parent):
     SimpleTokenizer.__init__(self, session, config, parent)
     self.punkt = nltk.tokenize.PunktWordTokenizer()
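PunktWordTokenizer is only present in older NLTK releases; newer versions no longer ship it. If a rough word-level substitute is needed on a modern NLTK (an assumption, not a drop-in replacement), something like this works:

 from nltk.tokenize import WordPunctTokenizer

 # Splits on word characters and punctuation, similar in spirit to the old
 # Punkt word tokenizer.
 tokens = WordPunctTokenizer().tokenize("Can't stop, won't stop.")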
Example #6
 def __init__(self, session, config, parent):
     SimpleTokenizer.__init__(self, session, config, parent)
     raise MissingDependencyException(self.objectType, "lucene")
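This variant is the one constructed when PyLucene cannot be imported: the module still loads, and the failure is deferred to construction time via MissingDependencyException. A sketch of the module-level guard such a layout typically relies on (the real module may instead define this whole fallback class inside the except branch):

 try:
     import lucene
 except ImportError:
     # Leave a sentinel so the rest of the module imports cleanly.
     lucene = None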
Example #7
 def __init__(self, session, config, parent):
     SimpleTokenizer.__init__(self, session, config, parent)
     if nltk is None:
         raise MissingDependencyException(self.objectType, 'nltk')
     self.punkt = nltk.data.load('tokenizers/punkt/english.pickle')
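Unlike Example #4, this constructor checks for the dependency explicitly. That check only makes sense if the module imports NLTK defensively, leaving the name bound to None when the package is missing:

 try:
     import nltk
 except ImportError:
     nltk = None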
Example #8
 def __init__(self, session, config, parent):
     SimpleTokenizer.__init__(self, session, config, parent)
     if nltk is None:
         raise MissingDependencyException(self.objectType, 'nltk')
     self.punkt = nltk.tokenize.PunktWordTokenizer()