Example #1
0
File: moses.py Project: DrDub/nltk
 def __init__(self, lang='en'):
     """
     Set up the tokenizer state for one language.

     :param lang: Language code handed to ``nonbreaking_prefixes.words``
         to select the prefix list. Defaults to ``'en'``.
     """
     super(MosesTokenizer, self).__init__()
     self.lang = lang
     # Strip surrounding whitespace from every entry returned by the
     # corpus reader, so stray spaces or line endings in the data do not
     # end up inside the stored prefixes.
     self.NONBREAKING_PREFIXES = [prefix.strip() for prefix in
                                  nonbreaking_prefixes.words(lang)]
     # For entries accepted by has_numeric_only(), keep only the text
     # before the last space (presumably dropping a trailing marker --
     # confirm against has_numeric_only and the prefix data format).
     self.NUMERIC_ONLY_PREFIXES = [prefix.rpartition(' ')[0]
                                   for prefix in self.NONBREAKING_PREFIXES
                                   if self.has_numeric_only(prefix)]
Example #2
0
 def __init__(self, lang='en'):
     """
     Prepare a tokenizer instance for the given language.

     :param lang: Language code passed to ``nonbreaking_prefixes.words``
         when loading the prefix list. Defaults to ``'en'``.
     """
     super(MosesTokenizer, self).__init__()
     self.lang = lang

     # Load the prefix entries for this language, trimming surrounding
     # whitespace from each one.
     trimmed_entries = []
     for raw_entry in nonbreaking_prefixes.words(lang):
         trimmed_entries.append(raw_entry.strip())
     self.NONBREAKING_PREFIXES = trimmed_entries

     # From the entries accepted by has_numeric_only(), keep only the
     # text that precedes the last space.
     numeric_only = []
     for entry in self.NONBREAKING_PREFIXES:
         if self.has_numeric_only(entry):
             numeric_only.append(entry.rpartition(' ')[0])
     self.NUMERIC_ONLY_PREFIXES = numeric_only