# Requires `import re` and `from nltk.tokenize import RegexpTokenizer`.
def __init__(self):
    # Join every Twitter-specific regex fragment into one alternation and
    # substitute the shared sub-expressions from REGEX_CONSTANTS.
    # (The Python 2 `ur''` prefix is a syntax error in Python 3; `r''` suffices.)
    pattern = r'|'.join(
        TwitterTokenizer.REGEX_STRINGS.values()).format(**REGEX_CONSTANTS)
    RegexpTokenizer.__init__(self,
                             pattern=pattern,
                             flags=re.UNICODE | re.MULTILINE | re.VERBOSE
                             | re.IGNORECASE)
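The REGEX_STRINGS and REGEX_CONSTANTS objects are not part of this listing, so the following is only a minimal, self-contained sketch of how such a tokenizer could be assembled and used, with hypothetical stand-in values for both names:

import re
from nltk.tokenize import RegexpTokenizer

# Hypothetical stand-ins; the real REGEX_STRINGS / REGEX_CONSTANTS live in
# the original project and are far more elaborate.
REGEX_CONSTANTS = {'word_chars': r'\w+'}

class TwitterTokenizer(RegexpTokenizer):
    REGEX_STRINGS = {
        'hashtag': r'\#{word_chars}',   # e.g. #nltk
        'mention': r'@{word_chars}',    # e.g. @user
        'word': r'{word_chars}',
    }

    def __init__(self):
        pattern = r'|'.join(
            TwitterTokenizer.REGEX_STRINGS.values()).format(**REGEX_CONSTANTS)
        RegexpTokenizer.__init__(self,
                                 pattern=pattern,
                                 flags=re.UNICODE | re.MULTILINE | re.VERBOSE
                                 | re.IGNORECASE)

print(TwitterTokenizer().tokenize("Trying @nltk with #python"))
# -> ['Trying', '@nltk', 'with', '#python']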
Example #2
def __init__(self):
    # Tokenize on whitespace, but keep each newline as its own token.
    RegexpTokenizer.__init__(self, r'\S+|\n')
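This variant is self-contained apart from the NLTK import; a quick check of its behaviour (any run of non-whitespace characters becomes a token, and each newline survives as a token of its own):

from nltk.tokenize import RegexpTokenizer

tok = RegexpTokenizer(r'\S+|\n')
print(tok.tokenize("first line\nsecond  line"))
# -> ['first', 'line', '\n', 'second', 'line']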
Example #3
def __init__(self):
    # tb_pattern is assumed to be defined elsewhere in the module; the
    # re.VERBOSE flag suggests it is a multi-line, commented pattern.
    RegexpTokenizer.__init__(self,
                             tb_pattern,
                             flags=re.UNICODE | re.MULTILINE | re.DOTALL
                             | re.VERBOSE)
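tb_pattern itself is not shown in this listing; the stand-in below only illustrates the kind of commented, multi-line pattern that the re.VERBOSE flag makes possible:

import re
from nltk.tokenize import RegexpTokenizer

# Hypothetical placeholder for tb_pattern, which is defined elsewhere in
# the original project.
tb_pattern = r'''
    \$?\d+(?:\.\d+)?%?   # numbers, currency amounts, percentages
  | \w+(?:-\w+)*         # words, possibly hyphenated
  | [.,;!?]              # punctuation kept as separate tokens
'''

tok = RegexpTokenizer(tb_pattern,
                      flags=re.UNICODE | re.MULTILINE | re.DOTALL | re.VERBOSE)
print(tok.tokenize("Prices rose 4.5% to $12.30, analysts said."))
# -> ['Prices', 'rose', '4.5%', 'to', '$12.30', ',', 'analysts', 'said', '.']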