def __init__(self):
    """Set up the feature-extraction pipeline: POS tagger, tf-idf counts, speller."""
    # Tag/chunk data with ark_tweet_nlp when that module is enabled;
    # downstream code treats None as "tagger unavailable".
    self.ark_tweet = ark_tweet.ArkTweetNLP() if enabled_modules['ark_tweet'] else None

    # Count all token frequencies for tf-idf.
    # NOTE(review): path is hard-coded to one machine's layout — consider
    # moving it into configuration (confirm against deployment environment).
    tf_idf._build_dictionary(self.ark_tweet, '/data1/nlp-data/twitter/data/etc/')

    # Spelling correction
    self.speller = spell.SpellChecker()
def __init__(self):
    """Set up the feature-extraction pipeline: POS tagger, tf-idf counts, speller."""
    # Tag/chunk data with ark_tweet_nlp when that module is enabled;
    # downstream code treats None as "tagger unavailable".
    if enabled_modules['ark_tweet']:
        self.ark_tweet = ark_tweet.ArkTweetNLP()
    else:
        self.ark_tweet = None

    # Count all token frequencies for tf-idf. Here enabled_modules['twitter_data']
    # is forwarded as the dictionary-data location — presumably a directory path;
    # TODO confirm against the config that populates enabled_modules.
    tf_idf._build_dictionary(self.ark_tweet, enabled_modules['twitter_data'])

    # Spelling correction
    self.speller = spell.SpellChecker()
def __init__(self):
    """Set up the full pipeline: tagger, metadata lookup, URL fetcher, tf-idf, speller, WSD."""
    # Tag/chunk data with ark_tweet_nlp when enabled; None marks it unavailable.
    self.ark_tweet = ark_tweet.ArkTweetNLP() if enabled_modules['ark_tweet'] else None

    # Lookup tweet metadata.
    # NOTE(review): unlike ark_tweet, these attributes are left UNSET when the
    # module is disabled — callers presumably guard with the same flags or
    # hasattr(); confirm before relying on them unconditionally.
    if enabled_modules['twitter_data']:
        self.twitter_data = twitter_data.TwitterData()

    # Get HTML data from URLs embedded in tweets.
    if enabled_modules['url']:
        self.url = url.Url()

    # Count all token frequencies for tf-idf.
    # NOTE(review): hard-coded machine-specific path — candidate for config.
    tf_idf._build_dictionary(self.ark_tweet, '/data1/nlp-data/twitter/data/etc/')

    # Spelling correction
    self.speller = spell.SpellChecker()

    # Word-sense disambiguation (UKB), only when enabled.
    if enabled_modules['ukb_wsd']:
        self.ukb = ukb_wsd.ukbWSD()