コード例 #1
0
def build_tweet_dictionary():
    """Build a token dictionary from the stored tweets and save it.

    Steps, in order:

    1. Configure logging at DEBUG level.
    2. Tokenize the texts exposed by ``DbTexts(Tweet)``, passing the
       English stop word list to ``Tokenizer``.
    3. Build a ``corpora.Dictionary`` from the tokenized texts and prune
       it with ``filter_extremes``.
    4. Hand the result to ``Dictionary.create_from_gensim_dictionary``
       under the name ``"tweets dictionary"``.

    Returns:
        Whatever ``Dictionary.create_from_gensim_dictionary`` returns
        (presumably the saved ``Dictionary`` model instance -- confirm
        against ``models``).
    """
    import logging
    # basicConfig only takes effect while the root logger has no handlers,
    # so repeated calls do not stack handlers.
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.DEBUG)

    # Imported at call time rather than module load time.
    # NOTE(review): presumably to avoid an import cycle or early model
    # loading -- confirm before hoisting to the top of the file.
    from twitter_stream.models import Tweet

    stoplist = stopwords.words('english')

    texts = DbTexts(Tweet)
    tokenized = Tokenizer(texts, stoplist=stoplist)

    # Build a dictionary.
    # The parenthesized single-argument form prints the same text under
    # both the Python 2 print statement and the Python 3 print function.
    print("Building a dictionary")
    dictionary = corpora.Dictionary(tokenized)

    # Prune both ends of the frequency range. Per gensim's
    # Dictionary.filter_extremes: drop tokens seen in fewer than 2 documents
    # or in more than 50% of documents; keep_n=None puts no cap on the
    # number of tokens that survive.
    dictionary.filter_extremes(no_below=2, no_above=0.5, keep_n=None)
    # Reassign token ids so there are no gaps left by the removed tokens.
    dictionary.compactify()

    # Save it in the database
    print("Saving the tweet dict")

    # NOTE(review): implicit relative import; this resolves as a sibling
    # module only under Python 2 import rules -- confirm the package layout
    # before running on Python 3.
    from models import Dictionary
    return Dictionary.create_from_gensim_dictionary(dictionary,
                                                    "tweets dictionary")
コード例 #2
0
def build_tweet_dictionary():
    """Create the tweet vocabulary and persist it through the ``Dictionary`` model.

    The function configures DEBUG-level logging, feeds ``DbTexts(Tweet)``
    through ``Tokenizer`` with the English stop word list, builds a
    ``corpora.Dictionary`` from the resulting tokens, prunes it with
    ``filter_extremes`` and passes it to
    ``Dictionary.create_from_gensim_dictionary`` with the name
    ``"tweets dictionary"``.

    Returns:
        The value returned by ``Dictionary.create_from_gensim_dictionary``
        (presumably the stored model instance -- verify in ``models``).
    """
    import logging
    # No effect if the root logger already has handlers configured.
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)

    # Function-scope import.
    # NOTE(review): looks deliberate (import cycle or deferred model
    # loading) -- confirm before moving it to module level.
    from twitter_stream.models import Tweet

    stoplist = stopwords.words('english')

    texts = DbTexts(Tweet)
    tokenized = Tokenizer(texts, stoplist=stoplist)

    # build a dictionary
    # print(...) with one argument emits the same output on Python 2 and
    # Python 3, unlike the bare print statement, which is a SyntaxError on 3.
    print("Building a dictionary")
    dictionary = corpora.Dictionary(tokenized)

    # Remove extremely rare AND extremely common words. Per gensim's
    # Dictionary.filter_extremes: tokens in fewer than 2 documents or in
    # more than half of all documents are dropped; keep_n=None means the
    # surviving vocabulary is not truncated.
    dictionary.filter_extremes(no_below=2, no_above=0.5, keep_n=None)
    # Close the id gaps left behind by the removed tokens.
    dictionary.compactify()

    # Save it in the database
    print("Saving the tweet dict")

    # NOTE(review): implicit relative import (Python 2 semantics); on
    # Python 3 this needs ``models`` to be importable as a top-level
    # module -- confirm against the package layout.
    from models import Dictionary
    return Dictionary.create_from_gensim_dictionary(dictionary, "tweets dictionary")