Esempio n. 1
0
def main():
    """Load tweets into MongoDB, filter them by a vocabulary and print a vocab.

    Steps, in order:
      1. Create a ``TweetToMongo`` for the 'toronto_tweets' database.
      2. Insert the tweets from 'toronto_tweets.txt' into the 'tweets'
         collection.
      3. Read one vocabulary term per line from 'vocab.txt' (UTF-8).
      4. Join the terms with '|' (regex alternation) and pass the pattern to
         ``filter_tweets`` together with 'tweets' and 'health'.
      5. Print whatever ``get_tweets_vocab`` returns for that vocabulary.

    The MongoDB client is closed even if one of the steps raises.
    """
    # Local import so the function does not depend on a file-level import;
    # io.open decodes while reading and exists on Python 2.6+ and Python 3.
    import io

    # create an instance object of the class TweetToMongo and
    # use database 'toronto_tweets'
    tweet_set = TweetToMongo('toronto_tweets')
    try:
        # insert tweets from the txt file to a MongoDB collection
        tweet_set.insert_to_collection('toronto_tweets.txt', 'tweets')

        # create a vocab list from items in the vocab.txt file; the context
        # manager closes the file even when reading fails
        with io.open('vocab.txt', 'r', encoding='utf-8') as vocab_file:
            vocab = [line.rstrip('\r\n') for line in vocab_file]

        # A blank line would become an empty alternative in the joined
        # pattern, and an empty alternative matches every tweet.
        vocab = [term for term in vocab if term]

        # join items in the vocab list by '|', which works as "or".
        # NOTE(review): terms are not regex-escaped, so metacharacters in
        # vocab.txt are interpreted as regex syntax -- confirm that is intended.
        regex_string = '|'.join(vocab)

        # filter tweets containing words of the vocab
        # ('health' is presumably the destination collection -- confirm
        # against TweetToMongo.filter_tweets)
        tweet_set.filter_tweets(regex_string, 'tweets', 'health')

        # call the get_tweets_vocab method to build a vocab from tweets
        new_vocab = tweet_set.get_tweets_vocab(vocab, 'tweets')

        # Parenthesised single argument prints identically on Python 2 and 3.
        print(new_vocab)
    finally:
        # close the MongoDB connection
        tweet_set.client.close()
access_token_secret = "";

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth_handler=auth)



print " Open MongoDB connection\n"
try:
    conn=pymongo.MongoClient()
    print "Connected!"
except pymongo.errors.ConnectionFailure, e:
    print "Connection failed : %s" % e

rest_db = conn['db_restT']
#mem_db = rest_db.my_collection

toMongo = TweetToMongo()
for query in ["#Warriors", "#NBAFinals2015"]:
    toMongo.start()
    for tweet in tweepy.Cursor(api.search,q=query, since = "2015-07-05", until = "2015-07-12", wait_on_rate_limit = True).items(10):
        toMongo.write(tweet)
        # FYI: JSON is in tweet._json
        print tweet._json
        print tweet.text
        print json.dumps(tweet._json).encode('utf8'),"\n"
    toMongo.end()
    toMongo.push(rest_db)