def thread_worker(seed_id):
    """Mine tweets for ``seed_id`` and print their term frequencies.

    Creates a ``TwitterAgent``, calls ``request(seed_id)`` and then
    ``get_tweets(seed_id)`` on it. The ``"tweets"`` entries of the result are
    written one per line to ``inputs/<seed_id>.tweets``; each ``"retweets"``
    entry is written to ``inputs/<seed_id>.retweets`` as
    ``<entry[0]>: <entry[1]>``. The tweets file is then reopened and handed
    to ``TF_IDF.DocumentHandler`` (``frequency_filter=3``), and the value
    returned by ``get_term_frequency()`` is printed.

    :param seed_id: identifier passed to the agent calls; it is concatenated
        into the output file paths, so it must be a string.
    :return: None
    """
    client = TwitterAgent()
    client.request(seed_id)
    harvest = client.get_tweets(seed_id)

    # Both output files share the same "inputs/<seed_id>" stem.
    stem = "inputs/" + seed_id
    tweets_path = stem + ".tweets"

    with open(tweets_path, "w") as out:
        for text in harvest["tweets"]:
            out.write(text + "\n")

    with open(stem + ".retweets", "w") as out:
        for entry in harvest["retweets"]:
            # presumably entry is an (author, text) pair -- TODO confirm
            line = entry[0] + ": " + entry[1] + "\n"
            out.write(line)

    # Re-read the file just written so the handler consumes the on-disk text.
    with open(tweets_path) as source:
        analyzer = TF_IDF.DocumentHandler(source, frequency_filter=3)
        term_frequency = analyzer.get_term_frequency()
        # Parenthesised single-argument form prints the same value under the
        # Python 2 print statement.
        print(term_frequency)
__author__ = 'y.zhou'

from twitter_connection import TwitterAgent
from IRToolKit import TF_IDF

# Switch for the fetch-and-write stage of the script below.
# NOTE(review): the original layout was lost; this assumes MINING guards only
# the download and the two file writes, while the term-frequency pass always
# runs on inputs/<seed_id>.tweets -- confirm against the intended behaviour.
MINING = True

if __name__ == "__main__":
    seed_id = "nytimes"

    # Both data files share the same "inputs/<seed_id>" stem.
    stem = "inputs/" + seed_id
    tweets_path = stem + ".tweets"
    retweets_path = stem + ".retweets"

    if MINING:
        agent = TwitterAgent()
        agent.request(seed_id)
        tweets = agent.get_tweets(seed_id, limit=50)

        with open(tweets_path, "w") as out:
            for text in tweets["tweets"]:
                out.write(text + "\n")

        with open(retweets_path, "w") as out:
            for entry in tweets["retweets"]:
                # presumably entry is an (author, text) pair -- TODO confirm
                line = entry[0] + ": " + entry[1] + "\n"
                out.write(line)

    with open(tweets_path) as source:
        handler = TF_IDF.DocumentHandler(source, frequency_filter=1)
        result = handler.get_term_frequency()
        # Parenthesised single-argument form prints the same value under the
        # Python 2 print statement.
        print(result)

    print("done")