def on_success(self, tweet):
    """Streaming callback: persist one geotagged tweet, then stop once the time budget is spent."""
    if tweet:
        processed = ProcessedTweet()
        # requiregeo=True: streamed tweets without coordinates are dropped.
        kept = processed.process_raw(tweet, self.userinfo, requiregeo=True)
        if kept:
            o.write(processed.__str__())
            if processed.inreply:
                reply_line = '/'.join([processed.user, processed.status_id]) + ': ' + '/'.join(processed.inreply) + '\n'
                orep.write(reply_line)
    #check if max time has lapsed
    # NOTE(review): the original was collapsed to one line, so the exact nesting of
    # this check is ambiguous; it is assumed to run on every callback — confirm.
    if time.time() - self.start_time > maxsecs:
        self.disconnect()
def on_success(self, tweet):
    """Streaming callback: write each processed geotagged tweet and enforce the time cap."""
    if tweet:
        pt = ProcessedTweet()
        # Only tweets that pass processing (which requires geo data here) are written.
        if pt.process_raw(tweet, self.userinfo, requiregeo=True):
            o.write(pt.__str__())
            if pt.inreply:
                orep.write('%s: %s\n' % ('/'.join([pt.user, pt.status_id]),
                                         '/'.join(pt.inreply)))
    #check if max time has lapsed
    # NOTE(review): nesting of this check was ambiguous in the collapsed original;
    # assumed to execute on every callback — confirm against upstream history.
    elapsed = time.time() - self.start_time
    if elapsed > maxsecs:
        self.disconnect()
orep = codecs.open(args.basename+'.replies.txt', 'w', 'utf-8') o.write('userid\ttweet\tstatusid\tdate\tlat\tlon\n') userinfo = {} searcher = Twython(CONSUMER_KEY, CONSUMER_SECRET) until_id = 1e30 for batch in range(args.numtweets/100): results = searcher.search(q=args.searchterm, count=100, max_id=until_id-1, result_type='recent') #can change result_type to popular or mixed print "Searching until status", until_id if len(results['statuses'])==1: #usually repeat after data runs out break for tweet in results['statuses']: ptweet = ProcessedTweet() success = ptweet.process_raw(tweet, userinfo, requiregeo = False, lang = args.lang, requireword = args.searchterm) if success: o.write(ptweet.__str__()) if ptweet.inreply: orep.write('/'.join([ptweet.user, ptweet.status_id]) +': ' +'/'.join(ptweet.inreply)+'\n') until_id = min(until_id, int(ptweet.status_id)) time.sleep(5) #throttling orep.close() o.close() write_dict_tsv(userinfo, args.basename+'.userinfo.tsv')
CONSUMER_KEY = "JksOBh39nyd95jagJQTZ8Q" CONSUMER_SECRET = "kx87N1Ge8iWuzwcWUH55PhUDOFCqBju6UqUtroYFo" word = sys.argv[1] n = int(sys.argv[2]) o = codecs.open(word+'.statuses.tsv', 'w', 'utf-8') orep = codecs.open(word+'.replies.txt', 'w', 'utf-8') userinfo = {} searcher = Twython(CONSUMER_KEY, CONSUMER_SECRET, OAUTH_TOKEN, OAUTH_SECRET) until_id = 1e30 for batch in range(n): #at most n*100 tweets results = searcher.search(q=word.replace('_', ' OR '), count=100, max_id=until_id-1) for tweet in results['statuses']: ptweet = ProcessedTweet() success = ptweet.process_raw(tweet, userinfo, requiregeo = False) if success: o.write(ptweet.__str__()) if ptweet.inreply: orep.write('/'.join([ptweet.user, ptweet.status_id]) +': ' +'/'.join(ptweet.inreply)+'\n') until_id = min(until_id, int(ptweet.status_id)) print "Searching until status", until_id time.sleep(5) #throttling oj = open(word+'.userinfo.json', 'w') json.dump(userinfo, oj) oj.close()