def count_for_tag(support, hashtag):
    """Search Twitter for *hashtag* and tally the origin country of each hit.

    For every tweet found, the country is taken from the tweet's ``place``
    metadata when present; otherwise the free-text user location is matched
    against the module-level ``country_desc`` mapping (country name, plus a
    list of descriptive words per country). Matches are accumulated into
    *support* via ``__increment``. Stops after more than 1000 tweets.

    :param support: mutable tally (passed through to ``__increment``)
    :param hashtag: search term, e.g. ``"#python"``
    """
    print("Searching for %s " % hashtag)
    try:
        tso = TwitterSearchOrder()
        tso.setKeywords([hashtag])
        tso.setCount(100)            # maximum results per page
        tso.setIncludeEntities(False)  # entity metadata is unused here
        # SECURITY NOTE(review): credentials are hard-coded and TLS
        # verification is disabled — move secrets to config/env and
        # re-enable verify as a follow-up.
        ts = TwitterSearch(
            consumer_key="TNX9jTHJgTEyB1IcUECPJ4uSY",
            consumer_secret="5B1R0geyT1Iv2mBc601gaDwuBBjVkabab72UXbzVTDEJ7Z6XAb",
            access_token="143109809-5IAGEaGuuiBRjVVJT9WHUQnAQlOkVcemzhnOpMkx",
            access_token_secret="Yh5WeJo9Z01j42jbTk6tL47zl1Rdox1LJ1d2lJgAAPm0r",
            verify=False,
        )
        for tweet in ts.searchTweetsIterable(tso):
            place = tweet["place"]
            # Prefer the structured place metadata when Twitter supplies it.
            if place is not None and "country" in place:
                __increment(support, place["country"])
                continue
            location = tweet["user"]["location"]
            if not location:
                continue
            # Fall back to fuzzy-matching the free-text location against
            # known country names and their descriptive words.
            country = None
            for name, descriptors in country_desc.items():
                if name.lower() in location.lower():
                    country = name
                    break
                for part in descriptors:
                    # NOTE(review): descriptor is interpolated unescaped into
                    # the pattern — assumes descriptors contain no regex
                    # metacharacters; confirm, else use re.escape().
                    if re.search(r"\b%s\b" % part, location):
                        country = name
                        break
                if country is not None:
                    break
            if country is not None:
                __increment(support, country)
            if ts.getStatistics()["tweets"] > 1000:
                break
    except TwitterSearchException as e:
        # Surface API/search errors without aborting the caller.
        print(e)
def collect_tweets_from_city(arg):
    """Collect geocoded tweets around a known city into MySQL table ``<city>Table``.

    Drops and recreates the table, then streams search results into it.

    :param arg: city name; must be a key of the module-level ``location``
        dict (case-sensitive), whose value is the geocode kwargs.
    :raises KeyError: if *arg* is not a known location.
    """
    if arg in location:
        city = arg
    else:
        raise KeyError(
            "[WARNING CASE-SENSITIVE] %s location geocode are not known, available locations %s"
            % (arg, location.keys())
        )
    try:
        tso = TwitterSearchOrder()
        tso.setCount(100)
        tso.setIncludeEntities(True)   # entities feed the hashtags/urls/mentions columns
        tso.setResultType('mixed')
        tso.setGeocode(**location[city])
        ts = TwitterSearch(**twitter_auth1)
        conn = MySQLdb.connect(**mysql_auth)
        curr = conn.cursor()
        with conn:
            # FIX(review): the original created `self.table`Table but inserted
            # into `<city>`Table, and `self` is undefined in this function —
            # unified on the city-derived name for both DDL and DML.
            table = "%sTable" % city
            curr.execute("DROP TABLE IF EXISTS %s" % table)
            print('table dropped')
            curr.execute(
                "CREATE TABLE IF NOT EXISTS %s ("
                "Id INT PRIMARY KEY AUTO_INCREMENT,"
                "lat DECIMAL(7,5),lon DECIMAL(8,5),place VARCHAR(200),"
                "created_at VARCHAR(40),"
                "hashtags VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,"
                "urls VARCHAR(160) CHARACTER SET utf8 COLLATE utf8_general_ci,"
                "user_mentions VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,"
                "media VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,"
                "favorite_count INT,filter_level VARCHAR(10),tid BIGINT,"
                "in_reply_to_screen_name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_general_ci,"
                "in_reply_to_status_id BIGINT,in_reply_to_user_id BIGINT,"
                "retweet_count INT,"
                "source VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,"
                "text VARCHAR(160) CHARACTER SET utf8 COLLATE utf8_general_ci,"
                "user_id BIGINT,"
                "screen_name VARCHAR(100) CHARACTER SET utf8 COLLATE utf8_general_ci,"
                "user_location VARCHAR(40) CHARACTER SET utf8 COLLATE utf8_general_ci,"
                "retweeted_status_id BIGINT)" % table
            )
            for tweet in ts.searchTweetsIterable(tso):
                coords = tweet['coordinates']
                if coords is not None:
                    lat = float(coords['coordinates'][1])
                    lon = float(coords['coordinates'][0])
                else:
                    lat = 0
                    lon = 0
                # FIX(review): place can be None; original crashed here.
                place = tweet['place']['full_name'] if tweet['place'] is not None else ''
                created_at = tweet['created_at']
                entities = tweet['entities']
                hashtags = "%20".join(item['text'] for item in entities['hashtags'])
                urls = "%20".join(item['url'] for item in entities['urls'])
                user_mentions = "%20".join(
                    item['id_str'] + "%40" + item["screen_name"]
                    for item in entities['user_mentions']
                )
                media = (
                    "%20".join(
                        item['id_str'] + "%40" + item["media_url"]
                        for item in entities['media']
                    )
                    if 'media' in entities else ''
                )
                favorite_count = tweet["favorite_count"] if tweet["favorite_count"] is not None else 0
                filter_level = tweet.get("filter_level", '')
                tid = tweet['id']
                in_reply_to_screen_name = tweet["in_reply_to_screen_name"] if tweet["in_reply_to_screen_name"] is not None else 0
                in_reply_to_status_id = tweet["in_reply_to_status_id"] if tweet["in_reply_to_status_id"] is not None else 0
                in_reply_to_user_id = tweet["in_reply_to_user_id"] if tweet["in_reply_to_user_id"] is not None else 0
                retweet_count = tweet["retweet_count"] if tweet["retweet_count"] is not None else 0
                source = tweet["source"]
                text = tweet["text"]
                user_id = tweet["user"]["id"]
                screen_name = tweet["user"]["screen_name"]
                user_location = tweet["user"]["location"]
                retweeted_status_id = tweet["retweeted_status"]["id"] if "retweeted_status" in tweet else 0
                # SECURITY FIX(review): values were interpolated straight into
                # the SQL string with hand-rolled quote escaping — classic SQL
                # injection. Use driver-side parameter binding (PEP 249
                # "format" paramstyle); only the table name is formatted in.
                query = (
                    "INSERT INTO %s(lat,lon,place,created_at,hashtags,urls,"
                    "user_mentions,media,favorite_count,filter_level,tid,"
                    "in_reply_to_screen_name,in_reply_to_status_id,"
                    "in_reply_to_user_id,retweet_count,source,text,user_id,"
                    "screen_name,user_location,retweeted_status_id) VALUES "
                    "(%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,"
                    "%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s)" % table
                )
                curr.execute(query, (
                    lat, lon, place, created_at, hashtags, urls,
                    user_mentions, media, favorite_count, filter_level, tid,
                    in_reply_to_screen_name, in_reply_to_status_id,
                    in_reply_to_user_id, retweet_count, source, text,
                    user_id, screen_name, user_location, retweeted_status_id,
                ))
    except TwitterSearchException as e:
        print(e)
def fetch_twitter_entries(self):
    """Fetch up to 50 recent English tweets around ``self.area`` and save them.

    Pages backwards through results using ``self.max_id`` as an exclusive
    upper bound, persisting each tweet via ``save_tweets``. The new
    ``max_id`` watermark is always written back to ``self`` (in ``finally``),
    even when the Twitter API raises.
    """
    origin = self
    max_id = origin.max_id
    since_id = None
    area = origin.area
    try:
        count = 50  # page size, and the overall per-call fetch cap
        tso = TwitterSearchOrder()
        tso.setKeywords([''])        # empty keyword: any tweet in the geo area
        tso.setResultType('recent')
        if origin.max_id:
            tso.setMaxID(origin.max_id - 1)  # max_id is inclusive per Twitter docs
        tso.setLanguage('en')        # English tweets only
        tso.setGeocode(latitude=area.lat, longitude=area.long,
                       radius=area.rad, km=True)
        tso.setCount(count)
        tso.setIncludeEntities(False)  # entity metadata is unused here
        ts = TwitterSearch(**settings.TWITTER)
        total = 0
        for tweet in ts.searchTweetsIterable(tso):
            max_id = save_tweets(area, origin, tweet)
            if not since_id:
                since_id = max_id  # remember the newest id seen this run
            total += 1
            # FIX(review): the cap was a hard-coded 50 that could silently
            # drift from `count`; keep the two in lockstep.
            if total >= count:
                break
    except TwitterSearchException as e:
        # Surface API/search errors; the watermark is still persisted below.
        print(e)
    finally:
        origin.max_id = max_id
        origin.save()