import time
from datetime import datetime

from TwitterAPI.TwitterAPI import TwitterAPI
from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter


class TwitterAPIAccess(object):

    def __init__(self, database_manager, stop_words, user_name, zone_index):
        self.dm = database_manager
        # tokenize, lowercase, then drop both the default and the custom stop words
        self.filter = RegexTokenizer() | LowercaseFilter() | StopFilter() | StopFilter(stop_words)
        self.zone_index = zone_index
        self.api = TwitterAPI(app_auth[user_name].ckey,
                              app_auth[user_name].csec,
                              app_auth[user_name].atoken,
                              app_auth[user_name].asec)

    def start_stream(self):
        while True:
            try:
                print('in stream...')
                response = self.api.request('statuses/filter',
                                            {'locations': locations[self.zone_index]})
                for tweet in response:
                    filtered_tweet = self.map_tweet_fields(dict(tweet))
                    if self.dm.not_exist(filtered_tweet['_id']):
                        print('[%s] insert %s' % (datetime.now(), tweet['id_str']))
                        self.dm.save_tweet(filtered_tweet)
            except KeyboardInterrupt:
                print('TERMINATED BY USER')
                break
            except Exception as e:
                # back off for a minute before reconnecting to the stream
                print('Error: %s %s' % (type(e), e))
                time.sleep(60)

    @staticmethod
    def map_tweet_fields(json_object):
        """Project a raw tweet onto the subset of fields we store."""
        response = {
            "_id": json_object["id_str"],
            "user": {
                "name": json_object["user"]["name"],
                "screen_name": json_object["user"]["screen_name"],
                "followers_count": json_object["user"]["followers_count"],
                "location": json_object["user"]["location"],
                "description": json_object["user"]["description"],
                "statuses_count": json_object["user"]["statuses_count"],
                "friends_count": json_object["user"]["friends_count"],
                "listed_count": json_object["user"]["listed_count"]
            },
            "where": {
                "coordinates": json_object["coordinates"]["coordinates"]
                if json_object["coordinates"] else None
            },
            "what": {
                "text": json_object["text"],
                "entities": json_object["entities"],
                "lang": json_object["lang"]
            },
            "about": {
                "retweet_count": json_object["retweet_count"],
                "source": json_object["source"],
                "favorite_count": json_object["favorite_count"]
            },
            "when": {
                "created_at_str": json_object["created_at"],
                "created_at_timestamp": time.mktime(
                    datetime.strptime(json_object["created_at"],
                                      "%a %b %d %H:%M:%S +0000 %Y").timetuple())
            }
        }
        return response
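# The class above assumes two module-level lookup tables, `app_auth` and
# `locations`, that are defined elsewhere in the original project. A minimal
# sketch of what they might look like (names, shapes, and coordinate values
# here are assumptions, not the original definitions):

from collections import namedtuple

AppAuth = namedtuple('AppAuth', ['ckey', 'csec', 'atoken', 'asec'])

# one credential set per account name (hypothetical placeholder values)
app_auth = {
    'alice': AppAuth(ckey='...', csec='...', atoken='...', asec='...'),
}

# one bounding box per zone, in the 'west_lon,south_lat,east_lon,north_lat'
# string form that statuses/filter expects (hypothetical coordinates)
locations = ['-84.55,33.64,-84.28,33.88']

# streamer = TwitterAPIAccess(database_manager, stop_words=['rt'],
#                             user_name='alice', zone_index=0)
# streamer.start_stream()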
def main():
    # get arguments
    args = get_args()
    # read required json files
    bounding, db_auth, api_auths = read_jsons()
    # db url
    url = "http://" + db_auth["user"] + ":" + db_auth["pwd"] \
          + "@" + db_auth["ip"] + ":" + db_auth["port"] + "/"
    # initialise db and twitter api
    storage = TweetStore(args.db_name, url)
    apis = []
    for api_auth in api_auths:
        api = TwitterAPI(api_auth["API_KEY"],
                         api_auth["API_SECRET"],
                         api_auth["ACCESS_TOKEN"],
                         api_auth["ACCESS_TOKEN_SECRET"])
        apis.append(api)
    t1 = threading.Thread(target=twitter_streaming,
                          args=(apis[0], storage, bounding, args.region))
    t2 = threading.Thread(target=twitter_user_timeline,
                          args=(apis[1:], storage))
    # start streaming and getting timelines
    t1.start()
    t2.start()
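# read_jsons() is defined elsewhere in the original project; judging by the
# keys accessed above, the credentials file it parses plausibly looks
# something like this (the file name and layout are assumptions):
#
# api_auths.json
# [
#     {
#         "API_KEY": "...",
#         "API_SECRET": "...",
#         "ACCESS_TOKEN": "...",
#         "ACCESS_TOKEN_SECRET": "..."
#     }
# ]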
def main():
    # get arguments
    args = get_args()
    # read required json files
    db_auth, api_auth = read_jsons()
    # db url
    url = "http://" + db_auth["user"] + ":" + db_auth["pwd"] \
          + "@" + db_auth["ip"] + ":" + db_auth["port"] + "/"
    # initialise db and twitter api
    storage = TweetStore(args.db_name, url)
    api = TwitterAPI(api_auth["API_KEY"],
                     api_auth["API_SECRET"],
                     api_auth["ACCESS_TOKEN"],
                     api_auth["ACCESS_TOKEN_SECRET"])
    twitter_search(api, storage, args.query)
from TweetStore import TweetStore
from TwitterAPI.TwitterAPI import TwitterAPI

COUCH_DATABASE = 'test_db'
TWITTER_ENDPOINT = 'statuses/filter'
TWITTER_PARAMS = {'track': 'pizza'}

# placeholder credentials; fill in real values before running
API_KEY = 'XXX'
API_SECRET = 'XXX'
ACCESS_TOKEN = 'XXX'
ACCESS_TOKEN_SECRET = 'XXX'

storage = TweetStore(COUCH_DATABASE)
api = TwitterAPI(API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

for item in api.request(TWITTER_ENDPOINT, TWITTER_PARAMS):
    if 'text' in item:
        print('%s -- %s\n' % (item['user']['screen_name'], item['text']))
        storage.save_tweet(item)
    elif 'message' in item:
        print('ERROR %s: %s\n' % (item['code'], item['message']))
import logging
import time
from queue import Queue
from threading import Thread

from TwitterAPI import TwitterAPI, TwitterPager, TwitterRequestError, TwitterConnectionError

from TwitterMine import utils
# DW (the data writer) and the module-level constants JOBS_TYPES, STOP_SIGNAL,
# MAX_IDS_LIST, MAX_TWEETS_LIST and FILTER_LEVEL are defined elsewhere in the
# original project.


class Miner:
    """
    A Miner object can talk to twitter (via twitter's API), retrieve data and store it in a
    local database. The Miner actions are reflected in the database.

    The miner should be invoked by the method run(). The miner will then consist of multiple
    threads, each of which handles a specific kind of job (each job has a different limit for
    using Twitter's API, so each thread will handle its job's limit). For each job type there is
    a special queue to which new jobs are inserted. A new job will appear in the queue in the
    form of a dictionary that consists of all needed arguments for this job.

    To properly close the miner (finish all its current jobs and writes) call the stop() method.

    For more information about what arguments are needed for a specific job, look at the doc of
    its corresponding mine function, e.g. to add a new job for getting followers ids look at
    _mine_followers_ids(). New jobs should be added only via the produce_job() function.

    Each mining function related to some user will automatically mine the details of this user,
    so there is no need to call mine_user_details for users for which we perform other mining
    jobs.
    """

    def __init__(self, consumer_key, consumer_secret, data_dir, access_token_key=None,
                 access_token_secret=None):
        """
        Construct a new Miner for retrieving data from Twitter.
        If access_token_key or access_token_secret are None, use app authentication with
        twitter's API.
        :param consumer_key:
        :param consumer_secret:
        :param data_dir: main directory to store the data
        :param access_token_key:
        :param access_token_secret:
        """
        if access_token_key is None or access_token_secret is None:
            self.api = TwitterAPI(consumer_key, consumer_secret, auth_type='oAuth2')
        else:
            self.api = TwitterAPI(consumer_key, consumer_secret, access_token_key,
                                  access_token_secret)
        self.writer = DW(data_dir)
        self.logger = logging.getLogger()
        # python queues are thread safe and don't require locks for multi-producers/consumers
        self.queues = {job_type: Queue() for job_type in JOBS_TYPES}
        # create a thread for each different job type
        self.threads = [
            Thread(target=Miner._run_consumer, args=(self, 'followers_ids', Miner._mine_followers_ids)),
            Thread(target=Miner._run_consumer, args=(self, 'friends_ids', Miner._mine_friends_ids)),
            Thread(target=Miner._run_consumer, args=(self, 'tweets', Miner._mine_tweets)),
            Thread(target=Miner._run_consumer, args=(self, 'likes', Miner._mine_likes)),
            Thread(target=Miner._run_consumer, args=(self, 'user_details', Miner._mine_user_details)),
            Thread(target=Miner._run_consumer, args=(self, 'neighbors', Miner._mine_neighbors)),
            Thread(target=Miner._run_consumer, args=(self, 'listen', Miner._listen))
        ]

    def _mine_user_details(self, args):
        """
        retrieve details of a specific user according to its screen name.
        :param args: dictionary with a key 'screen_name' which indicates the user to retrieve
        :return: the id (integer) of the user
        """
        screen_name = args['screen_name']
        self.logger.info('mining user details of {0}'.format(screen_name))
        time.sleep(1)  # one request per second will avoid rate limit
        try:
            r = self.api.request('users/show', params={'screen_name': screen_name})
            if r.status_code >= 400:
                try:
                    msg = r.json()['errors'][0]['message']
                    self.logger.error('mining user details failed. {0}'.format(msg))
                except ValueError:
                    # response body does not contain valid json
                    self.logger.error('mining user details failed. Error code {0}'.format(
                        r.status_code))
                return
            details = r.json()
            self.writer.write_user(details)
            self.logger.info('user details mined successfully')
            return details['id']
        except TwitterConnectionError:
            # the message is logged by the TwitterConnectionError constructor
            return None

    def _produce_user_details_job(self, screen_name):
        """
        produce a new user_details job, if the details don't already exist.
        this function is called by other mining functions.
        """
        if not self.writer.user_details_exist(screen_name):
            self.produce_job('user_details', {'screen_name': screen_name})

    def _mine_friends_followers(self, args, resource, writer_func):
        """
        retrieve ids of friends or followers
        :param args: dictionary with the screen_name and limit
        :param resource: the resource ('followers/ids' or 'friends/ids')
        :param writer_func: the writer's function to use
        """
        screen_name = args['screen_name']
        limit = args['limit']
        self._produce_user_details_job(screen_name)
        if limit == 0:
            limit = float('inf')
        ids = []
        total = 0  # total number of ids we retrieved so far
        r = TwitterPager(self.api, resource, params={'screen_name': screen_name})
        for user_id in r.get_iterator():
            ids.append(user_id)
            total += 1
            if len(ids) > MAX_IDS_LIST:
                writer_func(self.writer, ids, screen_name)
                ids = []
            if total >= limit:
                break
        writer_func(self.writer, ids, screen_name)

    def _mine_followers_ids(self, args):
        """
        retrieve ids of the user's followers
        :param args: dictionary with keys 'screen_name' and 'limit'. limit 0 means no limit
        :return:
        """
        self.logger.info('mining followers ids for user {0}'.format(args['screen_name']))
        try:
            self._mine_friends_followers(args, 'followers/ids', DW.write_followers)
            self.logger.info('followers mined successfully')
        except TwitterRequestError as e:
            self.logger.error('mining followers failed. Status code {0}: {1}'.format(
                e.status_code, e.msg))

    def _mine_friends_ids(self, args):
        """
        retrieve ids of the user's friends
        :param args: dictionary with keys 'screen_name' and 'limit'. limit 0 means no limit
        :return:
        """
        self.logger.info('mining friends ids for user {0}'.format(args['screen_name']))
        try:
            self._mine_friends_followers(args, 'friends/ids', DW.write_followers)
            self.logger.info('friends mined successfully')
        except TwitterRequestError as e:
            self.logger.error('mining friends failed. Status code {0}: {1}'.format(
                e.status_code, e.msg))

    def _mine_tweets_likes(self, args, resource, writer_func):
        """
        retrieve tweets or likes of a user
        :param args: dictionary with keys 'screen_name' and 'limit'
        :param resource: 'statuses/user_timeline' or 'favorites/list'
        :param writer_func: the writer's function to use
        """
        screen_name = args['screen_name']
        limit = args['limit']
        self._produce_user_details_job(screen_name)
        if limit == 0:
            limit = float('inf')
        tweets = []
        total = 0
        r = TwitterPager(self.api, resource, params={
            'screen_name': screen_name,
            'count': 200,
            'tweet_mode': 'extended'
        })
        for t in r.get_iterator():
            tweets.append(t)
            total += 1
            if len(tweets) > MAX_TWEETS_LIST:
                writer_func(self.writer, tweets, screen_name)
                tweets = []
            if total >= limit:
                break
        writer_func(self.writer, tweets, screen_name)

    def _mine_tweets(self, args):
        """
        retrieve tweets of the given user
        :param args: dictionary with keys 'screen_name' and 'limit'
        :return:
        """
        try:
            self.logger.info('mining tweets of user {0}'.format(args['screen_name']))
            self._mine_tweets_likes(args, 'statuses/user_timeline', DW.write_tweets_of_user)
            self.logger.info('tweets mined successfully')
        except TwitterRequestError as e:
            self.logger.error('mining tweets failed. Status code {0}: {1}'.format(
                e.status_code, e.msg))

    def _mine_likes(self, args):
        """
        retrieve tweets that the user likes
        :param args: dictionary with keys 'screen_name' and 'limit'
        :return:
        """
        try:
            self.logger.info('mining likes of user {0}'.format(args['screen_name']))
            self._mine_tweets_likes(args, 'favorites/list', DW.write_likes)
            self.logger.info('likes mined successfully')
        except TwitterRequestError as e:
            self.logger.error('mining likes failed. Status code {0}: {1}'.format(
                e.status_code, e.msg))

    def _mine_neighbors(self, args):
        """
        this function mines neighbors of a given user. B is considered a neighbor of A if one
        of the following holds:
        * A retweeted a tweet by B
        * A quoted a tweet by B
        * A replied to B
        data is stored for each tweet that indicates some neighbor of the given user. for each
        such tweet, the following information is stored:
        neighbor_screen_name;tweet_id;neighborship_type
        neighborship_type is one of the constants RETWEET, QUOTE or REPLY from TwitterMine.utils
        :param args: dictionary with keys 'screen_name' and 'limit'
        """
        screen_name = args['screen_name']
        limit = args['limit']
        self.logger.info('mining neighbors of user {0}'.format(screen_name))
        self._produce_user_details_job(screen_name)
        if limit == 0:
            limit = float('inf')
        neighbors = []
        total = 0
        r = TwitterPager(self.api, 'statuses/user_timeline', params={
            'screen_name': screen_name,
            'count': 200
        })
        try:
            for t in r.get_iterator():
                total += 1
                neighbor_scr_name = utils.get_original_author(t)
                if neighbor_scr_name is not None and neighbor_scr_name != screen_name:
                    tweet_type = utils.get_tweet_type(t)
                    neighbors.append('{0};{1};{2}'.format(
                        neighbor_scr_name, t['id_str'], tweet_type))
                if len(neighbors) >= MAX_TWEETS_LIST:
                    self.writer.write_neighbors(neighbors, screen_name)
                    neighbors = []
                if total >= limit:
                    break
            self.writer.write_neighbors(neighbors, screen_name)
            self.logger.info('neighbors mined successfully')
        except TwitterRequestError as e:
            self.logger.error('mining neighbors failed. Status code {0}: {1}'.format(
                e.status_code, e.msg))

    def _update_listen_parameters(self, track, follow, args):
        """
        update the current listen parameters (track and follow) according to the given args
        :param track: set
        :param follow: set
        :param args: args dictionary to the listen job
        :return: a tuple (track, follow) - the updated sets
        """
        if args['mode'] == 'add':
            if 'track' in args:
                track = track.union(args['track'])
            if 'follow' in args:
                follow = follow.union(args['follow'])
        elif args['mode'] == 'remove':
            if 'track' in args:
                track = track.difference(args['track'])
            if 'follow' in args:
                follow = follow.difference(args['follow'])
        return track, follow

    def _get_listen_query_representation(self, track, follow):
        """
        return a unique string representation for a twitter's listen request.
        representation depends on the request arguments track and follow
        :param track: set of strings
        :param follow: set of strings
        :return: string - representation of the listen request
        """
        keywords = list(track) + list(follow)
        keywords.sort()
        return '.'.join(keywords)

    def _listen(self):
        """
        this function handles all listen jobs. the args to a listen job is a dictionary with
        the following structure
        {'mode': 'add' / 'remove',
         'track': ['term1', 'term2', ...],
         'follow': ['id1', 'id2', ...]}
        :return: this function does not return
        """
        track = set()
        follow = set()
        args_queue = self.queues['listen']
        while True:
            try:
                while (not args_queue.empty()) or (not track and not follow):
                    # there are more arguments to process OR both track and follow are empty
                    args = args_queue.get(block=True, timeout=None)  # blocks if queue is empty
                    if args is STOP_SIGNAL:
                        return
                    args_queue.task_done()
                    track, follow = self._update_listen_parameters(track, follow, args)
                self.logger.info('listening: track={0}, follow={1}'.format(str(track),
                                                                           str(follow)))
                r = self.api.request('statuses/filter', {
                    'track': ','.join(track),
                    'follow': ','.join(follow),
                    'tweet_mode': 'extended',
                    'stall_warnings': 'true',
                    'filter_level': FILTER_LEVEL
                })
                # the string representation of the stream
                stream_str = self._get_listen_query_representation(track, follow)
                iterator = r.get_iterator()
                for item in iterator:
                    if item:
                        if 'warning' in item:
                            self.logger.warning(item['warning']['message'])
                        elif 'disconnect' in item:
                            event = item['disconnect']
                            self.logger.error('streaming API shutdown: {0}'.format(
                                event['reason']))
                            break
                        elif 'text' in item or 'full_text' in item or 'extended_tweet' in item:
                            # item is a tweet, ready to be written
                            self.writer.write_tweets_of_stream([item], stream_str)
                        # currently, no use in the following types of messages
                        elif 'delete' in item:
                            # user deleted a tweet; payload is under item['delete']['status']
                            pass
                        elif 'limit' in item:
                            # more Tweets were matched than the current rate limit allows
                            pass
                        elif 'event' in item and item['event'] == 'user_update':
                            # user updated his profile
                            pass
                    if not args_queue.empty():
                        # new job args received. close current connection, update args and
                        # start again
                        r.close()
                        break
            except TwitterRequestError as e:
                if e.status_code < 500:
                    # something needs to be fixed before re-connecting.
                    # print information and start a new empty listen job
                    self.logger.error('Got exception during listen job: {0}. '
                                      'Starting an empty listen job'.format(e))
                    track = set()
                    follow = set()
                else:
                    # temporary interruption, re-try request
                    pass
            except TwitterConnectionError:
                # temporary interruption, re-try request
                pass

    def _run_consumer(self, job_type, job_func):
        """
        consume all jobs of a specific type. this function does not return and constantly
        handles or waits for new jobs
        :param job_type: the type of job (one of the constants in miner.JOBS_TYPES)
        :param job_func: the miner function that should be called for handling this job
        :return:
        """
        if job_type == 'listen':
            job_func(self)
        else:  # standard rest API job
            while True:
                try:
                    # if no job is available, get() will block until a new job arrives
                    job_args = self.queues[job_type].get(block=True, timeout=None)
                    if job_args is STOP_SIGNAL:
                        return
                    job_func(self, job_args)
                    # indicate that the job was processed. this is important if anyone wants
                    # to wait until all jobs in the queue are done
                    self.queues[job_type].task_done()
                except Exception as e:
                    self.logger.error('{0} job failed: {1}'.format(job_type, str(e)))

    def produce_job(self, job_type, args):
        """
        Create a new job to be handled by the miner.
        :param job_type: the job to perform. one of the constants in miner.JOBS_TYPES
        :param args: dictionary with the needed arguments for this job
        :return:
        """
        if args is STOP_SIGNAL:
            self.logger.error('invalid job arguments to "{0}"'.format(job_type))
            return
        if job_type not in JOBS_TYPES:
            raise ValueError('Unsupported job type: "{0}"'.format(job_type))
        self.queues[job_type].put(args)

    def run(self):
        """
        Start the miner. must be called before producing new jobs
        """
        for t in self.threads:
            t.start()

    def stop(self):
        """
        Finishes all jobs that were produced for the miner, and stops the miner.
        After calling this function new jobs should not be produced
        """
        self.logger.info('notify all miner threads to stop')
        for job_type in JOBS_TYPES:
            self.queues[job_type].put(STOP_SIGNAL)
        self.logger.info('wait for all miner threads to stop')
        for t in self.threads:
            t.join()
        self.logger.info('miner stopped')
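# A minimal usage sketch for the Miner above. The credentials and data
# directory are placeholders; the job argument shapes follow the docstrings
# of the corresponding _mine_* functions:

# miner = Miner('consumer-key', 'consumer-secret', '/tmp/twitter_data')
# miner.run()  # start the consumer threads before producing jobs
# miner.produce_job('user_details', {'screen_name': 'twitterdev'})
# miner.produce_job('followers_ids', {'screen_name': 'twitterdev', 'limit': 1000})
# miner.produce_job('tweets', {'screen_name': 'twitterdev', 'limit': 0})  # 0 = no limit
# miner.stop()  # drain the queues and join all threads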
import random
import re
from time import sleep

from TwitterAPI.TwitterAPI import TwitterAPI
from colors import colors_by_name
from shm import led

__author__ = 'zander'

api = TwitterAPI("3izkk25JAjqyetdxz7UzwN9tr",
                 "W01dNPAmv2FIUmvsqPTkSjIg5364dxT4cbvdH8SltNbFUXzifn",
                 "61233167-E5VTAoVXV02RwbeMrpRdlJhZs6jLGS32JYHxOs97t",
                 "cjdI8OvMHVpTXzFAnmEw1TpwC2i1x2nEdZdh5znqfDyc5")
r = api.request("user")


def set_led_color(red, green, blue):
    led.port_red.set(red)
    led.port_green.set(green)
    led.port_blue.set(blue)
    led.starboard_red.set(red)
    led.starboard_green.set(green)
    led.starboard_blue.set(blue)


set_led_color(0, 100, 255)
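# The response r above is created but never consumed in this fragment;
# presumably the original loops over the user stream and reacts to incoming
# items. A hedged sketch of what that consumption might look like (the
# colour choice and flash pattern are assumptions):

# for item in r:
#     if 'text' in item:
#         set_led_color(0, 255, 0)    # flash on every new tweet
#         sleep(1)
#         set_led_color(0, 100, 255)  # restore the idle colour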
# ===================================================================
from TweetStore import TweetStore
from TwitterAPI.TwitterAPI import TwitterAPI

# bounding box for the whole city, and the same box split into four chunks
city_bound_googleV3 = [-84.5518189, 33.648079, -84.289389, 33.8876179]
chunks_bound = [[-84.5518189, 33.648079, -84.42060395, 33.76784845],
                [-84.5518189, 33.76784845, -84.42060395, 33.8876179],
                [-84.42060395, 33.648079, -84.289389, 33.76784845],
                [-84.42060395, 33.76784845, -84.289389, 33.8876179]]

COUCH_DATABASE = 'test_db'
TWITTER_ENDPOINT = 'statuses/filter'
# statuses/filter expects locations as a 'lon,lat,lon,lat' string, not a list
TWITTER_PARAMS = {'locations': ','.join(str(c) for c in chunks_bound[0])}
API_KEY = "lSjoTqZ4ofmtCr0uh7aJZRQcp"
API_SECRET = "qfkI0RjyOetHNDE6EJhojNlqRf4B7lbZj2rTQBmTZYHT9sRjlc"
ACCESS_TOKEN = "3186003008-eAzy3mSzxHRYuWzji65Xi0JrjTFqJTO81MU2cKK"
ACCESS_TOKEN_SECRET = "Rvvo931v0hbMyRX8sg8QG51cVlY8LQuij8zXuA9aP1hIh"

storage = TweetStore(COUCH_DATABASE)
api = TwitterAPI(API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

for item in api.request(TWITTER_ENDPOINT, TWITTER_PARAMS):
    if 'text' in item:
        print('%s -- %s\n' % (item['user']['screen_name'], item['text']))
        storage.save_tweet(item)
    elif 'message' in item:
        print('ERROR %s: %s\n' % (item['code'], item['message']))
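# The four chunk bounds above are exactly the quadrants obtained by splitting
# the city box at its midpoints; a small helper (not in the original) makes
# that relationship explicit and avoids hand-typed coordinates:

def split_bound_into_quadrants(bound):
    """Split a [west, south, east, north] box into its four quadrants."""
    west, south, east, north = bound
    mid_lon = (west + east) / 2.0
    mid_lat = (south + north) / 2.0
    return [[west, south, mid_lon, mid_lat],
            [west, mid_lat, mid_lon, north],
            [mid_lon, south, east, mid_lat],
            [mid_lon, mid_lat, east, north]]

# split_bound_into_quadrants(city_bound_googleV3) == chunks_bound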
# -*- coding: UTF-8 -*-
from tweetStore import TweetStore
from TwitterAPI.TwitterAPI import TwitterAPI

# Your Twitter authentication credentials...
API_KEY = 'Fill in with your credentials'
API_SECRET = 'Fill in with your credentials'
ACCESS_TOKEN = 'Fill in with your credentials'
ACCESS_TOKEN_SECRET = 'Fill in with your credentials'

storage = TweetStore('databaseName')
api = TwitterAPI(API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

for item in api.request('statuses/filter', {'track': 'wordToFilter'}):
    if 'text' in item:
        # print('%s -- %s\n' % (item['user']['screen_name'], item['text']))
        print(item['user']['screen_name'], item['text'])
        storage.save_tweet(item)
    elif 'message' in item:
        print('ERROR %s: %s\n' % (item['code'], item['message']))
# (fragment of the TweetStore class; the initializer that connects to CouchDB
# and constructs `view` is truncated in the source)
        view.sync(self.db)

    def save_tweet(self, tw):
        tw['_id'] = tw['id_str']
        self.db.save(tw)

    def count_tweets(self):
        for doc in self.db.view('twitter/count_tweets'):
            return doc.value

    def get_tweets(self):
        return self.db.view('twitter/get_tweets')


from TweetStore import TweetStore
from TwitterAPI.TwitterAPI import TwitterAPI

api_key = ''
api_secret = ''
access_token = ''
access_secret = ''

storage = TweetStore('test_db')
api = TwitterAPI(api_key, api_secret, access_token, access_secret)

for item in api.request('search/tweets', {'q': 'taarangana'}):
    if 'text' in item:
        print('%s -- %s\n' % (item['user']['screen_name'], item['text']))
        storage.save_tweet(item)
    elif 'message' in item:
        print('ERROR %s: %s\n' % (item['code'], item['message']))
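# The truncated initializer above evidently syncs CouchDB design documents
# for the 'twitter/count_tweets' and 'twitter/get_tweets' views queried by
# the methods. A plausible sketch of those definitions using couchdb-python's
# ViewDefinition (the exact map/reduce bodies are assumptions):

from couchdb.design import ViewDefinition

count_view = ViewDefinition(
    'twitter', 'count_tweets',
    map_fun='function(doc) { emit(doc._id, 1); }',
    reduce_fun='function(keys, values) { return sum(values); }')

get_view = ViewDefinition(
    'twitter', 'get_tweets',
    map_fun='function(doc) { emit(doc._id, doc.text); }')

# count_view.sync(db); get_view.sync(db)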
def get_app_api():
    config = get_config()
    return TwitterAPI(config['consumer_key'],
                      config['consumer_secret'],
                      auth_type='oAuth2')
def get_user_api():
    config = get_config()
    return TwitterAPI(config['consumer_key'],
                      config['consumer_secret'],
                      config['access_token_key'],
                      config['access_token_secret'])
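# The two helpers above correspond to TwitterAPI's two authentication modes:
# get_app_api() returns an application-auth (oAuth2) client, which is limited
# to app-level endpoints but gets higher read rate limits, while
# get_user_api() returns a user-auth (oAuth1) client, required for endpoints
# that act on behalf of a user. A hedged usage sketch:

# app_api = get_app_api()
# r = app_api.request('search/tweets', {'q': 'pizza'})  # read-only search
#
# user_api = get_user_api()
# r = user_api.request('statuses/update', {'status': 'hello'})  # needs user context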
import datetime

from common import *

streamDatabase = StreamDatabase('tweets')

# read the credentials from the shared current.txt file (the first line is skipped)
with open("../../Common/current.txt", 'r') as current:
    Current = current.readlines()

APIKey = Current[1].strip()
APISecretKey = Current[2].strip()
AccessToken = Current[3].strip()
AccessTokenSecret = Current[4].strip()

api = TwitterAPI(APIKey, APISecretKey, AccessToken, AccessTokenSecret)

while True:
    try:
        for tweet in api.request('statuses/filter',
                                 {'track': '', 'locations': melbourneBoundingBox}):
            if 'text' in tweet:
                docid = tweet['id_str']
                if docid in streamDatabase.database:
                    with open('stream_log', 'a') as f:
                        f.write("[" + str(datetime.datetime.now()) + "]" + '\n')
                        f.write('The tweet is already present\n')
    except Exception as e:
        # the source snippet is truncated here; an except clause is required
        # for the try above, and it keeps the reconnect loop alive
        print('Error: %s %s' % (type(e), e))
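# melbourneBoundingBox is imported from common; statuses/filter expects
# bounding boxes as 'west_lon,south_lat,east_lon,north_lat'. A hypothetical
# value with approximate Melbourne coordinates (not the project's actual
# definition):

# melbourneBoundingBox = '144.5937,-38.4339,145.5125,-37.5113'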