def main():
    """Print the text of every tweet read from the selected stream.

    Prints the module docstring and returns 2 when any of the four OAuth
    command-line credentials is missing.
    """
    args = parse_arguments()

    credentials = (args.token, args.token_secret,
                   args.consumer_key, args.consumer_secret)
    if not all(credentials):
        print(__doc__)
        return 2

    # The streaming endpoints only accept authorized requests.
    auth = OAuth(*credentials)

    if args.user_stream:
        tweet_iter = TwitterStream(
            auth=auth, domain='userstream.twitter.com').user()
    elif args.site_stream:
        tweet_iter = TwitterStream(
            auth=auth, domain='sitestream.twitter.com').site()
    else:
        tweet_iter = TwitterStream(auth=auth, timeout=60.0).statuses.sample()

    for tweet in tweet_iter:
        # Deletes and other data messages carry no 'text'; skip them.
        text = tweet.get('text')
        if text:
            printNicely(text)
def stream(domain, args, name='Rainbow Stream'):
    """
    Track the stream
    """
    # Banner text per domain: the tracked keywords for the public stream,
    # `name` for the user and site streams.
    banners = {
        c['USER_DOMAIN']: name,
        c['PUBLIC_DOMAIN']: args.track_keywords,
        c['SITE_DOMAIN']: name,
    }
    if c['ASCII_ART']:
        ascii_art(banners[domain])

    # Optional connection settings taken from the parsed arguments.
    connection_options = {
        'timeout': args.timeout,
        'block': not args.no_block,
        'heartbeat_timeout': args.heartbeat_timeout,
    }
    # Only send a 'track' parameter when keywords were given.
    query = {'track': args.track_keywords} if args.track_keywords else {}

    twitter_stream = TwitterStream(
        auth=authen(), domain=domain, **connection_options)

    # Stream control values and the marker line printed for each of them.
    notices = (
        (None, "-- None --"),
        (Timeout, "-- Timeout --"),
        (HeartbeatTimeout, "-- Heartbeat Timeout --"),
        (Hangup, "-- Hangup --"),
    )
    try:
        if domain == c['USER_DOMAIN']:
            tweets = twitter_stream.user(**query)
        elif domain == c['SITE_DOMAIN']:
            tweets = twitter_stream.site(**query)
        elif args.track_keywords:
            tweets = twitter_stream.statuses.filter(**query)
        else:
            tweets = twitter_stream.statuses.sample()

        for tweet in tweets:
            notice = next(
                (label for marker, label in notices if tweet is marker), None)
            if notice is not None:
                printNicely(notice)
            elif tweet.get('text'):
                draw(
                    t=tweet,
                    keyword=args.track_keywords,
                    check_semaphore=True,
                    fil=args.filter,
                    ig=args.ignore,
                )
            elif tweet.get('direct_message'):
                print_message(tweet['direct_message'], check_semaphore=True)
    except TwitterHTTPError:
        printNicely('')
        printNicely(
            magenta("We have maximum connection problem with twitter'stream API right now :("))
def main():
    """Insert tweets from the selected stream into the database and echo them.

    Stream control values and messages without text are written to the
    error log instead of being stored.
    """
    args = parse_arguments()

    # The streaming API requires an authorized client.
    auth = OAuth(args.token, args.token_secret,
                 args.consumer_key, args.consumer_secret)

    # Optional connection settings.
    stream_args = {
        'timeout': args.timeout,
        'block': not args.no_block,
        'heartbeat_timeout': args.heartbeat_timeout,
    }
    query_args = {'track': args.track_keywords} if args.track_keywords else {}

    if args.user_stream:
        tweet_iter = TwitterStream(
            auth=auth, domain='userstream.twitter.com',
            **stream_args).user(**query_args)
    elif args.site_stream:
        tweet_iter = TwitterStream(
            auth=auth, domain='sitestream.twitter.com',
            **stream_args).site(**query_args)
    else:
        public_stream = TwitterStream(auth=auth, **stream_args)
        if args.track_keywords:
            tweet_iter = public_stream.statuses.filter(**query_args)
        else:
            tweet_iter = public_stream.statuses.sample()

    # Connect to RethinkDB. A failure is only logged; the loop below still
    # runs.
    try:
        db.connect()
    except RqlDriverError:
        log.error("Couldn't connect to database.")

    # Stream control values and the text logged for each of them.
    control_labels = (
        (None, 'None'),
        (Timeout, 'Timeout'),
        (HeartbeatTimeout, 'Heartbeat Timeout'),
        (Hangup, 'Hangup'),
    )
    for tweet in tweet_iter:
        label = next(
            (text for marker, text in control_labels if tweet is marker), None)
        if label is not None:
            log.error(label)
        elif tweet.get('text'):
            # TODO: add that stream_id gets passed to open_stream.py
            tweet['stream_id'] = '1234567890'
            db.insert(DATABASE, TWEETS_TABLE, tweet)
            printNicely(tweet['text'])
        else:
            # Deletes and other data messages have no text.
            log.error('Some data ' + str(tweet))
def main():
    """Print tweets from the stream selected on the command line.

    Stream control values are printed as "-- ... --" marker lines and
    messages without text are printed in raw form.
    """
    args = parse_arguments()

    # The streaming API requires an authorized client.
    auth = OAuth(args.token, args.token_secret,
                 args.consumer_key, args.consumer_secret)

    # Optional connection settings.
    stream_args = {
        'timeout': args.timeout,
        'block': not args.no_block,
        'heartbeat_timeout': args.heartbeat_timeout,
    }
    query_args = {'track': args.track_keywords} if args.track_keywords else {}

    if args.user_stream:
        tweet_iter = TwitterStream(
            auth=auth, domain='userstream.twitter.com',
            **stream_args).user(**query_args)
    elif args.site_stream:
        tweet_iter = TwitterStream(
            auth=auth, domain='sitestream.twitter.com',
            **stream_args).site(**query_args)
    else:
        public_stream = TwitterStream(auth=auth, **stream_args)
        if args.track_keywords:
            tweet_iter = public_stream.statuses.filter(**query_args)
        else:
            tweet_iter = public_stream.statuses.sample()

    # Stream control values and the marker line printed for each of them.
    markers = (
        (None, "-- None --"),
        (Timeout, "-- Timeout --"),
        (HeartbeatTimeout, "-- Heartbeat Timeout --"),
        (Hangup, "-- Hangup --"),
    )
    for tweet in tweet_iter:
        line = next((text for value, text in markers if tweet is value), None)
        if line is not None:
            printNicely(line)
        elif tweet.get('text'):
            printNicely(tweet['text'])
        else:
            # Deletes and other data messages have no text.
            printNicely("-- Some data: " + str(tweet))
def get_stream_iterator(track_keywords):
    """Return an iterator over the public stream.

    The stream is filtered by `track_keywords` when that value is truthy;
    otherwise the sample stream is returned. The stream is created with
    block=False.
    """
    # TODO: from config
    # NOTE(review): these credentials are committed in source; they should be
    # loaded from configuration and rotated.
    token = "1010873339819196416-3yDJVTlPLOyptBjx8DAeB9feAXSpJp"
    token_secret = "i2ddWam867Y1wWYxvC0b4KcYViZuQ36hDoHkYNm8CCUko"
    consumer_key = "Y3eSkzd8OPea2lUoXypISoBT2"
    consumer_secret = "nZOjyOuzBjgUgEzygK4zU3T6PiI4MnTLdHSm1Y2JuHiVW00Hfv"

    auth = OAuth(token, token_secret, consumer_key, consumer_secret)
    twitter_stream = TwitterStream(
        auth=auth,
        timeout=8000,
        block=False,
        heartbeat_timeout=3000,
    )

    if track_keywords:
        return twitter_stream.statuses.filter(track=track_keywords)
    return twitter_stream.statuses.sample()
def user_stream_iter(auth, request_kwargs):
    """Yield StreamResult objects for messages read from the user stream.

    `request_kwargs` is forwarded to the stream's ``user()`` call. Messages
    with a 'delete' key are skipped, 'warning'/'disconnect' messages are
    yielded as StreamResultError and messages with 'text' as
    StreamResultItem. The generator ends on KeyboardInterrupt.

    NOTE(review): the nesting below was reconstructed from whitespace-collapsed
    source; in particular the keep-alive yield is assumed to belong to the
    final ``else`` branch -- confirm against the original file.
    """
    stream_connection = TwitterStream(
        auth=auth,
        timeout=30,
        domain='userstream.twitter.com').user(**request_kwargs)

    # NOTE(review): the same iterator is re-entered after it is exhausted or
    # after a ChunkedEncodingError; no new connection is opened here.
    while True:
        try:
            for tweet in stream_connection:
                # Falsy messages are ignored.
                if tweet:
                    try:
                        if tweet.get('delete'):
                            continue
                        elif tweet.get('warning') or tweet.get('disconnect'):
                            yield StreamResult(StreamResultError, tweet)
                        elif tweet.get('text'):
                            yield StreamResult(StreamResultItem, tweet)
                        else:
                            print('unknown item:', tweet)
                            yield StreamResult(StreamResultKeepAlive, 'keep-alive')
                    except ValueError:
                        continue
        except ChunkedEncodingError as err:
            continue
        except KeyboardInterrupt:
            return
def start_stream():
    """Create and start a Twitter stream described by the JSON request body.

    Reads 'twitter_account', 'twitter_token', 'filterWord' and 'location'
    from the request and returns an empty body with status 200.
    """
    payload = request.json
    account = payload['twitter_account']
    token = payload['twitter_token']
    filter_term = payload['filterWord']
    filter_coords = region_to_coords(payload['location'])

    # The token secret is looked up by account and token.
    token_secret = secrets.get(account, token)

    streams.start_stream(
        account=account,
        stream=TwitterStream(token, token_secret, send_tweet),
        stream_props={
            'filter_term': filter_term,
            'filter_coords': filter_coords,
        },
    )
    return '', 200
def main():
    """Insert tweets from the selected stream into the database and echo them.

    Stream control values and messages without text are written to the
    error log instead of being stored.
    """
    args = parse_arguments()

    # The streaming API requires an authorized client.
    auth = OAuth(args.token, args.token_secret,
                 args.consumer_key, args.consumer_secret)

    # Optional connection settings.
    stream_args = {
        'timeout': args.timeout,
        'block': not args.no_block,
        'heartbeat_timeout': args.heartbeat_timeout,
    }
    query_args = {'track': args.track_keywords} if args.track_keywords else {}

    if args.user_stream:
        tweet_iter = TwitterStream(
            auth=auth, domain='userstream.twitter.com',
            **stream_args).user(**query_args)
    elif args.site_stream:
        tweet_iter = TwitterStream(
            auth=auth, domain='sitestream.twitter.com',
            **stream_args).site(**query_args)
    else:
        public_stream = TwitterStream(auth=auth, **stream_args)
        if args.track_keywords:
            tweet_iter = public_stream.statuses.filter(**query_args)
        else:
            tweet_iter = public_stream.statuses.sample()

    # Connect to RethinkDB. A failure is only logged; the loop below still
    # runs.
    try:
        db.connect()
    except RqlDriverError:
        log.error("Couldn't connect to database.")

    # Stream control values and the text logged for each of them.
    control_labels = (
        (None, 'None'),
        (Timeout, 'Timeout'),
        (HeartbeatTimeout, 'Heartbeat Timeout'),
        (Hangup, 'Hangup'),
    )
    for tweet in tweet_iter:
        label = next(
            (text for marker, text in control_labels if tweet is marker), None)
        if label is not None:
            log.error(label)
        elif tweet.get('text'):
            # TODO: add that stream_id gets passed to open_stream.py
            tweet['stream_id'] = '1234567890'
            db.insert(DATABASE, TWEETS_TABLE, tweet)
            printNicely(tweet['text'])
        else:
            # Deletes and other data messages have no text.
            log.error('Some data ' + str(tweet))
def get_stream(self, on_behalf_of=None, **kwargs):
    """Return a TwitterStream authorized for the app or for a given user.

    With `on_behalf_of` left as None the app's OAuth is used; otherwise the
    OAuth of that user. Extra keyword arguments go to TwitterStream.
    """
    if on_behalf_of is not None:
        oauth = self.get_user_oauth(on_behalf_of)
    else:
        oauth = self.get_app_oauth()
    return TwitterStream(auth=oauth, **kwargs)
def do_stuff(auth, query_args, stream_args, toggle):
    """Print processed tweets as HTML <div> lines until enough words are seen.

    `toggle` selects the size passed to process_tweet: "short" for short
    only, "toggle" to alternate starting with short, anything else for long.
    The loop stops once more than 56000 words have been printed.
    """
    alternating = toggle == "toggle"
    # Alternating runs start with a short tweet; otherwise the size is fixed.
    do_short = True if alternating else toggle == "short"

    word_count = 0
    tweet_count = 0

    tweets = TwitterStream(
        auth=auth, **stream_args).statuses.filter(**query_args)

    for tweet in tweets:
        if tweet is None:
            continue

        # Prefer the untruncated text when the message carries it.
        if "extended_tweet" in tweet:
            text = tweet["extended_tweet"]["full_text"]
        else:
            text = tweet.get("text")

        processed = process_tweet(text, query_args['track'], do_short)
        if processed:
            tweet_count += 1
            word_count += len(processed.split())
            processed = do_html_things(processed)
            printNicely(
                '<div id={0} class={1}><a href=#{0}>{2}</a></div>'.format(
                    tweet_count, "s" if do_short else "l", processed))
            if alternating:
                do_short = not do_short

        if word_count > 56000:  # 280 * 200
            break

    printNicely("")
def __init__(self, auth, subscription):
    """Set up the thread and the tweet iterator for one subscription.

    Uses the filter stream when subscription['data'] has a truthy 'track'
    entry and the sample stream otherwise.
    """
    threading.Thread.__init__(self)
    twitter_stream = TwitterStream(auth=auth, block=True)

    self.stopped = False
    self.endpoint = subscription['endpoint']

    keywords = subscription['data'].get('track')
    if keywords:
        self.iterator = twitter_stream.statuses.filter(track=keywords)
    else:
        self.iterator = twitter_stream.statuses.sample()
def start_stream(self, account: str, stream: TwitterStream, stream_props: dict) -> None:
    """
    Register `stream` for `account` and start it.

    :param account: Twitter account
    :param stream: Twitter stream object
    :param stream_props: start parameters; must contain
        'filter_term' (i.e: 'Golden gate') and
        'filter_coords' (i.e: [-74,40,-73,41])
    """
    # Stop whatever stream this account had before registering the new one.
    self.stop_stream(account)
    self.set(account, stream)

    start_options = {
        'filter_term': stream_props['filter_term'],
        'filter_coords': stream_props['filter_coords'],
        'filter_langs': ('en',),
    }
    stream.start(**start_options)
def fetchTwitterData(self):
    """Stream English tweets for each tag, clean them and append them to a file.

    For every tag at most 999999 stream messages are read. Messages that
    have a 'text' key are lower-cased, cleaned, printed and written through
    the DataWriter.
    """
    import datetime

    # NOTE(review): these credentials are committed in source; they should be
    # loaded from configuration and rotated.
    access_token = '4493656032-zhJ6hagHhaLy3TjQKb6tSnuO1imDlcOOYlcletE'
    access_secret = 'cwjjJnqsSQkll4qdPvNMYnQgU3lkOBeo4MJRbuGujAV3u'
    consumer_key = 'O1MENwW8o14QlD4vFxeJSgAld'
    consumer_secret = 'lUkCrhP2ct6j7pATtW7DNbAMJqvxoAulSC6zEQ3d2IM1dDDanR'

    twitter_stream = TwitterStream(
        auth=OAuth(access_token, access_secret, consumer_key, consumer_secret))

    # Constructed but not used further in this method.
    read_tags = ReadTwitterTags()
    tags = ['rajini']
    data_writer = DataWriter("/root/data/rajini.tweets")

    for tag in tags:
        remaining = 999999
        for tweet in twitter_stream.statuses.filter(track=tag, language="en"):
            remaining -= 1

            # Round-trip through JSON, as the original code does.
            plain_tweet = json.loads(json.dumps(tweet))
            now = datetime.datetime.now()

            if 'text' in plain_tweet:
                twitter_data = TwitterData(
                    now.strftime("%Y-%m-%d-%H-%M-%S"),
                    plain_tweet['text'].lower(),
                    "NA", "NA", "NA", "NA", "NOT_ANALYZED")

                cleaner = CleanTweets()
                without_rt = cleaner.remove_RT_from_tweet(
                    twitter_data.tweeted_text)
                cleaned_tweet = cleaner.remove_unnecessary_info(without_rt)

                print("T - " + cleaned_tweet)
                data_writer.appendDataToFile(cleaned_tweet)

            if remaining <= 0:
                break
def main():
    """Print tweets from the stream selected on the command line.

    Stream control values are printed as "-- ... --" marker lines and
    messages without text are printed in raw form.
    """
    args = parse_arguments()

    # The streaming API requires an authorized client.
    auth = OAuth(args.token, args.token_secret,
                 args.consumer_key, args.consumer_secret)

    # Optional connection settings.
    stream_args = {
        'timeout': args.timeout,
        'block': not args.no_block,
        'heartbeat_timeout': args.heartbeat_timeout,
    }
    query_args = {'track': args.track_keywords} if args.track_keywords else {}

    if args.user_stream:
        tweet_iter = TwitterStream(
            auth=auth, domain='userstream.twitter.com',
            **stream_args).user(**query_args)
    elif args.site_stream:
        tweet_iter = TwitterStream(
            auth=auth, domain='sitestream.twitter.com',
            **stream_args).site(**query_args)
    else:
        public_stream = TwitterStream(auth=auth, **stream_args)
        if args.track_keywords:
            tweet_iter = public_stream.statuses.filter(**query_args)
        else:
            tweet_iter = public_stream.statuses.sample()

    # Stream control values and the marker line printed for each of them.
    markers = (
        (None, "-- None --"),
        (Timeout, "-- Timeout --"),
        (HeartbeatTimeout, "-- Heartbeat Timeout --"),
        (Hangup, "-- Hangup --"),
    )
    for tweet in tweet_iter:
        line = next((text for value, text in markers if tweet is value), None)
        if line is not None:
            printNicely(line)
        elif tweet.get('text'):
            printNicely(tweet['text'])
        else:
            # Deletes and other data messages have no text.
            printNicely("-- Some data: " + str(tweet))
def run(self):
    """Read the user stream forever and emit a signal for every tweet.

    Returns immediately, after logging an error, when either
    config.TWITTER_TOKEN or config.TWITTER_TOKEN_SECRET is unset. Whenever
    the stream iterator ends, waits 10 seconds and connects again.
    """
    self.logger.debug("Setting up twitter stream")

    if not (config.TWITTER_TOKEN and config.TWITTER_TOKEN_SECRET):
        self.logger.error("I don't know how to access my twitter stream")
        return

    # NOTE(review): the consumer key/secret are committed in source.
    auth = OAuth(
        consumer_key='XryIxN3J2ACaJs50EizfLQ',
        consumer_secret='j7IuDCNjftVY8DBauRdqXs4jDl5Fgk1IJRag8iE',
        token=config.TWITTER_TOKEN,
        token_secret=config.TWITTER_TOKEN_SECRET,
    )

    # Stream control values and the debug line logged for each of them.
    control_notes = (
        (None, "Got blank-line keep alive"),
        (Timeout, "Got timeout. Expecting reconnect."),
        (HeartbeatTimeout, "Got heartbeat timeout. Expecting reconnect."),
        (Hangup, "Got hangup notification. Expecting reconnect."),
    )

    while True:
        self.logger.info("Connecting to userstream.twitter.com")
        user_stream = TwitterStream(auth=auth, domain='userstream.twitter.com')

        for msg in user_stream.user():
            note = next(
                (text for marker, text in control_notes if msg is marker),
                None)
            if note is not None:
                self.logger.debug(note)
            elif 'text' in msg and 'user' in msg:
                signals.tweet.send(None, **msg)
            else:
                self.logger.debug("Unhandled message: %r" % msg)

        self.logger.info("Lost connection to userstream.twitter.com. Reconnecting in 10s...")
        eventlet.sleep(10)
def stream_tweets():
    """
    Open the public filter stream for the configured keywords.

    Returns the iterator produced by the filter call, restricted to
    English-language tweets; consuming and saving the tweets is left to the
    caller.
    """
    log.debug("Activating Twitter Stream API")

    public_stream = TwitterStream(
        auth=authKeys, domain="stream.twitter.com", secure=True)
    keywords = config['tweet']['keywords']
    return public_stream.statuses.filter(track=keywords, language='en')
def main():
    """Print the text of English tweets from the selected stream, then exit.

    The process exits with status 0 at the first stream message received
    after 11 tweets have been printed.
    """
    args = parse_arguments()

    # When using twitter stream you must authorize.
    auth = OAuth(args.token, args.token_secret,
                 args.consumer_key, args.consumer_secret)

    # These arguments are optional:
    stream_args = dict(timeout=args.timeout,
                       block=not args.no_block,
                       heartbeat_timeout=args.heartbeat_timeout)

    query_args = dict()
    if args.track_keywords:
        query_args["track"] = args.track_keywords

    if args.user_stream:
        stream = TwitterStream(auth=auth, domain="userstream.twitter.com",
                               **stream_args)
        tweet_iter = stream.user(**query_args)
    elif args.site_stream:
        stream = TwitterStream(auth=auth, domain="sitestream.twitter.com",
                               **stream_args)
        tweet_iter = stream.site(**query_args)
    else:
        stream = TwitterStream(auth=auth, **stream_args)
        if args.track_keywords:
            tweet_iter = stream.statuses.filter(**query_args)
        else:
            tweet_iter = stream.statuses.sample()

    printed = 0
    # Iterate over the sample stream.
    for tweet in tweet_iter:
        if printed > 10:
            # Same effect as exit(0) without relying on the site builtin.
            raise SystemExit(0)
        # A non-blocking stream can hand back None when no data is ready;
        # calling .get() on it would raise AttributeError.
        if tweet is None:
            continue
        # You must test that your tweet has text. It might be a delete
        # or data message.
        if tweet.get("text") and tweet.get("lang") == "en":
            printNicely(tweet["text"])
            printed += 1
def ConnectAndGetStream(stream_args, query_args, logger):
    """Connect to the public filter stream and return its tweet iterator.

    `query_args` is passed to ``statuses.filter``. `stream_args` is accepted
    for interface compatibility but is not used. Progress and errors are
    printed and written to `logger` at debug level.

    Returns the iterator, or None when connecting raised an exception.
    """
    print("Connecting to Twitter Streaming API...")
    logger.debug("Connecting to Twitter Streaming API...")

    auth = get_auth()
    twitter_stream = TwitterStream(auth=auth)

    # Bind the result up front: previously a failed connection left the name
    # unbound and the final return raised UnboundLocalError.
    tweet_iter = None
    try:
        tweet_iter = twitter_stream.statuses.filter(**query_args)
        print("Connected...")
        logger.debug("Connected...")
    except Exception as e:
        # Only characters 20-22 of the exception text are reported
        # (presumably the HTTP status code -- confirm against the library's
        # error message format).
        print("Error: " + str(e)[20:23])
        logger.debug("Error: " + str(e)[20:23])

    return tweet_iter
def do_state(state, auth, query_args, stream_args, htmlparser):
    """Print processed tweets for one state, or for no state, up to a word limit.

    With a truthy `state`, a heading with its display name is printed, its
    bounding box is stored in query_args['locations'] and the loop stops
    after more than 1100 words; otherwise the limit is 51000 words.
    """
    word_count = 0
    if state:
        heading = state['display_name']
        print()
        print(heading)
        print("-" * len(heading))
        word_count = len(heading.split())

        box = state['boundingbox']
        # Reordered so that the SW corner comes first.
        query_args['locations'] = ",".join([box[2], box[0], box[3], box[1]])

    word_limit = 1100 if state else 51000

    tweets = TwitterStream(
        auth=auth, **stream_args).statuses.filter(**query_args)

    for tweet in tweets:
        # None and messages without text (deletes, data messages) are skipped.
        if tweet is not None and tweet.get('text'):
            processed = process_tweet(tweet['text'], query_args['track'])
            if processed:
                # Turn HTML entities back into plain characters.
                processed = htmlparser.unescape(processed)
                printNicely(processed)
                word_count += len(processed.split())

        if word_count > word_limit:
            break

    print()
def main():
    """Print the text of English tweets from the selected stream, then exit.

    The process exits with status 0 at the first stream message received
    after 11 tweets have been printed.
    """
    args = parse_arguments()

    # When using twitter stream you must authorize.
    auth = OAuth(args.token, args.token_secret,
                 args.consumer_key, args.consumer_secret)

    # These arguments are optional:
    stream_args = dict(timeout=args.timeout,
                       block=not args.no_block,
                       heartbeat_timeout=args.heartbeat_timeout)

    query_args = dict()
    if args.track_keywords:
        query_args['track'] = args.track_keywords

    if args.user_stream:
        stream = TwitterStream(auth=auth, domain='userstream.twitter.com',
                               **stream_args)
        tweet_iter = stream.user(**query_args)
    elif args.site_stream:
        stream = TwitterStream(auth=auth, domain='sitestream.twitter.com',
                               **stream_args)
        tweet_iter = stream.site(**query_args)
    else:
        stream = TwitterStream(auth=auth, **stream_args)
        if args.track_keywords:
            tweet_iter = stream.statuses.filter(**query_args)
        else:
            tweet_iter = stream.statuses.sample()

    printed = 0
    # Iterate over the sample stream.
    for tweet in tweet_iter:
        if printed > 10:
            # Same effect as exit(0) without relying on the site builtin.
            raise SystemExit(0)
        # A non-blocking stream can hand back None when no data is ready;
        # calling .get() on it would raise AttributeError.
        if tweet is None:
            continue
        # You must test that your tweet has text. It might be a delete
        # or data message.
        if tweet.get('text') and tweet.get('lang') == 'en':
            printNicely(tweet['text'])
            printed += 1
def __init__(self, auth, subscription):
    """Set up the thread and the tweet iterator for one subscription.

    self.testing takes the subscription's 'isTesting' value unless that is
    None or the empty string, in which case it is False. The filter stream
    is used when a truthy 'track' entry is present, else the sample stream.
    """
    threading.Thread.__init__(self)
    twitter_stream = TwitterStream(auth=auth, block=True)
    self.stopped = False

    is_testing = subscription["data"].get("isTesting")
    if is_testing is None or is_testing == "":
        self.testing = False
    else:
        self.testing = is_testing

    self.endpoint = subscription["endpoint"]

    keywords = subscription["data"].get("track")
    if keywords:
        self.iterator = twitter_stream.statuses.filter(track=keywords)
    else:
        self.iterator = twitter_stream.statuses.sample()
def main():
    """Print "<screen name>: <text>" for tweets from the selected stream.

    Stream control values are printed as "-- ... --" marker lines and
    messages without text are printed in raw form. OAuth credentials come
    from the module-level accessToken, accessTokenSecret, consumerKey and
    consumerSecret names.
    """
    args = parse_arguments()

    auth = OAuth(accessToken, accessTokenSecret, consumerKey, consumerSecret)

    # Optional connection settings.
    stream_args = dict(timeout=args.timeout,
                       block=not args.no_block,
                       heartbeat_timeout=args.heartbeat_timeout)

    query_args = dict()
    if args.track_keywords:
        query_args['track'] = args.track_keywords

    if args.user_stream:
        stream = TwitterStream(auth=auth, domain='userstream.twitter.com',
                               **stream_args)
        tweet_iter = stream.user(**query_args)
    elif args.site_stream:
        stream = TwitterStream(auth=auth, domain='sitestream.twitter.com',
                               **stream_args)
        # Fixed: this branch used to call stream.user() on the site-stream
        # domain instead of the site endpoint.
        tweet_iter = stream.site(**query_args)
    else:
        stream = TwitterStream(auth=auth, **stream_args)
        if args.track_keywords:
            tweet_iter = stream.statuses.filter(**query_args)
        else:
            tweet_iter = stream.statuses.sample()

    for tweet in tweet_iter:
        if tweet is None:
            printNicely("-- None --")
        elif tweet is Timeout:
            printNicely("-- Timeout --")
        elif tweet is HeartbeatTimeout:
            printNicely("-- Heartbeat Timeout --")
        elif tweet is Hangup:
            printNicely("-- Hangup --")
        elif tweet.get('text'):
            print(tweet['user']['screen_name'] + ": " + tweet['text'])
        else:
            # Deletes and other data messages have no text.
            printNicely(str(tweet))
def main():
    """Append tweets from a location-filtered stream to a JSON-lines file.

    sys.argv[1] names the output file (default 'tweets.json') and
    sys.argv[2] the settings module (default 'config'). The settings module
    must provide TOKEN, TOKEN_SECRET, CONSUMER_KEY, CONSUMER_SECRET and
    COORDS.
    """
    try:
        filename = sys.argv[1]
    except IndexError:
        filename = 'tweets.json'

    # Fall back to the default settings module when none was named or the
    # named one cannot be imported. Exception (not a bare except) lets
    # KeyboardInterrupt and SystemExit through.
    try:
        con = import_module(sys.argv[2])
    except Exception:
        con = import_module('config')

    auth = OAuth(con.TOKEN, con.TOKEN_SECRET,
                 con.CONSUMER_KEY, con.CONSUMER_SECRET)
    query_args = {"locations": con.COORDS}

    # The context manager closes (and flushes) the file even when the stream
    # loop ends with an exception; the file used to be left open.
    with open(filename, 'a') as out:
        stream = TwitterStream(auth=auth)
        tweet_iter = stream.statuses.filter(**query_args)

        for tweet in tweet_iter:
            if tweet is None:
                printNicely("-- None --")
            elif tweet is Timeout:
                printNicely("-- Timeout --")
            elif tweet is HeartbeatTimeout:
                printNicely("-- Heartbeat Timeout --")
            elif tweet is Hangup:
                printNicely("-- Hangup --")
            elif tweet.get('text'):
                # Keep only the fields of interest, one JSON object per line.
                tweetJson = {
                    "id": tweet['id_str'],
                    "time": tweet['created_at'],
                    "place": tweet['place'],
                    "coordinates": tweet['coordinates'],
                    "retweets": tweet['retweet_count'],
                    "text": tweet['text'],
                    "hashtags": tweet['entities']['hashtags']
                }
                out.write(json.dumps(tweetJson) + "\n")
            else:
                printNicely("-- Some data: " + str(tweet))
def __init__(self):
    """Create the Twitter stream, Twitter REST and Tumblr clients.

    All credentials are read from module-level constants.
    """
    def twitter_auth():
        # A separate OAuth object is built for each Twitter client.
        return OAuth(ACCESS_KEY, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)

    self.stream = TwitterStream(auth=twitter_auth(), api_version='1.1')
    self.twitter = Twitter(auth=twitter_auth(), api_version='1.1')
    self.tmblr = tumblpy.Tumblpy(
        app_key=TUMBLR_KEY,
        app_secret=TUMBLR_SECRET,
        oauth_token=TOKEN_KEY,
        oauth_token_secret=TOKEN_SECRET,
    )
"../mustard-mine" ) # Hack: Lift credentials from Mustard Mine if we don't have our own from config import TWITTER_CLIENT_ID, TWITTER_CLIENT_SECRET from twitter import OAuth, Twitter, oauth_dance, read_token_file from twitter.stream import TwitterStream, Timeout, HeartbeatTimeout, Hangup from twitter.api import TwitterHTTPError CREDENTIALS_FILE = os.path.expanduser('~/.cherami-login') if not os.path.exists(CREDENTIALS_FILE): oauth_dance("Cher Ami", TWITTER_CLIENT_ID, TWITTER_CLIENT_SECRET, CREDENTIALS_FILE) auth = OAuth(*read_token_file(CREDENTIALS_FILE), TWITTER_CLIENT_ID, TWITTER_CLIENT_SECRET) twitter = Twitter(auth=auth) stream = TwitterStream(auth=auth, timeout=60) who_am_i = twitter.account.verify_credentials() my_id = who_am_i["id"] displayed_tweets = {"": 0} def fix_extended_tweet(tweet): # Streaming mode doesn't include the full_text. It will show short tweets # with just "text", and longer ones with an "extended_tweet" that includes # the full text. if "extended_tweet" in tweet: tweet.update(tweet["extended_tweet"]) if "full_text" not in tweet: tweet["full_text"] = tweet["text"] replace = {
def stream(domain, args, name='Rainbow Stream'):
    """
    Track the stream
    """
    def emit(text):
        # Write without a newline and flush so the prompt shows immediately.
        sys.stdout.write(text)
        sys.stdout.flush()

    # Banner text per domain: the tracked keywords for the public stream,
    # `name` for the user and site streams.
    banners = {
        c['USER_DOMAIN']: name,
        c['PUBLIC_DOMAIN']: args.track_keywords,
        c['SITE_DOMAIN']: name,
    }
    if c['ASCII_ART']:
        ascii_art(banners[domain])

    # Optional connection settings. The 0.5 s timeout makes the loop see
    # g['stream_stop'] after each 0.5 s.
    connection_options = {
        'timeout': 0.5,
        'block': True,
        'heartbeat_timeout': c['HEARTBEAT_TIMEOUT'] * 60,
    }
    # Only send a 'track' parameter when keywords were given.
    query = {'track': args.track_keywords} if args.track_keywords else {}

    twitter_stream = TwitterStream(
        auth=authen(), domain=domain, **connection_options)
    try:
        if domain == c['USER_DOMAIN']:
            tweets = twitter_stream.user(**query)
        elif domain == c['SITE_DOMAIN']:
            tweets = twitter_stream.site(**query)
        elif args.track_keywords:
            tweets = twitter_stream.statuses.filter(**query)
        else:
            tweets = twitter_stream.statuses.sample()

        # Block new stream until other one exits
        StreamLock.acquire()
        g['stream_stop'] = False

        for tweet in tweets:
            if tweet is None:
                printNicely("-- None --")
            elif tweet is Timeout:
                # A timeout is the only point where a stop request is honoured.
                if g['stream_stop']:
                    StreamLock.release()
                    break
            elif tweet is HeartbeatTimeout:
                printNicely("-- Heartbeat Timeout --")
                guide = ''.join([
                    light_magenta("You can use "),
                    light_green("switch"),
                    light_magenta(" command to return to your stream.\n"),
                    light_magenta("Type "),
                    light_green("h stream"),
                    light_magenta(" for more details."),
                ])
                printNicely(guide)
                emit(g['decorated_name'](c['PREFIX']))
            elif tweet is Hangup:
                printNicely("-- Hangup --")
            elif tweet.get('text'):
                draw(
                    t=tweet,
                    keyword=args.track_keywords,
                    check_semaphore=True,
                    fil=args.filter,
                    ig=args.ignore,
                )
                # Redraw the prompt, followed by whatever the user has typed
                # so far when that differs from g['cmd'].
                # There is an unexpected behaviour in MacOSX readline +
                # Python 2: after completely deleting a typed word, the
                # readline buffer still contains its 1st character.
                typed = readline.get_line_buffer().strip()
                if typed and g['cmd'] != typed:
                    emit(g['decorated_name'](c['PREFIX']) + unc(typed))
                elif not c['HIDE_PROMPT']:
                    emit(g['decorated_name'](c['PREFIX']))
            elif tweet.get('direct_message'):
                print_message(tweet['direct_message'], check_semaphore=True)
    except TwitterHTTPError:
        printNicely('')
        printNicely(
            magenta(
                "We have maximum connection problem with twitter'stream API right now :("
            ))
# Print the text of every tweet from the public filter stream that matches
# `keyword`. OAuth credentials are read from the environment after loading
# the '.env' file.
from dotenv import load_dotenv
import os
from twitter import Twitter
from twitter.oauth import OAuth
from twitter.stream import TwitterStream
from twitter.util import printNicely

load_dotenv('.env')

auth = OAuth(os.environ['TOKEN'], os.environ['TOKEN_SECRET'],
             os.environ['CONSUMER_KEY'], os.environ['CONSUMER_SECRET'])
stream = TwitterStream(auth=auth)

keyword = 'Florida'
iterator = stream.statuses.filter(track=keyword)

for tweet in iterator:
    # The stream also delivers messages without 'text' (deletes and other
    # data messages); indexing them used to raise KeyError.
    if tweet.get('text'):
        printNicely(tweet['text'])
import keys

# Phrase to look for in the user stream, and the suffix of the bot's own
# status updates (used to skip tweets that already contain it).
track_word = 'にゃーん'
except_word = 'がにゃーんしました'

# Run the OAuth dance once and reuse the stored token afterwards.
TOKEN_FILE = os.path.expanduser('./.token')
if not os.path.exists(TOKEN_FILE):
    oauth_dance(keys.APP_NAME, keys.CONSUMER_KEY, keys.CONSUMER_SECRET,
                TOKEN_FILE)
token, token_secret = read_token_file(TOKEN_FILE)

auth = OAuth(token, token_secret, keys.CONSUMER_KEY, keys.CONSUMER_SECRET)
t = Twitter(auth=auth)
twitter_userstream = TwitterStream(auth=auth, domain='userstream.twitter.com')
tweet_itr = twitter_userstream.user()

for tweet in tweet_itr:
    # Messages without text (deletes, other data messages) are ignored.
    if tweet.get('text'):
        if track_word in tweet['text'] and except_word not in tweet['text']:
            timestamp = datetime.fromtimestamp(
                int(tweet['timestamp_ms']) / 1000)
            tweet_text = str(
                timestamp) + '\n' + tweet['user']['name'] + except_word
            print(tweet_text)
            # Never repost on behalf of protected accounts.
            if not tweet['user']['protected']:
                try:
                    t.statuses.update(status=tweet_text)
                except Exception as err:
                    # Posting stays best-effort, but the failure is now
                    # reported and Ctrl-C is no longer swallowed.
                    print('status update failed: ' + str(err))
def stream(domain, args, name='Rainbow Stream'):
    """
    Track the stream
    """
    # Banner text per domain: the tracked keywords for the public stream,
    # `name` for the user and site streams.
    banners = {
        c['USER_DOMAIN']: name,
        c['PUBLIC_DOMAIN']: args.track_keywords,
        c['SITE_DOMAIN']: name,
    }
    if c['ASCII_ART']:
        ascii_art(banners[domain])

    # Optional connection settings; this stream is always created with
    # block=False.
    connection_options = {
        'timeout': args.timeout,
        'block': False,
        'heartbeat_timeout': args.heartbeat_timeout,
    }
    # Only send a 'track' parameter when keywords were given.
    query = {'track': args.track_keywords} if args.track_keywords else {}

    twitter_stream = TwitterStream(
        auth=authen(), domain=domain, **connection_options)

    # Stream control values and the marker line printed for each of them.
    notices = (
        (Timeout, "-- Timeout --"),
        (HeartbeatTimeout, "-- Heartbeat Timeout --"),
        (Hangup, "-- Hangup --"),
    )
    try:
        if domain == c['USER_DOMAIN']:
            tweets = twitter_stream.user(**query)
        elif domain == c['SITE_DOMAIN']:
            tweets = twitter_stream.site(**query)
        elif args.track_keywords:
            tweets = twitter_stream.statuses.filter(**query)
        else:
            tweets = twitter_stream.statuses.sample()

        # Block new stream until other one exits
        StreamLock.acquire()
        g['stream_stop'] = False

        for tweet in tweets:
            # A stop request is checked on every item, before handling it.
            if g['stream_stop']:
                StreamLock.release()
                break
            if tweet is None:
                continue

            notice = next(
                (label for marker, label in notices if tweet is marker), None)
            if notice is not None:
                printNicely(notice)
            elif tweet.get('text'):
                draw(
                    t=tweet,
                    keyword=args.track_keywords,
                    check_semaphore=True,
                    fil=args.filter,
                    ig=args.ignore,
                )
                # Redraw the prompt with the text typed so far when it
                # differs from the previous command.
                typed = readline.get_line_buffer().strip()
                if g['previous_cmd'] != typed:
                    # There is an unexpected behaviour in MacOSX readline:
                    # after completely deleting a typed word, the readline
                    # buffer still contains its 1st character.
                    if len(typed) == 1:
                        typed = ''
                        g['OSX_readline_bug'] = True
                    sys.stdout.write(g['decorated_name'](c['PREFIX']) + typed)
                    sys.stdout.flush()
            elif tweet.get('direct_message'):
                print_message(tweet['direct_message'], check_semaphore=True)
    except TwitterHTTPError:
        printNicely('')
        printNicely(
            magenta("We have maximum connection problem with twitter'stream API right now :("))
def start_twitter_stream():
    """Print tweets from the public filter stream that track "trump".

    The members of two Twitter lists are fetched first and joined into a
    follow string (currently not passed to the filter). On a
    TwitterHTTPError the function waits 15 minutes and calls itself again.
    """
    auth = initiate_twitter_api()
    twitter = Twitter(auth=auth, retry=True)

    husker_coaches_list = twitter.lists.members(
        owner_screen_name="ayy_gbr", slug="Nebraska-Football-Coaches")
    husker_media_list = twitter.lists.members(
        owner_screen_name="ayy_gbr", slug="Husker-Media")

    # Collect the ids of all members of both lists (the loop variable no
    # longer shadows the builtin `list`).
    follow = [
        member["id_str"]
        for members in (husker_coaches_list, husker_media_list)
        for member in members["users"]
    ]
    follow_str = ",".join(follow)
    track_str = "trump"

    stream_args = dict(auth=auth,
                       timeout=60,
                       block=False,
                       heartbeat_timeout=60)
    stream = TwitterStream(**stream_args)

    try:
        query_args = dict(
            # follow=follow_str,
            track=track_str,
            language="en",
            retry=True)
        tweet_iter = stream.statuses.filter(**query_args)

        print("Waiting for a tweet...")
        for tweet in tweet_iter:
            # The stream is non-blocking, so None can arrive when no data is
            # ready. It has to be handled before any .get() call; the
            # rate-limit prints used to raise AttributeError here.
            if tweet is None:
                print("-- None --")
                continue

            print("\tRL Limit:", tweet.get("rate_limit_limit"))
            print("\tRL Remaining:", tweet.get("rate_limit_remaining"))
            print("\tRL Reset:", tweet.get("rate_limit_reset"))

            if tweet is Timeout:
                print("-- Timeout --")
            elif tweet is HeartbeatTimeout:
                print("-- Heartbeat Timeout --")
            elif tweet is Hangup:
                print("-- Hangup --")
            elif tweet.get('text'):
                tweet_author = tweet["user"]["screen_name"]
                print("Sending a tweet!")

                # Fall back to the current time when the tweet carries no
                # creation timestamp.
                try:
                    dt = datetime.strptime(tweet['created_at'],
                                           '%a %b %d %H:%M:%S %z %Y')
                except KeyError:
                    dt = datetime.now()

                print(f"Author: @{tweet_author}")
                print(
                    f"Link: https://twitter.com/{tweet_author}/status/{tweet['id']}"
                )
                print(
                    f"Text: {tweet['text']}\nCreated At: {dt.strftime('%B %d, %Y at %H:%M%p')}"
                )
            else:
                print("-- Some data: " + str(tweet))
    except TwitterHTTPError as e:
        print(e)
        print("Waiting 15 minutes and then restarting")
        import time
        time.sleep(15 * 60)
        print("Restarting the twitter stream")
        # NOTE(review): restarting by recursion adds a stack frame per
        # failure; a loop would avoid hitting the recursion limit.
        start_twitter_stream()
def stream(domain, args, name='Rainbow Stream'):
    """
    Track the stream

    Open the streaming endpoint selected by ``domain`` and render what it
    yields. The loop ends when a ``Timeout`` item arrives while
    ``g['stream_stop']`` is true, on a ``HeartbeatTimeout`` item, when the
    iterator is exhausted, or when ``TwitterHTTPError`` is raised.

    ``domain`` is compared with ``c['USER_DOMAIN']`` and ``c['SITE_DOMAIN']``;
    anything else goes to the public ``statuses`` endpoints (``filter`` when
    ``args.track_keywords`` is set, ``sample`` otherwise).
    ``args`` supplies ``track_keywords``, ``filter`` and ``ignore``.
    ``name`` is the banner text used for the user and site domains.

    Only one stream runs at a time: ``StreamLock`` is taken before the first
    item is read and is always given back when this function leaves the
    loop, whatever the reason.
    """
    # The Logo
    art_dict = {
        c['USER_DOMAIN']: name,
        c['PUBLIC_DOMAIN']: args.track_keywords,
        c['SITE_DOMAIN']: name,
    }
    if c['ASCII_ART']:
        ascii_art(art_dict[domain])
    # These arguments are optional:
    stream_args = dict(
        timeout=0.5,  # To check g['stream_stop'] after each 0.5 s
        block=True,
        heartbeat_timeout=c['HEARTBEAT_TIMEOUT'] * 60)
    # Track keyword
    query_args = dict()
    if args.track_keywords:
        query_args['track'] = args.track_keywords
    # Get stream
    connection = TwitterStream(
        auth=authen(),
        domain=domain,
        **stream_args)
    try:
        if domain == c['USER_DOMAIN']:
            tweet_iter = connection.user(**query_args)
        elif domain == c['SITE_DOMAIN']:
            tweet_iter = connection.site(**query_args)
        elif args.track_keywords:
            tweet_iter = connection.statuses.filter(**query_args)
        else:
            tweet_iter = connection.statuses.sample()
        # Block new stream until other one exits
        StreamLock.acquire()
        try:
            g['stream_stop'] = False
            for tweet in tweet_iter:
                if tweet is None:
                    printNicely("-- None --")
                elif tweet is Timeout:
                    # Timeout items are the only point where a stop request
                    # is honoured.
                    if g['stream_stop']:
                        break
                elif tweet is HeartbeatTimeout:
                    printNicely("-- Heartbeat Timeout --")
                    guide = light_magenta("You can use ") + \
                        light_green("switch") + \
                        light_magenta(" command to return to your stream.\n")
                    guide += light_magenta("Type ") + \
                        light_green("h stream") + \
                        light_magenta(" for more details.")
                    printNicely(guide)
                    sys.stdout.write(g['decorated_name'](c['PREFIX']))
                    sys.stdout.flush()
                    break
                elif tweet is Hangup:
                    printNicely("-- Hangup --")
                elif tweet.get('text'):
                    draw(
                        t=tweet,
                        keyword=args.track_keywords,
                        humanize=False,
                        check_semaphore=True,
                        fil=args.filter,
                        ig=args.ignore,
                    )
                    # Current readline buffer
                    current_buffer = readline.get_line_buffer().strip()
                    # There is an unexpected behaviour in MacOSX readline + Python 2:
                    # after completely delete a word after typing it,
                    # somehow readline buffer still contains
                    # the 1st character of that word
                    if current_buffer and g['cmd'] != current_buffer:
                        # Re-draw the prompt plus whatever the user had typed.
                        sys.stdout.write(
                            g['decorated_name'](c['PREFIX']) + str2u(current_buffer))
                        sys.stdout.flush()
                    elif not c['HIDE_PROMPT']:
                        sys.stdout.write(g['decorated_name'](c['PREFIX']))
                        sys.stdout.flush()
                elif tweet.get('direct_message'):
                    print_message(
                        tweet['direct_message'], check_semaphore=True)
        finally:
            # Release on every exit path (stop request, heartbeat timeout,
            # exhausted iterator, exception); otherwise the next stream
            # would wait forever.
            StreamLock.release()
    except TwitterHTTPError:
        printNicely('')
        printNicely(
            magenta("We have maximum connection problem with twitter'stream API right now :("))
def stream(domain, args, name='Rainbow Stream'):
    """
    Track the stream

    Open the streaming endpoint selected by ``domain`` and draw every item
    that carries a ``text`` field until the iterator is exhausted.

    ``domain`` is compared with ``c['USER_DOMAIN']`` and ``c['SITE_DOMAIN']``;
    anything else goes to the public ``statuses`` endpoints (``filter`` when
    ``args.track_keywords`` is set, ``sample`` otherwise).
    ``args`` supplies ``timeout``, ``no_block``, ``heartbeat_timeout``,
    ``track_keywords``, ``image_on_term``, ``filter`` and ``ignore``.
    ``name`` is the banner text used for the user domain.

    Any ``Exception`` raised while opening or reading the stream is reported
    as a connection problem. ``KeyboardInterrupt`` and ``SystemExit`` are
    left to propagate.
    """
    # The Logo
    art_dict = {
        c['USER_DOMAIN']: name,
        c['PUBLIC_DOMAIN']: args.track_keywords,
        c['SITE_DOMAIN']: 'Site Stream',
    }
    if g['ascii_art']:
        ascii_art(art_dict[domain])
    # These arguments are optional:
    stream_args = dict(
        timeout=args.timeout,
        block=not args.no_block,
        heartbeat_timeout=args.heartbeat_timeout)
    # Track keyword
    query_args = dict()
    if args.track_keywords:
        query_args['track'] = args.track_keywords
    # Get stream
    connection = TwitterStream(
        auth=authen(),
        domain=domain,
        **stream_args)
    try:
        # NOTE(review): the endpoint call is kept inside the try because it
        # is presumably where the HTTP request is made -- confirm against
        # the `twitter` package.
        if domain == c['USER_DOMAIN']:
            tweet_iter = connection.user(**query_args)
        elif domain == c['SITE_DOMAIN']:
            tweet_iter = connection.site(**query_args)
        elif args.track_keywords:
            tweet_iter = connection.statuses.filter(**query_args)
        else:
            tweet_iter = connection.statuses.sample()
        # Iterate over the stream.
        for tweet in tweet_iter:
            if tweet is None:
                printNicely("-- None --")
            elif tweet is Timeout:
                printNicely("-- Timeout --")
            elif tweet is HeartbeatTimeout:
                printNicely("-- Heartbeat Timeout --")
            elif tweet is Hangup:
                printNicely("-- Hangup --")
            elif tweet.get('text'):
                draw(
                    t=tweet,
                    iot=args.image_on_term,
                    keyword=args.track_keywords,
                    fil=args.filter,
                    ig=args.ignore,
                )
    except Exception:
        # Deliberately broad, but no longer a bare ``except`` so Ctrl-C and
        # interpreter shutdown are not reported as a connection problem.
        printNicely(
            magenta("I'm afraid we have problem with twitter'S maximum connection."))
        printNicely(magenta("Let's try again later."))
def _friend_ids(response):
    """Return the id sequence carried by a ``friends.ids`` response.

    NOTE(review): the Twitter ``friends/ids`` endpoint is documented to wrap
    the ids in an object under ``"ids"``; a response that is not a dict is
    passed through unchanged. Confirm against the client behind ``api``.
    """
    if isinstance(response, dict):
        return response["ids"]
    return response


def DecideUserID(userid):
    """Choose the next user id to crawl.

    First tries a random pick: reads up to 100 items from the sample stream,
    collects the author ids, and returns the first one that is not already
    in ``userlist``, has no "bot" in its screen name and whose ``lang`` is
    ``"ja"``.

    If that yields nothing, falls back to the friends of ``userid``'s
    friends (walked from the end of each id list) and returns the first one
    not in ``userlist`` whose ``lang`` is ``"ja"``.

    Returns the chosen user id, or ``-1`` when no candidate qualifies or the
    friends of ``userid`` cannot be fetched. API failures are reported with
    ``print`` and skipped; only ``Exception`` subclasses are caught, so
    Ctrl-C still stops the crawl.
    """
    # Take user ids from the Twitter stream so that the pick is random.
    try:
        sample = TwitterStream(auth=AUTH).statuses.sample()
        seen = 0
        author_ids = []
        for tweet in sample:
            # Stop after getting 100 tweets. You can adjust this to any number.
            if seen == 100:
                break
            seen += 1
            if 'user' in tweet:
                author_ids.append(tweet['user']['id'])
        # A set removes any duplicated user ids.
        for candidate in set(author_ids):
            if candidate in userlist:
                print("すでにクロールしたユーザです")
                continue
            try:
                profile = api.users.show(user_id=candidate)
                # Check whether "bot" appears in the screen name.
                if "bot" in profile["screen_name"]:
                    print("botをみつけたよ")
                elif profile["lang"] == "ja":
                    print("ランダムにユーザを決めたよ")
                    return candidate
            except Exception:
                print("言語判定のGetUser失敗")
    except Exception:
        print("GetStream失敗")

    # If no random pick was possible, take one from the friends of friends.
    try:
        friends = _friend_ids(api.friends.ids(user_id=userid, count=10))
    except Exception:
        print("GetFriend失敗")
        return -1

    for friend_id in reversed(friends):
        try:
            second_degree = _friend_ids(
                api.friends.ids(user_id=friend_id, count=100))
            for candidate in reversed(second_degree):
                if candidate in userlist:
                    print("すでにクロールしたユーザです")
                    continue
                try:
                    profile = api.users.show(user_id=candidate)
                    if profile["lang"] == "ja":
                        print("友達の友達を選んだよ")
                        return candidate
                except Exception:
                    print("GetUser失敗")
        except Exception:
            print("GetFriend失敗")
    return -1
def main():
    """Stream tweets for a pinned keyword, saving and printing each one.

    The tracked keyword is always ``'temer'`` (it replaces whatever
    ``parse_arguments()`` returned) and the OAuth credentials are the four
    blank placeholders below, not the command-line values. Every item with
    a ``text`` field is passed to ``save()`` together with the start-up
    timestamp and echoed; every other item is reported as a notice.
    """
    args = parse_arguments()
    args.track_keywords = 'temer'
    # UTC timestamp taken once and handed to every save() call.
    started_at = strftime("%Y%m%d_%H%M%S", gmtime())

    # When using twitter stream you must authorize.
    oauth_token = ""
    oauth_secret = ""
    consumer_key = ""
    consumer_secret = ""
    auth = OAuth(oauth_token, oauth_secret, consumer_key, consumer_secret)

    # These arguments are optional:
    stream_args = {
        'timeout': args.timeout,
        'block': not args.no_block,
        'heartbeat_timeout': args.heartbeat_timeout,
    }
    query_args = {'track': args.track_keywords} if args.track_keywords else {}

    if args.user_stream or args.site_stream:
        host = ('userstream.twitter.com' if args.user_stream
                else 'sitestream.twitter.com')
        connection = TwitterStream(auth=auth, domain=host, **stream_args)
        endpoint = connection.user if args.user_stream else connection.site
        tweet_iter = endpoint(**query_args)
    else:
        connection = TwitterStream(auth=auth, **stream_args)
        if args.track_keywords:
            tweet_iter = connection.statuses.filter(**query_args)
        else:
            tweet_iter = connection.statuses.sample()

    # Marker objects the iterator may yield, with the notice shown for each.
    notices = (
        (Timeout, "-- Timeout --"),
        (HeartbeatTimeout, "-- Heartbeat Timeout --"),
        (Hangup, "-- Hangup --"),
    )

    # Iterate over the stream.
    for tweet in tweet_iter:
        if tweet is None:
            printNicely("-- None --")
            continue

        notice = next(
            (label for marker, label in notices if tweet is marker), None)
        if notice is not None:
            printNicely(notice)
            continue

        # An item without text might be a delete or data message.
        if not tweet.get('text'):
            printNicely("-- Some data: " + str(tweet))
            continue

        record = ''.join([
            'id: ', str(tweet['id']),
            '\ndate: ', tweet['created_at'],
            '\ntext: ', tweet['text'], '\n',
        ])
        save(started_at, record)
        print(tweet['id'])
        printNicely(tweet['created_at'])
        printNicely(tweet['text'])
        print('\n')
def main():
    """Forward tweets from the streaming API to a Kafka topic.

    Credentials and stream options come from ``parse_arguments()`` only.
    Every item with a ``text`` field is reduced to the fields named in
    ``tweet_keys`` plus the author fields named in ``user_keys`` (stored
    under the ``user_``-prefixed names in ``user_keys_io``) and sent to the
    ``twitter_api_stream`` topic as ASCII JSON. A running count is printed
    every 100 forwarded tweets.

    The producer is closed when the loop ends for any reason, including an
    exception.
    """
    # Reuse the HTTP proxy for HTTPS when one is configured; without one,
    # leave the environment alone instead of failing with KeyError.
    http_proxy = os.environ.get("http_proxy")
    if http_proxy is not None:
        os.environ["https_proxy"] = http_proxy

    args = parse_arguments()

    tweet_keys = [
        'in_reply_to_status_id',
        'in_reply_to_screen_name',
        'in_reply_to_user_id',
        'favorite_count',
        'retweet_count',
        'text',
        'id_str',
        'created_at',
    ]
    user_keys = ['id_str', 'screen_name', 'name', 'location']
    # Author field -> key used in the outgoing record.
    user_keys_io = {
        'id_str': 'user_id_str',
        'screen_name': 'user_screen_name',
        'name': 'user_name',
        'location': 'user_location',
    }

    auth = OAuth(args.token, args.token_secret,
                 args.consumer_key, args.consumer_secret)
    stream_args = dict(timeout=args.timeout,
                       block=not args.no_block,
                       heartbeat_timeout=args.heartbeat_timeout)
    query_args = dict()
    if args.track_keywords:
        query_args['track'] = args.track_keywords

    stream = TwitterStream(auth=auth, **stream_args)
    if args.track_keywords:
        tweet_iter = stream.statuses.filter(**query_args)
    else:
        tweet_iter = stream.statuses.sample()

    producer = KafkaProducer(
        bootstrap_servers=['localhost:6667'],
        value_serializer=lambda m: json.dumps(m).encode('ascii'))
    cnt = 0
    try:
        for tweet in tweet_iter:
            if tweet is None:
                printNicely("-- None --")
            elif tweet is Timeout:
                printNicely("-- Timeout --")
            elif tweet is HeartbeatTimeout:
                printNicely("-- Heartbeat Timeout --")
            elif tweet is Hangup:
                printNicely("-- Hangup --")
            elif tweet.get('text'):
                record = {k: tweet[k] for k in tweet_keys}
                record.update(
                    {user_keys_io[k]: tweet['user'][k] for k in user_keys})
                producer.send('twitter_api_stream', record)
                cnt += 1
                if not cnt % 100:
                    print('Count: ' + str(cnt))
    finally:
        # NOTE(review): send() is presumably asynchronous (kafka-python), so
        # close the producer to push out anything still buffered -- confirm.
        producer.close()