def getTweets(handle, year, num):
    """Fetch up to `num` original tweets for `handle` within `year`'s ID window,
    process each via getContent(), then attach the collected tweets to the
    matching candidate in `candidateList`.

    Fixes vs. original: removed a stray `''` literal that was being
    concatenated onto the 'since_id' key, converted Python-2 print statements
    to print() calls (consistent with the rest of the file), and narrowed the
    bare `except:` so KeyboardInterrupt/SystemExit are not swallowed.
    """
    print('Getting some Tweets for you...')
    pager = TwitterPager(
        api, currentAPI, {
            'screen_name': handle,
            'count': num,
            'include_rts': False,
            'exclude_replies': True,
            'since_id': ID_Ranges['start'][year],
            'max_id': ID_Ranges['end'][year]
        })
    count = 0
    for item in pager.get_iterator(wait=3.5):
        if count >= num:
            break  # limit the number of tweets for the moment
        if 'text' in item:
            getContent(item)
            count = count + 1
        elif 'message' in item:
            # Error payload from the API (e.g. rate limit) — stop paging.
            print(item['message'])
            break
    # Add listOfTweets[] to the specific candidate for this election year.
    try:
        for c in candidateList:
            if (c.name == listOfTweets[0].author) and (c.electionYear == year):
                c.addTweets(listOfTweets)
                break
    except Exception:
        # listOfTweets may be empty (IndexError) or a lookup may fail.
        print('Error: Could not Find Candidate')
def get_twitter_data(start_time):
    """
    Collects tweets from prominent forex accounts over specified interval.

    Args:
        start_time: String of RFC3339 formatted date
    Returns:
        List with dictionaries containing tweet text, when they were
        created, and public metrics
    """
    # Get tweets in batches of 100 for speed.
    # TwitterPager's default inter-page delay prevents rate limiting.
    pager = TwitterPager(
        api, 'tweets/search/recent', {
            'query': 'from:FXstreetNews OR from:forexcom',
            'tweet.fields': 'public_metrics,created_at',
            'start_time': str(start_time),
            'max_results': 100
        })
    tweet_data = []
    counter = 0
    for item in pager.get_iterator(new_tweets=False):
        tweet_data.append({
            "text": item['text'],
            "created_at": item['created_at'],
            # BUG FIX: public_metrics was promised by the docstring (and
            # requested via tweet.fields) but never collected.
            "public_metrics": item['public_metrics']
        })
        print(item)
        counter += 1
    print(counter)
    return tweet_data
def get_tweets(self):
    """Return timeline tweets newer than init_tweet (bounded above by
    final_tweet when one is set), paging only if more than one page exists."""
    params = {
        'user_id': self.user.id_str,
        'since_id': self.init_tweet.id_str,
        'count': 200,
        'include_rts': True,
        'tweet_mode': 'extended',
    }
    if self.final_tweet:
        params['max_id'] = self.final_tweet.id_str

    response = self.twitter_api.request('statuses/user_timeline', params)
    response.response.raise_for_status()

    # A full first page (>199 items) implies more pages: walk them all.
    if len(response.json()) > 199:
        pager = TwitterPager(self.twitter_api, 'statuses/user_timeline', params)
        return [tweet for tweet in pager.get_iterator(wait=3.5)]
    return response.json()
def extract_tweets(consumer_key, consumer_secret, access_token_key,
                   access_token_secret, label):
    """Full-archive premium search for Singapore public-transport tweets.

    Args:
        consumer_key/consumer_secret/access_token_key/access_token_secret:
            Twitter API credentials.
        label: premium environment label for the fullarchive endpoint.
    Returns:
        DataFrame with created_at (converted via utc_former),
        tweet_coordinates (place) and text columns.
    """
    api = TwitterAPI(consumer_key, consumer_secret, access_token_key,
                     access_token_secret)
    PRODUCT = 'fullarchive'
    LABEL = label
    # Adding place automatically removes retweets
    SEARCH_TERM = '(bus OR smrt OR transport OR mrt OR lrt OR public transport OR sbs OR sbs transit OR transitlink OR lta OR towertransit OR land transport authority) lang:en place:Singapore'
    pager = TwitterPager(
        api, 'tweets/search/%s/:%s' % (PRODUCT, LABEL), {
            'query': SEARCH_TERM,
            'maxResults': 500,
            # BUG FIX: toDate had 13 digits (2020102300001); the premium API
            # expects yyyymmddHHMM (12 digits) — presumably 2020-10-23 00:01.
            'toDate': 202010230001,
            'fromDate': 201501010001
        })
    responseValues = []
    for item in pager.get_iterator():
        if 'text' in item:
            responseValues.append({
                'created_at': item['created_at'],
                'tweet_coordinates': item['place'],
                'text': item['text']
            })
        elif 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            break
    tweets = pd.DataFrame(responseValues)
    # Changing the timing
    tweets['created_at'] = tweets['created_at'].apply(utc_former)
    return tweets
async def dril(self, ctx, num=1, distance=500):
    """Sample `num` random tweets from @dril's recent timeline and send them
    to the invoking channel; `distance` caps how many tweets are collected."""
    api = TwitterAPI(k1, k2, auth_type='oAuth2')
    SCREEN_NAME = 'dril'
    pager = TwitterPager(api, 'statuses/user_timeline',
                         {'screen_name': SCREEN_NAME, 'count': 200})

    tweets = []
    collected = 0
    for item in pager.get_iterator(wait=0.1):
        if 'text' in item:
            collected += 1
            tweets.append(item['text'])
            if collected > distance:
                break
        elif 'message' in item:
            # Surface the API error message and stop collecting.
            await ctx.send(item['message'])
            break

    for _ in range(num):
        if not tweets:
            break
        pick = random.choice(tweets)
        await ctx.send(pick)
        tweets.remove(pick)  # avoid sending the same tweet twice
def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count):
    """Get tweets containing any words in 'word_list'.

    Runs forever: each pass exhausts one pager, and every 13 passes the
    loop sleeps 10 minutes to stay inside the search rate limit.
    """
    words = ' OR '.join(word_list)
    params = {'q': words, 'count': count}
    if region:
        params['geocode'] = '%f,%f,%fkm' % region  # lat,lng,radius
    rate_count = 0
    while True:
        pager = TwitterPager(api, 'search/tweets', params)
        rate_count += 1
        for item in pager.get_iterator():
            if 'text' in item:
                # BUG FIX: dict.has_key() was removed in Python 3; use `in`.
                if not no_retweets or 'retweeted_status' not in item:
                    process_tweet(item, photo_dir, stalk, no_retweets)
            elif 'message' in item:
                if item['code'] == 131:
                    continue  # ignore internal server error
                elif item['code'] == 88:
                    print('Suspend search until %s' % search.get_quota()['reset'])
                raise Exception('Message from twitter: %s' % item['message'])
        if rate_count == 13:
            print(
                "================================ Sleeping =================================="
            )
            time.sleep(600)
            print(
                "================================ Awake =================================="
            )
            rate_count = 0
def search_tweets(the_consumer_key, the_consumer_secret, the_access_token_key,
                  the_access_token_secret):
    """Print id and text for a small sample of English 'pizza' tweets.

    Fixes vs. original: Python-2 print statements converted to print()
    calls, consistent with the rest of the file (single-argument form, so
    still valid on Python 2).
    """
    api = TwitterAPI(consumer_key=the_consumer_key,
                     consumer_secret=the_consumer_secret,
                     access_token_key=the_access_token_key,
                     access_token_secret=the_access_token_secret)
    r = TwitterPager(api, 'search/tweets', {
        'q': 'pizza',
        'count': 10,
        'lang': 'en'
    })
    for item in r.get_iterator():
        if 'id' in item:
            print(item['id'])
        if 'text' in item:
            print(item['text'])
        if 'message' in item and item['code'] == 88:
            # Rate limited — stop paging.
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            break
    print('\r\n\r\n\r\n')
def __get_tweets(self, method, max_date):
    """Collect every item from endpoint `method` whose created_at date is
    strictly earlier than `max_date`."""
    pager = TwitterPager(self.__api, method, {'count': 200})
    return [
        item for item in pager.get_iterator()
        if dt_helpers.convert_to_date(item["created_at"]) < max_date
    ]
def get_tweets(self, q, count=5):
    """Return a list of tweet texts matching search query `q`.

    Fix vs. original: Python-2 print statement converted to a print() call
    (single-argument form, valid on both Python 2 and 3).
    """
    tweets = []
    r = TwitterPager(self.api, 'search/tweets', {'q': q, 'count': count})
    for item in r.get_iterator():
        if 'text' in item:
            tweets.append(item['text'])
        elif 'message' in item and item['code'] == 88:
            # Rate limited — return what we have so far.
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            break
    return tweets
def get_all_tweets(screen_name):
    """Download a user's timeline and dump selected fields to results.csv."""
    pager = TwitterPager(api, 'statuses/user_timeline',
                         {'screen_name': screen_name, 'count': 200})
    rows = []
    for tweet in pager.get_iterator(wait=3.5):
        rows.append([
            screen_name,
            tweet['id_str'],
            pd.to_datetime(tweet['created_at']),
            tweet['user']['location'],
            tweet['retweet_count'],
            tweet['favorite_count'],
            tweet['lang'],
            tweet['text'],
        ])
    df = pd.DataFrame(rows,
                      columns=['user_name', 'id', 'create_time', 'geo',
                               'retweets', 'favorite_count', 'language', 'text'])
    df.to_csv('results.csv', index=False)
    print('finish')
def rank_old_hashtags(api, word_list, n):
    """Continuously search past tweets matching `word_list` and feed each
    text to process_tweet, which tallies hashtags into `count`."""
    query = ' OR '.join(word_list)
    count = {}
    # Re-issue the search every time the pager is exhausted.
    while True:
        pager = TwitterPager(api, 'search/tweets', {'q': query, 'count': COUNT})
        for item in pager.get_iterator():
            if 'text' in item:
                process_tweet(item['text'], count, n)
            elif 'message' in item:
                code = item['code']
                if code == 131:
                    continue  # ignore internal server error
                if code == 88:
                    print('Suspend search until %s' % search.get_quota()['reset'])
                raise Exception('Message from twitter: %s' % item['message'])
def get_search(self):
    """Repeatedly run the premium search and accumulate matching tweets
    into self.full_tweet_list.

    Fix vs. original: removed the unused local `next = ''`, which also
    shadowed the builtin `next`.
    NOTE(review): the loop never breaks — it re-creates the pager forever;
    presumably intentional for continuous collection, but worth confirming.
    """
    while True:
        tweets = TwitterPager(
            self.api, 'tweets/search/%s/:%s' % (self.PRODUCT, self.LABEL), {
                'query': self.search_term,
                'fromDate': self.fromDate,
                'toDate': self.toDate
            })
        print(tweets)
        for tweet in tweets.get_iterator():
            if 'text' in tweet:
                self.full_tweet_list.append(tweet)
def do_search(api,db,keyword_query,geocode,from_id,to_id,next_id):
    # Harvest tweets matching `keyword_query` within `geocode`, walking
    # backwards from `from_id` down to `to_id`, storing each tweet in `db`
    # and persisting a resume marker via `progress.update`.
    # NOTE(review): Python-2 code (print statements, `except E, e` syntax).
    # NOTE(review): the `next_id` parameter is immediately overwritten below,
    # so its passed-in value is ignored — confirm against callers.
    #r = api.request('statuses/filter', {'locations': '112.5,-37.5,154.1,-12.8'})
    next_id=-1
    cur_id=-1
    # -1 sentinels mean "no bound": no max_id / search down to id 0.
    if from_id==-1:
        from_id=None
    if to_id==-1:
        to_id=0
    count=0
    pager = TwitterPager(api, 'search/tweets',
                         {'q': keyword_query, 'geocode': geocode, 'count': '100',
                          'max_id': str(from_id), 'since_id' : str(to_id)})
    while True:
        try:
            for item in pager.get_iterator():
                #print(item)
                if 'text' in item:
                    #try:
                    if True:
                        #print item["id"]
                        cur_id=int(item["id"])
                        #if next_id != -1, we run in re-start mode, don't reset next_id
                        #else we need to update next_id when the first item arrives in this iteration
                        #and next iteration's to_id will be set to next_id of this iteration
                        if next_id==-1:
                            next_id=cur_id
                        # Reached the lower bound of this harvest window: stop.
                        if cur_id<=to_id:
                            break
                        info=get_dict_object_from_tweet(item)
                        if not info:
                            print "Error parsing the tweet, ignore it"
                            continue
                        #put the data in the db
                        db.put(info)
                        count+=1
                        if count % 1000 == 0:
                            print count
                        #print item["id"],"ok"
                        #print(info["post_text"])
                        #persist the progress to ensure we can resume the harvester from here
                        progress.update(cur_id,to_id,next_id)
                elif 'message' in item:
                    # something needs to be fixed before re-connecting
                    raise Exception(item['message'])
            # Pager exhausted (or lower bound hit): report how many were stored.
            return count
        except TwitterAPI.TwitterError.TwitterRequestError,e:
            # 429 = rate limited: back off one minute and retry the while-loop.
            if e.status_code==429:
                print ("Too Many Requests, now sleeping...")
                sleep(60)
            else:
                raise e
def tweets(query='', filename='', q=''):
    ''' Collect Tweets by a user (max. 3200) or through a search query (max. last 10 days). '''
    # An empty/None `q` means "fetch a user timeline"; otherwise run a search.
    use_timeline = q == '' or q is None
    if filename == '':
        key = encode_query(query) if use_timeline else encode_query(q)
        filename = '{0}/{1}.tweets.jsonl'.format(DIR, key)
    if use_timeline:
        click.echo('Requesting Tweets by @{}'.format(query))
        r = TwitterPager(api, 'statuses/user_timeline', {
            'screen_name': query,
            'count': 200,
            'tweet_mode': 'extended'
        })
    else:
        click.echo('Requesting Tweets with the search query {}'.format(q))
        r = TwitterPager(api, 'search/tweets', {
            'q': q,
            'count': 100,
            'tweet_mode': 'extended'
        })
    n = 0
    with open(filename, 'a', encoding='utf-8') as f:
        for item in r.get_iterator(wait=2):
            n += 1
            if n % 1000 == 0:
                click.echo('{0} Tweets received. Oldest from {1}.'.format(
                    n, item['created_at']))
            if 'full_text' in item:
                json.dump(item, f)
                f.write('\n')
            elif 'message' in item and item['code'] == 88:
                click.echo('SUSPEND, RATE LIMIT EXCEEDED: {}\n'.format(
                    item['message']))
                break
    click.echo('Saved {0} Tweets in {1}'.format(n, filename))
    return
def get_followers(screen_names, connection, pager=True):
    """Map each screen name to its follower ids.

    With pager=True (default) every page of ids is walked; with pager=False
    only the raw single-request response is stored.
    """
    follower_dict = {}
    for name in screen_names:
        if pager is False:
            follower_dict[name] = connection.request('followers/ids',
                                                     {'screen_name': name})
        else:
            paged = TwitterPager(connection, 'followers/ids',
                                 {'screen_name': name})
            follower_dict[name] = [item for item in paged.get_iterator()]
    return follower_dict
def getTweetCount(q, p=False, debug=False):
    """Count tweets matching `q` created on the previous UTC day.

    Args:
        q: search query string.
        p: if True, print each counted tweet's created_at.
        debug: if True, print elapsed time in minutes.
    Returns:
        Number of matching tweets from yesterday (UTC).

    Fixes vs. original: removed the duplicate TwitterAPI construction and the
    unused tweepy auth handler / tMonth local; computed "yesterday" with a
    timedelta so the check no longer fails on the first day of a month
    (previously `tDay - 1` could be 0, matching no day).
    """
    api = TwitterAPI.TwitterAPI(consumer_key, consumer_secret, access_token,
                                access_token_secret)
    # today's info
    a = time.time()
    now = datetime.datetime.utcnow()
    tDay = now.date().day
    yesterday = (now - datetime.timedelta(days=1)).day
    count = 0
    r = TwitterPager(api, 'search/tweets', {'q': q, 'count': 100})
    for item in r.get_iterator(wait=6):
        time_stamp = item['created_at']
        day = int(time_stamp[8:10])  # day-of-month from "Www Mmm DD HH:MM:SS ..."
        if day != tDay and day != yesterday:
            break  # results are newest-first; we've paged past yesterday
        if 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s' % item['message'])
            break
        if day == yesterday:
            count += 1
            if p:
                print("Term: " + q + " on " + item["created_at"])
    if debug:
        b = time.time()
        c = int(b - a)
        print("\nThis took " + str(round(c / 60, 2)) + " minutes")
    return count


#res = tweet.getTweetCount("qwertyuiop", False,True)
#print(res)
def tweets(query='', filename='', q=''):
    ''' Collect Tweets by a user (max. 3200) or through a search query (max. last 10 days). '''
    # Empty/None `q` selects the user-timeline mode; otherwise search mode.
    timeline_mode = q == '' or q is None
    if filename == '':
        source = query if timeline_mode else q
        filename = '{}.tweets.jsonl'.format(encode_query(source))
    if timeline_mode:
        click.echo('Requesting Tweets by @{}'.format(query))
        r = TwitterPager(api, 'statuses/user_timeline',
                         {'screen_name': query,
                          'count': 200,
                          'tweet_mode': 'extended'})
    else:
        click.echo('Requesting Tweets with the search query {}'.format(q))
        r = TwitterPager(api, 'search/tweets',
                         {'q': q,
                          'count': 100,
                          'tweet_mode': 'extended'})
    n = 0
    with open(filename, 'a', encoding='utf-8') as f:
        for item in r.get_iterator(wait=2):
            n += 1
            if n % 1000 == 0:
                click.echo('{0} Tweets received. Oldest from {1}.'.format(
                    n, item['created_at']))
            if 'full_text' in item:
                json.dump(item, f)
                f.write('\n')
            elif 'message' in item and item['code'] == 88:
                click.echo(
                    'SUSPEND, RATE LIMIT EXCEEDED: {}\n'.format(item['message']))
                break
    click.echo('Saved {0} Tweets in {1}'.format(n, filename))
    return
def query_api():
    """Build an authenticated v2 client and return an iterator over the
    full-archive search results for QUERY."""
    creds = TwitterOAuth.read_file()
    api = TwitterAPI(
        creds.consumer_key,
        creds.consumer_secret,
        creds.access_token_key,
        creds.access_token_secret,
        auth_type='oAuth2',
        api_version='2'
    )
    tweet_fields = [
        'author_id',
        'created_at',
        'public_metrics',
        'referenced_tweets',
        'in_reply_to_user_id',
    ]
    expansions = [
        'author_id',
        'referenced_tweets.id',
        'referenced_tweets.id.author_id',
        'in_reply_to_user_id',
        'attachments.media_keys',
    ]
    pager = TwitterPager(api, 'tweets/search/all', {
        'query': QUERY,
        'tweet.fields': ','.join(tweet_fields),
        'expansions': ','.join(expansions),
        'media.fields': 'url',
        'user.fields': 'username,name',
        'start_time': '2021-02-25T06:00:00Z',
        'end_time': '2021-03-29T12:00:00Z',
        'max_results': 500
    })
    # wait=2 seconds between pages keeps under the search rate limit.
    return pager.get_iterator(wait=2)
def Request_Tofile(Api, file_name):
    ''' Function using TwitterPager module to paginate of response from
    Twitter REST API and write it to a json-file.
    IN: file_name represent the name and format of file from which we save
    all the tweets which are in JSON format
    OUT: file_name of the saved Json data.'''
    pager = TwitterPager(
        Api, 'tweets/search/fullarchive/:Fullarchive', {
            'query': '(Rålambshovsparken OR #Rålambshovsparken) -RT lang:sv',
            'maxResults': '100',
            'fromDate': '201501010000',
            'toDate': '201912310000'
        })
    statuses = [item for item in pager.get_iterator()]
    with open(file_name, 'w') as file:
        json.dump({'statuses': statuses}, file, indent=2)  # easier to read
    return file_name
def get_timeline(SCREEN_NAME, max):
    """Return up to `max` tweet texts from SCREEN_NAME's timeline.

    Reads app credentials from keys.txt (consumer key on line 1, consumer
    secret on line 2). `max` shadows the builtin but the parameter name is
    kept for caller compatibility.

    Fix vs. original: the consumer secret previously kept its trailing
    newline (only the key was rstripped), which breaks authentication.
    """
    with open("keys.txt", 'r') as f:
        consumer_key = f.readline().rstrip("\n")
        consumer_secret = f.readline().rstrip("\n")
    api = TwitterAPI(consumer_key, consumer_secret, auth_type='oAuth2')
    pager = TwitterPager(api, 'statuses/user_timeline', {
        'screen_name': SCREEN_NAME,
        'count': 200
    })
    tweets = []
    count = 0
    for item in pager.get_iterator(wait=3.5):
        if 'text' in item:
            count += 1
            tweets.append(item['text'])
            if count > (max - 1):
                break
    return tweets
def extract_tweets_api(title, keywords, start_date=None, search='standard', pg_count=100):
    """Collect tweets for each keyword via the chosen search tier.

    Args:
        title: unused; kept for caller compatibility.
        keywords: iterable of search terms, queried one at a time.
        start_date: 'yyyymmddHHMM' string for premium tiers (defaults to the
            earliest possible date).
        search: 'standard' | '30day' | 'fullarchive'.
        pg_count: page size per request.
    Returns:
        DataFrame of tweet payloads (one row per tweet, plus a 'keyword'
        column), de-duplicated on 'id'. Empty DataFrame if nothing matched.

    Fixes vs. original: `start_date == None` -> `is None`; replaced the
    removed-in-pandas-2 DataFrame.append with a row list + one DataFrame
    build; returns an empty frame instead of raising NameError when no
    tweets arrive; dropped unused imports/locals.
    """
    import pandas as pd
    from TwitterAPI import TwitterAPI
    from TwitterAPI import TwitterPager

    # SECURITY: credentials are hard-coded; move them to environment
    # variables or a config file outside version control, and rotate them.
    consumer_key = 'lu4NOuUBJoqddUXqQbMEwqWLR'
    consumer_secret = 'gjydYC0PTbWnNZKrN4bscGr1hUcDS6V5NlzPU3n8n3wUciG2Z8'
    access_token_key = '1038416706450935808-ahzZtl4lrS1kNjIvgBwCnwsOxKqR1y'
    access_token_secret = 'oy8TZTD7w7RWwFmuOo3SLPjDFyf6XOfMGXxJ7FJoz2CKo'
    api = TwitterAPI(consumer_key, consumer_secret, access_token_key,
                     access_token_secret)

    if search == 'standard':
        endpoint = 'search/tweets'
    if search == '30day':
        endpoint = 'tweets/search/30day/:Testing'
    if search == 'fullarchive':
        endpoint = 'tweets/search/fullarchive/:Testing'

    rows = []
    count = 0
    for word in keywords:
        print("searching keyword: " + word)
        if search == 'standard':
            pager = TwitterPager(
                api, endpoint, {
                    'q': word,
                    'count': pg_count,
                    'lang': 'en',
                    'tweet_mode': 'extended'
                })
        else:
            if start_date is None:
                start_date = "200603210000"
            pager = TwitterPager(api, endpoint, {
                'query': word,
                'maxResults': pg_count,
                'fromDate': start_date
            })
        for payload in pager.get_iterator(wait=1):
            row = pd.Series(payload)
            row['keyword'] = word
            rows.append(row)
            count += 1
            print('getting tweet page ' + str(count))

    if not rows:
        return pd.DataFrame()
    tweet_df = pd.DataFrame(rows)
    tweet_df = tweet_df.drop_duplicates('id')
    # obtain tweets in reply to
    # add number of replies
    #tweet_df = reply_scrap(tweet_df)
    return tweet_df
# code to scrape Twitter API
import re
import os
import csv
import itertools
import collections
from TwitterAPI import TwitterAPI, TwitterPager

# Initialize Twitter API (strings below are credential placeholders)
api = TwitterAPI('*key*', '*secret key*', '*access key*', '*secret access key')

r = TwitterPager(api, 'search/tweets', {
    'q': 'Die Hard Christmas-filter:retweets',
    'tweet_mode': 'extended'
})

# Write to file.
# FIXES vs. original: the file handle was never closed (data could be lost
# on exit) and lacked newline='', which makes the csv module emit blank
# rows on Windows. The with-block guarantees flush+close.
with open("die-hard-tweets-no-retweets.csv", "w+", encoding="utf-8",
          newline='') as handle:
    file = csv.writer(handle)
    for item in r.get_iterator():
        row = item['full_text'] if 'full_text' in item else ''
        row = row.replace("\n", " ")
        print(row)
        file.writerow([row])
def count_prayers(self, new=False):
    """Tally 'prayer' tweets about an incident via self.counter.

    Three modes:
      * first run (not new, last_id == 0): page backwards from the newest
        tweet, recording the initial tweet id;
      * resume (not new, last_id > 0): continue paging into the past from
        the last processed id (max_id);
      * live (new=True): poll forward from last_id (since_id) for new tweets.
    Each mode stops at tweets older than counter.shooting_date or on a
    rate-limit (code 88) message.
    """
    # Count past prayers until the date of the incident
    if not new and self.counter.last_id == 0:
        r = TwitterPager(self.api, 'search/tweets', {
            'q': self.counter.words,
            'count': 100
        })
        print("first iterator")
        for item in r.get_iterator(wait=6):
            if 'id_str' in item:
                tweet_id = int(item['id_str'])
                if tweet_id > self.counter.last_id:
                    # Newest tweet seen so far: remember it as the starting point.
                    self.counter.last_id = tweet_id
                    self.counter.initial_tweet = tweet_id
                    self.counter.update_initial_id()
                elif tweet_id < self.counter.last_id:
                    self.counter.last_id = tweet_id
                    self.counter.update_id()
            # Make sure we haven't reached a tweet that occured before the incident
            if 'created_at' in item:
                tweet_date = datetime.datetime.strptime(
                    item['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
                print(tweet_date)
                if tweet_date < self.counter.shooting_date:
                    break
            # Process tweet unless we reach an API call limit
            if 'text' in item:
                self.counter.process_tweet(item['text'])
            elif 'message' in item and item['code'] == 88:
                print('\n*** SUSPEND, RATE LIMIT EXCEEDED: %s\n' %
                      item['message'])
                break
        print("Tweets Checked " + str(self.counter.tweetsChecked))
        print(self.counter.total)
    #API has partially been parsed continue searching into the past
    elif not new and self.counter.last_id > 0:
        r = TwitterPager(
            self.api, 'search/tweets', {
                'q': self.counter.words,
                'count': 100,
                'max_id': self.counter.last_id
            })
        print("pickup iterator")
        for item in r.get_iterator(wait=6):
            if 'id_str' in item:
                tweet_id = int(item['id_str'])
                # Only move the marker backwards while resuming.
                if tweet_id < self.counter.last_id:
                    self.counter.last_id = tweet_id
                    self.counter.update_id()
            # Make sure we haven't reached a tweet that occured before the incident
            if 'created_at' in item:
                tweet_date = datetime.datetime.strptime(
                    item['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
                print(tweet_date)
                if tweet_date < self.counter.shooting_date:
                    break
            # Process tweet unless we reach an API call limit
            if 'text' in item:
                self.counter.process_tweet(item['text'])
            elif 'message' in item and item['code'] == 88:
                print('\n*** SUSPEND, RATE LIMIT EXCEEDED: %s\n' %
                      item['message'])
                break
        print("Tweets Checked " + str(self.counter.tweetsChecked))
        print(self.counter.total)
    # Count prayers from last checked ID and upto live prayers about the incident
    else:
        print("new iterator")
        r = TwitterPager(
            self.api, 'search/tweets', {
                'q': self.counter.words,
                'count': 100,
                'since_id': self.counter.last_id
            })
        # new_tweets=True keeps the iterator alive, yielding tweets as they arrive.
        for item in r.get_iterator(wait=6, new_tweets=True):
            if 'id_str' in item:
                tweet_id = int(item['id_str'])
                if tweet_id > self.counter.last_id:
                    self.counter.last_id = tweet_id
                    self.counter.update_id()
            if 'text' in item:
                print(item['created_at'])
                self.counter.process_tweet(item['text'])
            elif 'message' in item and item['code'] == 88:
                print('\n*** SUSPEND, RATE LIMIT EXCEEDED: %s\n' %
                      item['message'])
                break
        print("Tweets Checked " + str(self.counter.tweetsChecked))
        print(self.counter.total)
def photogallary(request):
    """Django view: search Twitter for videos matching a submitted tag and
    cache them as Photoa objects; render cached results when available.

    NOTE(review): credentials are hard-coded below — move to settings/env
    and rotate them. Also `list.extend(imagelink)` extends the list with the
    URL's individual characters (extend on a str) — presumably append() was
    intended; confirm before changing, since `list` is never read afterwards.
    """
    if request.method == "POST":
        form = PhotoForm(request.POST)
        if form.is_valid():
            photo = form.save(commit=False)
            photo.published_date = timezone.now()
            # Check whether this tag was already scraped and cached.
            post = Photoa.objects.all().filter(name=photo.tag)
            if not post:
                # SECURITY: hard-coded Twitter credentials (see note above).
                api = TwitterAPI(
                    'X2XG271rJPmdxtVVMz1ejtxMZ',
                    'jvGO68wXnnlC5gw3kWdpqZ7NkQwze2eDOxRFnIjZLQDY2iZ1vO',
                    '998624117602435073-8obGWt14pP19zD5ZtRjdbGuVZ7JTgCg',
                    'nZVJjHP1w1FZqr245PRcvdrYCpZeH2rKWxmaYsmcXUdLX')
                r = TwitterPager(api, 'search/tweets', {
                    'q': photo.tag,
                    'count': 10
                })
                list = []
                for item in r.get_iterator(wait=20):
                    try:
                        if 'text' in item:
                            images = item['extended_entities']['media']
                            for image in images:
                                if image['type'] == 'video':
                                    #imagelink = image['media_url']
                                    for video in image["video_info"][
                                            "variants"]:
                                        imagelink = video["url"]
                                        url = imagelink
                                        # Extract the file extension from the URL.
                                        p = url.split('/')[-1]
                                        l = p.split('?')[0]
                                        q = l.split('.')[-1]
                                        if q == 'mp4':
                                            list.extend(imagelink)
                                            # Download the video bytes.
                                            req = urllib.request.Request(url)
                                            resp = urllib.request.urlopen(req)
                                            respData = resp.read()
                                            from django.core.files.base import ContentFile
                                            f2 = ContentFile(respData)
                                            fs = default_storage
                                            filename = fs.save(
                                                url.split('/')[-1].split('?')
                                                [0], f2)
                                            # Persist the cached media record.
                                            pob = Photoa()
                                            pob.name = photo.tag
                                            pob.img.save(filename, f2, save=False)
                                            pob.url_img = url
                                            pob.save(True)
                                            photo.save()
                        elif 'message' in item and item['code'] == 88:
                            # Rate limited: stop scraping this tag.
                            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' %
                                  item['message'])
                            break
                    except KeyError:
                        # Tweet without extended_entities/media — skip it.
                        pass
                post = Photoa.objects.all().filter(name=photo.tag)
                return render(request, 'blog/display.html', {'list': post})
            # Tag already cached: render the stored results directly.
            list1 = []
            for p in post:
                list1.extend(p.url_img)
            return render(request, 'blog/display.html', {'list': post})
    else:
        form = PhotoForm()
    return render(request, 'blog/photosearch.html', {'form': form})
# Print a user's timeline. This will get up to 3,200 tweets, which # is the maximum the Twitter API allows. from TwitterAPI import TwitterAPI, TwitterPager SCREEN_NAME = 'TheTweetOfGod' api = TwitterAPI(<consumer key>, <consumer secret>, auth_type='oAuth2') pager = TwitterPager(api, 'statuses/user_timeline', {'screen_name':SCREEN_NAME, 'count':200}) count = 0 for item in pager.get_iterator(wait=3.5): if 'text' in item: count = count + 1 print(count, item['text']) elif 'message' in item: print(item['message']) break
from TwitterAPI import TwitterAPI, TwitterPager

# SECURITY: app credentials are hard-coded here; move them to environment
# variables or a config file outside version control, and rotate them.
consumer_key = 'GtWhGAVYvrZeP7VmBtpRjIZVo'
consumer_secret = 'OWrWYT6tFZnYfY5f1rhg9vqmB0xTDqxH80wh08pDXQ2WgnLbhr'

SCREEN_NAME = 'ns_consumentzuil'

api = TwitterAPI(consumer_key, consumer_secret, auth_type='oAuth2')
tweets = TwitterPager(api, 'statuses/user_timeline', {
    'screen_name': SCREEN_NAME,
    'count': 3
})

# Print each tweet with a running counter; stop on an API error payload.
count = 0
for t in tweets.get_iterator():
    if 'text' in t:
        count += 1
        print(count, t['text'])
    elif 'message' in t:
        print(t['message'])
        break

root = Tk()
rt_frame = Frame(root)
rt_frame.pack(fill='both', expand=True)
# BUG FIX: `root.mainloop` referenced the method without calling it, so the
# Tk event loop never started; the missing call parentheses are added.
root.mainloop()
from TwitterAPI import TwitterAPI, TwitterPager

SEARCH_TERM = 'pizza'

# NOTE: the angle-bracket placeholders must be replaced with real
# credentials before this example will run (it is not valid Python as-is).
api = TwitterAPI(<consumer key>,
                 <consumer secret>,
                 <access token key>,
                 <access token secret>)

pager = TwitterPager(api, 'search/tweets', {'q': SEARCH_TERM})

# Print the tweet text when present, otherwise the raw (error) payload.
for item in pager.get_iterator():
    print(item['text'] if 'text' in item else item)
# Load stored credentials from disk.
with open("twitterauth.txt", "r") as f:
    secret = json.load(f)

# Get API wrapper
api = TwitterAPI(
    secret["Consumer Key"],
    secret["Consumer Secret"],
    secret["Access Token Key"],
    secret["Access Token Secret"],
)

# Assemble request with paging support
# Dev environment endpoint: https://api.twitter.com/1.1/tweets/search/fullarchive/dev.json
search_params = {
    "query": "(game of thrones OR khaleesi OR jon snow) has:links lang:en",
    "maxResults": 10,
}
req = TwitterPager(api, "tweets/search/fullarchive/:dev", search_params)

# Get <maxResults> items every <3> seconds
count = 0
try:
    for item in req.get_iterator(wait=3):
        if "text" not in item:
            if 'message' in item:
                # something needs to be fixed before re-connecting
                raise Exception(item['message'])
            continue
        count += 1
        print(str(count))
        print(item)
except TwitterError.TwitterRequestError as e:
    print("Error")
    print(e)
def searchTweet(data_list, topic, total_num, page_length, result_type, language,
                keywords_num, abstract_num, the_consumer_key,
                the_consumer_secret, the_access_token_key,
                the_access_token_secret):
    """Search tweets containing `topic` and collect metadata for posts not
    already present in `data_list`.

    Args:
        data_list: collection of (screen_name, 'HH:MM:SS Mon DD YYYY') pairs
            already harvested; used for de-duplication.
        topic: search query string.
        total_num: stop after this many iterator items.
        page_length: tweets per page ('count' API parameter).
        result_type: 'mixed' | 'recent' | 'popular'.
        language: language code for the 'lang' parameter.
        keywords_num, abstract_num: reserved for keyword/abstract extraction
            (currently unused — see commented-out call below).
        the_*: Twitter API credentials.
    Returns:
        Dict mapping iterator index -> [name, time, favorite, raw_keywords,
        url, text].

    Fixes vs. original: docstring translated to English; the URL regex
    char-class [a-zA-z] (which also matched '[', ']', '^', '_' and '\\')
    corrected to [a-zA-Z] and made a raw string; the bare `except:` narrowed
    to `except Exception` so Ctrl-C is not swallowed.
    """
    api = TwitterAPI(consumer_key=the_consumer_key,
                     consumer_secret=the_consumer_secret,
                     access_token_key=the_access_token_key,
                     access_token_secret=the_access_token_secret)
    result = TwitterPager(
        api, 'search/tweets', {
            'q': topic,
            'count': page_length,
            'result_type': result_type,
            'lang': language
        })
    data = {}
    for i, item in enumerate(result.get_iterator()):
        try:
            if 'text' in item:
                time = getTime(item['created_at'])
                name = item['user']['screen_name']
                if (name, time.strftime('%H:%M:%S %b %d %Y')) not in data_list:
                    favorite = item.get('favorite_count', 0)
                    text = item['text']
                    url = findall(r'[a-zA-Z]+://[^\s]*', text)
                    url = url[0] if url else ''
                    if item['entities']['hashtags']:
                        raw_keywords = ' '.join(
                            each['text']
                            for each in item['entities']['hashtags'])
                    else:
                        raw_keywords = ''
                    #text_keyword_abstract(text, keywords_num, abstract_num)
                    data[i] = [name, time, favorite, raw_keywords, url, text]
            elif 'message' in item:
                print('ERROR %s: %s\n' % (item['code'], item['message']))
                continue
            if i >= total_num:
                break
        except Exception:
            # Best-effort per item: a malformed payload must not abort the run.
            continue
    return data
'query': f'conversation_id:{CONVERSATION_ID}', 'expansions': 'author_id', 'tweet.fields': 'author_id,conversation_id,created_at,referenced_tweets' }, hydrate_tweets=True) # "wait=2" means wait 2 seconds between each request. # The rate limit is 450 requests per 15 minutes, or # 15*60/450 = 2 seconds. orphans = [] for item in pager.get_iterator(wait=2): node = TreeNode(item) print(f'{node.id()} => {node.parent()}', item['author_id']['username']) # COLLECT ANY ORPHANS THAT ARE CHILDREN OF THE NEW NODE orphans = [ orphan for orphan in orphans if not node.find_parent_of(orphan) ] # IF THE NEW NODE CANNOT BE PLACED IN TREE, ORPHAN IT UNTIL ITS PARENT IS FOUND if not root.find_parent_of(node): orphans.append(node) print('\nTREE...') root.print_tree(0) assert len(orphans) == 0, f'{len(orphans)} orphaned tweets' except TwitterRequestError as e:
import matplotlib.pyplot as plt
import os

# setup: credentials and the hashtag to track come from the environment.
searchHashtag = os.environ['SEARCH_HASHTAG']
twitterApi = TwitterAPI(os.environ['TWITTER_CONSUMER_KEY'],
                        os.environ['TWITTER_CONSUMER_SECRET'],
                        os.environ['TWITTER_ACCESS_TOKEN'],
                        os.environ['TWITTER_ACCESS_TOKEN_SECRET'])
sentimentIntensityAnalyzer = SentimentIntensityAnalyzer()

# Interactive plotting window (macOS backend).
get_ipython().run_line_magic('matplotlib', 'osx')
plt.ion()
fig = plt.figure()
plt.axis([0, 1, 0, 1])

twitterPager = TwitterPager(twitterApi, 'search/tweets', {
    'q': searchHashtag,
    'lang': 'en',
    'tweet_mode': 'extended'
})

# Scatter each tweet's positive vs. negative sentiment as it arrives.
for tweet in twitterPager.get_iterator():
    scores = sentimentIntensityAnalyzer.polarity_scores(tweet['full_text'])
    plt.scatter(scores['pos'], scores['neg'], s=2)
    plt.show()
    plt.pause(0.01)