def extract_tweets(consumer_key, consumer_secret, access_token_key, access_token_secret, label):
    """Pull English tweets about Singapore public transport from the premium
    full-archive search endpoint and return them as a DataFrame.

    Args:
        consumer_key, consumer_secret, access_token_key, access_token_secret:
            Twitter API credentials.
        label: dev-environment label of the premium endpoint.

    Returns:
        pandas.DataFrame with columns created_at (converted via utc_former),
        tweet_coordinates and text.
    """
    api = TwitterAPI(consumer_key, consumer_secret, access_token_key, access_token_secret)
    PRODUCT = 'fullarchive'
    LABEL = label
    # Adding place automatically removes retweets
    SEARCH_TERM = '(bus OR smrt OR transport OR mrt OR lrt OR public transport OR sbs OR sbs transit OR transitlink OR lta OR towertransit OR land transport authority) lang:en place:Singapore'
    pager = TwitterPager(
        api, 'tweets/search/%s/:%s' % (PRODUCT, LABEL),
        {
            'query': SEARCH_TERM,
            'maxResults': 500,
            # Fix: toDate had 13 digits (2020102300001) — the premium API
            # expects 12-digit YYYYMMDDHHmm timestamps.  Pass both bounds
            # as strings so leading zeros cannot be lost.
            'toDate': '202010230001',
            'fromDate': '201501010001'
        })
    responseValues = []
    for item in pager.get_iterator():
        if 'text' in item:
            responseValues.append({
                'created_at': item['created_at'],
                'tweet_coordinates': item['place'],
                'text': item['text']
            })
        elif 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            break
    tweets = pd.DataFrame(responseValues)
    # Changing the timing
    tweets['created_at'] = tweets['created_at'].apply(utc_former)
    return tweets
def search_tweets(the_consumer_key, the_consumer_secret, the_access_token_key, the_access_token_secret):
    """Search the standard endpoint for 'pizza' tweets and print ids/texts.

    Fix: Python 2 `print` statements (a SyntaxError under Python 3, which
    the rest of this file targets) converted to print() calls.
    """
    api = TwitterAPI(consumer_key=the_consumer_key,
                     consumer_secret=the_consumer_secret,
                     access_token_key=the_access_token_key,
                     access_token_secret=the_access_token_secret)
    r = TwitterPager(api, 'search/tweets', {
        'q': 'pizza',
        'count': 10,
        'lang': 'en'
    })
    for item in r.get_iterator():
        if 'id' in item:
            print(item['id'])
        if 'text' in item:
            print(item['text'])
        # if 'entities' in item:
        #     print(item['entities'])
        if 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            break
        print('\r\n\r\n\r\n')
def getTweets(handle, year, num):
    """Fetch up to `num` original (no RT, no reply) tweets from `handle`
    within the id range for `year`, then attach the collected tweets to the
    matching candidate in candidateList.

    Fixes: Python 2 print statements converted to print(); removed a stray
    `''` literal that was implicitly concatenated onto the 'since_id' key;
    bare `except:` narrowed to `except Exception`.
    """
    print('Getting some Tweets for you...')
    pager = TwitterPager(
        api, currentAPI, {
            'screen_name': handle,
            'count': num,
            'include_rts': False,
            'exclude_replies': True,
            'since_id': ID_Ranges['start'][year],
            'max_id': ID_Ranges['end'][year]
        })
    count = 0
    for item in pager.get_iterator(wait=3.5):
        if count >= num:
            break  # limit the number of tweets for the moment
        if 'text' in item:
            getContent(item)
            count = count + 1
        elif 'message' in item:
            print(item['message'])
            break
    # Add listOfTweets[] to the specific candidate for this election year.
    # listOfTweets[0] raises IndexError when nothing was collected.
    try:
        for c in candidateList:
            if (c.name == listOfTweets[0].author) and (c.electionYear == year):
                c.addTweets(listOfTweets)
                break
    except Exception:
        print('Error: Could not Find Candidate')
def get_twitter_data(start_time):
    """ Collects tweets from prominent forex accounts over specified interval.

        Args:
            start_time: String of RFC3339 formatted date
        Returns:
            List with dictionaries containing tweet text, when they were
            created, and public metrics
    """
    # Get tweets in batches of 100 for speed.
    # get_iterator's default 5 second delay between pages prevents rate limiting.
    pager = TwitterPager(
        api, 'tweets/search/recent', {
            'query': 'from:FXstreetNews OR from:forexcom',
            'tweet.fields': 'public_metrics,created_at',
            'start_time': str(start_time),
            'max_results': 100
        })
    tweet_data = []
    counter = 0
    for item in pager.get_iterator(new_tweets=False):
        # Fix: public_metrics was requested via tweet.fields and promised by
        # the docstring but never copied into the result.
        tweet_data.append({
            "text": item['text'],
            "created_at": item['created_at'],
            "public_metrics": item['public_metrics']
        })
        print(item)
        counter += 1
    print(counter)
    return tweet_data
def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count):
    """Get tweets containing any words in 'word_list'.

    Loops forever, sleeping 10 minutes after every 13 search requests to
    stay inside the rate limit.

    Fix: `item.has_key(...)` was removed in Python 3 — replaced with the
    `in` operator.
    """
    words = ' OR '.join(word_list)
    params = {'q': words, 'count': count}
    if region:
        params['geocode'] = '%f,%f,%fkm' % region  # lat,lng,radius
    rate_count = 0
    while True:
        pager = TwitterPager(api, 'search/tweets', params)
        rate_count += 1
        for item in pager.get_iterator():
            if 'text' in item:
                # 'retweeted_status' marks a retweet; skip it when requested.
                if not no_retweets or 'retweeted_status' not in item:
                    process_tweet(item, photo_dir, stalk, no_retweets)
            elif 'message' in item:
                if item['code'] == 131:
                    continue  # ignore internal server error
                elif item['code'] == 88:
                    print('Suspend search until %s' % search.get_quota()['reset'])
                raise Exception('Message from twitter: %s' % item['message'])
        if rate_count == 13:
            print(
                "================================ Sleeping =================================="
            )
            time.sleep(600)
            print(
                "================================ Awake =================================="
            )
            rate_count = 0
async def dril(self, ctx, num=1, distance=500):
    """Send `num` randomly chosen tweets from @dril's recent timeline,
    sampling from up to `distance` collected tweets (no repeats)."""
    # await self.bot.say("Give me a moment!")
    api = TwitterAPI(k1, k2, auth_type='oAuth2')
    handle = 'dril'
    timeline = TwitterPager(api, 'statuses/user_timeline',
                            {'screen_name': handle, 'count': 200})
    pool = []
    for entry in timeline.get_iterator(wait=0.1):
        if 'text' in entry:
            # await self.bot.say(str(len(pool)) + ". " + entry['text'])
            pool.append(entry['text'])
            if len(pool) > distance:
                break
        elif 'message' in entry:
            await ctx.send(entry['message'])
            break
    for _ in range(num):
        if not pool:
            break
        picked = random.choice(pool)
        await ctx.send(picked)
        pool.remove(picked)
def get_tweets(self):
    """Fetch the user's timeline between init_tweet and final_tweet.

    A single request is enough for short spans; if the first page comes back
    nearly full (> 199 items) the whole range is re-fetched via TwitterPager.
    """
    params = {
        'user_id': self.user.id_str,
        'since_id': self.init_tweet.id_str,
        'count': 200,
        'include_rts': True,
        'tweet_mode': 'extended',
    }
    if self.final_tweet:
        params['max_id'] = self.final_tweet.id_str
    response = self.twitter_api.request(
        'statuses/user_timeline',
        params,
    )
    response.response.raise_for_status()
    first_page = response.json()
    if len(first_page) <= 199:
        return first_page
    pager = TwitterPager(self.twitter_api, 'statuses/user_timeline', params)
    return list(pager.get_iterator(wait=3.5))
def __get_tweets(self, method, max_date):
    """Page through `method` (200 items per page) and keep only the items
    created strictly before `max_date`."""
    pager = TwitterPager(self.__api, method, {'count': 200})
    return [
        item
        for item in pager.get_iterator()
        if dt_helpers.convert_to_date(item["created_at"]) < max_date
    ]
def get_tweets(self, q, count=5):
    """Return up to `count` tweet texts matching search query `q`.

    Fix: the Python 2 `print` statement (a SyntaxError in Python 3)
    converted to a print() call.
    """
    tweets = []
    r = TwitterPager(self.api, 'search/tweets', {'q': q, 'count': count})
    for item in r.get_iterator():
        if 'text' in item:
            tweets.append(item['text'])
        elif 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            break
    return tweets
def get_all_tweets(screen_name):
    """Download a user's full available timeline and dump it to results.csv."""
    pager = TwitterPager(api, 'statuses/user_timeline',
                         {'screen_name': screen_name, 'count': 200})
    rows = []
    for tweet in pager.get_iterator(wait=3.5):
        rows.append([
            screen_name,
            tweet['id_str'],
            pd.to_datetime(tweet['created_at']),
            tweet['user']['location'],
            tweet['retweet_count'],
            tweet['favorite_count'],
            tweet['lang'],
            tweet['text'],
        ])
    frame = pd.DataFrame(rows, columns=['user_name', 'id', 'create_time', 'geo',
                                        'retweets', 'favorite_count', 'language', 'text'])
    frame.to_csv('results.csv', index=False)
    print('finish')
def get_search(self):
    """Run one premium search between self.fromDate and self.toDate and
    append every tweet carrying 'text' to self.full_tweet_list.

    Fixes: the original wrapped everything in `while True` with no break,
    re-issuing the identical request forever; it also bound an unused
    local named `next`, shadowing the builtin.
    """
    tweets = TwitterPager(
        self.api,
        'tweets/search/%s/:%s' % (self.PRODUCT, self.LABEL), {
            'query': self.search_term,
            'fromDate': self.fromDate,
            'toDate': self.toDate
        })
    print(tweets)
    for tweet in tweets.get_iterator():
        if 'text' in tweet:
            self.full_tweet_list.append(tweet)
def rank_old_hashtags(api, word_list, n):
    # Repeatedly search past tweets matching any word in word_list and feed
    # each tweet's text to process_tweet, which tallies hashtags into `count`
    # (presumably keeping the top n — confirm against process_tweet).
    # NOTE(review): the outer loop has no break, so the same search is
    # re-issued forever; looks like intentional continuous polling — confirm.
    words = ' OR '.join(word_list)
    count = {}
    while True:
        # COUNT and `search` are module-level names not visible in this
        # chunk — TODO confirm they are defined elsewhere in the file.
        pager = TwitterPager(api, 'search/tweets', {'q':words, 'count':COUNT})
        for item in pager.get_iterator():
            if 'text' in item:
                process_tweet(item['text'], count, n)
            elif 'message' in item:
                if item['code'] == 131:
                    continue # ignore internal server error
                elif item['code'] == 88:
                    # Rate limited: report when the quota resets, then raise.
                    print('Suspend search until %s' % search.get_quota()['reset'])
                raise Exception('Message from twitter: %s' % item['message'])
def do_search(api, db, keyword_query, geocode, from_id, to_id, next_id):
    """Harvest tweets matching `keyword_query` near `geocode` into `db`,
    walking ids from `from_id` down to `to_id` and checkpointing progress.

    Fix: this block was Python 2 (`print x` statements and
    `except E, e:` syntax), both SyntaxErrors under Python 3; also removed
    the redundant `if True:` wrapper.

    Args:
        api: TwitterAPI client.
        db: storage with a put(dict) method.
        from_id / to_id: id window; -1 means open-ended (None / 0).
        next_id: -1 unless running in re-start mode.
    Returns:
        Number of tweets stored.
    """
    # r = api.request('statuses/filter', {'locations': '112.5,-37.5,154.1,-12.8'})
    next_id = -1
    cur_id = -1
    if from_id == -1:
        from_id = None
    if to_id == -1:
        to_id = 0
    count = 0
    pager = TwitterPager(api, 'search/tweets',
                         {'q': keyword_query, 'geocode': geocode, 'count': '100',
                          'max_id': str(from_id), 'since_id': str(to_id)})
    while True:
        try:
            for item in pager.get_iterator():
                if 'text' in item:
                    cur_id = int(item["id"])
                    # If next_id != -1 we run in re-start mode — don't reset it.
                    # Otherwise record the first id seen this iteration; the
                    # next iteration's to_id will be set to this next_id.
                    if next_id == -1:
                        next_id = cur_id
                    if cur_id <= to_id:
                        break
                    info = get_dict_object_from_tweet(item)
                    if not info:
                        print("Error parsing the tweet, ignore it")
                        continue
                    # Put the data in the db.
                    db.put(info)
                    count += 1
                    if count % 1000 == 0:
                        print(count)
                    # Persist the progress so the harvester can resume here.
                    progress.update(cur_id, to_id, next_id)
                elif 'message' in item:
                    # Something needs to be fixed before re-connecting.
                    raise Exception(item['message'])
            return count
        except TwitterAPI.TwitterError.TwitterRequestError as e:
            if e.status_code == 429:
                print("Too Many Requests, now sleeping...")
                sleep(60)
            else:
                raise
def tweets(query='', filename='', q=''):
    ''' Collect Tweets by a user (max. 3200) or through a search query (max. last 10 days). '''
    searching = not (q == '' or q is None)
    if filename == '':
        key = encode_query(q) if searching else encode_query(query)
        filename = '{0}/{1}.tweets.jsonl'.format(DIR, key)
    if searching:
        click.echo('Requesting Tweets with the search query {}'.format(q))
        pager = TwitterPager(api, 'search/tweets', {
            'q': q,
            'count': 100,
            'tweet_mode': 'extended'
        })
    else:
        click.echo('Requesting Tweets by @{}'.format(query))
        pager = TwitterPager(api, 'statuses/user_timeline', {
            'screen_name': query,
            'count': 200,
            'tweet_mode': 'extended'
        })
    total = 0
    with open(filename, 'a', encoding='utf-8') as sink:
        for item in pager.get_iterator(wait=2):
            total += 1
            if total % 1000 == 0:
                click.echo('{0} Tweets received. Oldest from {1}.'.format(
                    total, item['created_at']))
            if 'full_text' in item:
                json.dump(item, sink)
                sink.write('\n')
            elif 'message' in item and item['code'] == 88:
                click.echo('SUSPEND, RATE LIMIT EXCEEDED: {}\n'.format(
                    item['message']))
                break
    click.echo('Saved {0} Tweets in {1}'.format(total, filename))
    return
def get_timeline(self, count: int = 20, since: int = None, max: int = None) -> TwitterPager:
    """Build a TwitterPager over the authenticated user's home timeline.

    Args:
        count: number of tweets per page.
        since: only tweets with ids greater than this (optional).
        max: only tweets with ids up to this (optional).
    Returns:
        A TwitterPager ready to iterate.
    Raises:
        ValueError: if no API client has been configured.

    Fix: `!= None` replaced with the PEP 8 identity test `is not None`.
    """
    api = self.get_api()
    if api is not None:
        return TwitterPager(api, 'statuses/home_timeline',
                            self.query_params(count, since, max))
    else:
        raise ValueError("API is not set!")
def get_followers(screen_names, connection, pager=True):
    """Map each screen name to its followers — the raw API response when
    `pager` is False, otherwise the fully-paged list of follower ids."""
    result = {}
    for name in screen_names:
        if pager is False:
            result[name] = connection.request('followers/ids',
                                              {'screen_name': name})
        else:
            pages = TwitterPager(connection, 'followers/ids',
                                 {'screen_name': name})
            collected = []
            for entry in pages.get_iterator():
                collected.append(entry)
            result[name] = collected
    return result
def getTweetCount(q, p=False, debug=False):
    """Count tweets matching `q` that were created yesterday (UTC day).

    Args:
        q: search query string.
        p: print each matching tweet's term and timestamp.
        debug: print elapsed wall-clock time.
    Returns:
        Number of matching tweets created the previous UTC day.

    Fixes: removed the unused tweepy OAuth handler, the duplicate
    TwitterAPI construction and the unused `tMonth`; error items are now
    checked BEFORE indexing item['created_at'] (rate-limit messages carry
    no 'created_at' and previously raised KeyError).
    """
    a = time.time()
    # Today's UTC date; we count tweets from the previous day.
    timeStamp = datetime.datetime.utcnow().date()
    tDay = timeStamp.day
    api = TwitterAPI.TwitterAPI(consumer_key, consumer_secret, access_token,
                                access_token_secret)
    count = 0
    r = TwitterPager(api, 'search/tweets', {'q': q, 'count': 100})
    for item in r.get_iterator(wait=6):
        if 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s' % item['message'])
            break
        time_stamp = item['created_at']
        day = int(time_stamp[8:10])  # 'created_at' format: 'Www Mmm DD ...'
        if (tDay != day and tDay - 1 != day):
            break  # results are newest-first; we've gone past yesterday
        if (tDay - 1 == day):
            count += 1
            if (p):
                print("Term: " + q + " on " + item["created_at"])
    if (debug):
        b = time.time()
        c = int(b - a)
        print("\nThis took " + str(round(c / 60, 2)) + " minutes")
    return count


#res = tweet.getTweetCount("qwertyuiop", False,True)
#print(res)
def tweets(query='', filename='', q=''):
    ''' Collect Tweets by a user (max. 3200) or through a search query (max. last 10 days). '''
    searching = not (q == '' or q is None)
    if filename == '':
        filename = '{}.tweets.jsonl'.format(
            encode_query(q) if searching else encode_query(query))
    if searching:
        click.echo('Requesting Tweets with the search query {}'.format(q))
        pager = TwitterPager(api, 'search/tweets',
                             {'q': q, 'count': 100, 'tweet_mode': 'extended'})
    else:
        click.echo('Requesting Tweets by @{}'.format(query))
        pager = TwitterPager(api, 'statuses/user_timeline',
                             {'screen_name': query, 'count': 200,
                              'tweet_mode': 'extended'})
    total = 0
    with open(filename, 'a', encoding='utf-8') as sink:
        for item in pager.get_iterator(wait=2):
            total += 1
            if total % 1000 == 0:
                click.echo('{0} Tweets received. Oldest from {1}.'.format(
                    total, item['created_at']))
            if 'full_text' in item:
                json.dump(item, sink)
                sink.write('\n')
            elif 'message' in item and item['code'] == 88:
                click.echo(
                    'SUSPEND, RATE LIMIT EXCEEDED: {}\n'.format(item['message']))
                break
    click.echo('Saved {0} Tweets in {1}'.format(total, filename))
    return
def get_user_timeline(self, user_name, n=200, from_date=None, to_date=None):
    """Fetch a user's timeline (replies excluded) and hand the pager to
    __get_tweets for date filtering and parsing."""
    logger.info(f'tw api timeline for user: {user_name}')
    request_params = {
        'screen_name': user_name,
        'count': n,
        'exclude_replies': 'true'
    }
    pager = TwitterPager(self.api, 'statuses/user_timeline', request_params)
    return self.__get_tweets(pager, n, from_date, to_date, 2)
def query_api():
    """Authenticate from the OAuth credentials file and return an iterator
    over full-archive (v2) search results for QUERY, waiting 2s per page."""
    creds = TwitterOAuth.read_file()
    api = TwitterAPI(
        creds.consumer_key,
        creds.consumer_secret,
        creds.access_token_key,
        creds.access_token_secret,
        auth_type='oAuth2',
        api_version='2'
    )
    tweet_fields = [
        'author_id',
        'created_at',
        'public_metrics',
        'referenced_tweets',
        'in_reply_to_user_id',
    ]
    expansions = [
        'author_id',
        'referenced_tweets.id',
        'referenced_tweets.id.author_id',
        'in_reply_to_user_id',
        'attachments.media_keys',
    ]
    params = {
        'query': QUERY,
        'tweet.fields': ','.join(tweet_fields),
        'expansions': ','.join(expansions),
        'media.fields': 'url',
        'user.fields': 'username,name',
        'start_time': '2021-02-25T06:00:00Z',
        'end_time': '2021-03-29T12:00:00Z',
        'max_results': 500
    }
    return TwitterPager(api, 'tweets/search/all', params).get_iterator(wait=2)
def Request_Tofile(Api, file_name):
    '''
    Function using TwitterPager module to paginate of response from Twitter
    REST API and write it to a json-file.
    IN: file_name represent the name and format of file from which we save
    all the tweets which are in JSON format
    OUT: file_name of the saved Json data.'''
    pager = TwitterPager(
        Api, 'tweets/search/fullarchive/:Fullarchive', {
            'query': '(Rålambshovsparken OR #Rålambshovsparken) -RT lang:sv',
            'maxResults': '100',
            'fromDate': '201501010000',
            'toDate': '201912310000'
        })
    collected = [item for item in pager.get_iterator()]
    with open(file_name, 'w') as file:
        json.dump({'statuses': collected}, file, indent=2)  # easier to read
    return file_name
def get_timeline(SCREEN_NAME, max):
    """Return up to `max` tweet texts from a user's timeline using
    application-only (oAuth2) authentication with keys read from keys.txt.

    Fix: only the consumer key was rstripped of its trailing newline — the
    consumer secret kept it, which corrupts the credential.
    """
    with open("keys.txt", 'r') as f:
        consumer_key = f.readline()
        consumer_key = consumer_key.rstrip("\n")
        consumer_secret = f.readline()
        consumer_secret = consumer_secret.rstrip("\n")
        api = TwitterAPI(consumer_key, consumer_secret, auth_type='oAuth2')
    pager = TwitterPager(api, 'statuses/user_timeline', {
        'screen_name': SCREEN_NAME,
        'count': 200
    })
    tweets = []
    count = 0
    for item in pager.get_iterator(wait=3.5):
        if 'text' in item:
            count += 1
            tweets.append(item['text'])
            if count > (max - 1):
                break
    return tweets
def __prepare_request(self):
    """Build the search query from the configured hashtags, resolve the
    endpoint for the current collect mode, and create self.pager.

    Raises:
        Exception: with self.validation_error when toDate fails validation.

    Fixes: `type(x) is str` replaced with isinstance(); `!= None` replaced
    with `is not None`.
    """
    # A single hashtag string is used as-is; a list is OR-combined.
    hash_combine = self.hashtags if isinstance(
        self.hashtags, str) else " OR ".join(self.hashtags)
    query = "({}) lang:en".format(hash_combine)
    endpoint = self.helper.config_item('twitter_config.{}'.format(
        self.collect_mode))
    request_config = {'query': query, 'maxResults': 100}
    if self.toDate is not None:
        if not self.__validate_parameter(self.toDate, 'toDate'):
            raise Exception(self.validation_error)
        else:
            request_config['toDate'] = self.toDate
    self.pager = TwitterPager(self.api, endpoint, request_config)
def premium_search(self, product='fullarchive', label='prod', query='',
                   since=None, until=None, n=100):
    """Run a premium search for `query` between `since` and `until`
    (datetimes) and return up to n raw tweets via __get_tweets."""
    logger.info(f'tw api search for: {query}')
    endpoint = f'tweets/search/{product}/:{label}'
    window = {
        'query': query,
        'fromDate': since.strftime('%Y%m%d%H%M'),
        'toDate': until.strftime('%Y%m%d%H%M')
    }
    pager = TwitterPager(self.api, endpoint, window)
    return self.__get_tweets(pager, n, parse=False)
def collect_tweets(twitter, request):
    """ Collects a number of tweets and stores them in a text file """
    geocode, count, filepath = request
    pages = TwitterPager(twitter, 'search/tweets', {
        'geocode': geocode
    }).get_iterator()
    with open(filepath, 'w', encoding='utf-8') as f:
        # The sentinel dict ends the loop early when the pager runs dry.
        for _ in range(count):
            item = next(pages, {'message': 'sentinel'})
            if 'text' in item:
                # Remove extra whitespace and newlines
                print(' '.join(item.get('text').split()), file=f)
            elif 'message' in item:
                break
def get_response(tw_api, method, options, pages_limit):
    """Create a TwitterPager for the given endpoint and parameters.

    tw_api : TwitterAPI object
    method : string ex) 'search/tweets', 'statuses/user_timeline'
    options : dict ex) {'screen_name': name}, {'q': query}
    pages_limit : int — currently UNUSED; kept for interface compatibility.
        TODO: enforce the page limit while iterating the pager.

    return : a TwitterPager; callers obtain the JSON tweet items from its
        get_iterator() (this function does not fetch or jsonify anything
        itself, contrary to the previous docstring).
    """
    from TwitterAPI import TwitterPager
    return TwitterPager(tw_api, method, options)
def startTwitterPremiumApi():
    """Search the premium full archive for Turkish cyber-attack tweets in a
    fixed December 2015 window and write them to a CSV file.

    Fix: the CSV file handle was opened but never closed — it is now
    managed by a `with` block so it is flushed and closed on any exit path.
    """
    SEARCH_TERM = 'siber OR hacklendi OR (fidye yazılımı) lang:tr'
    PRODUCT = 'fullarchive'
    LABEL = 'production'
    api = TwitterAPI(config.CONSUMER_KEY, config.CONSUMER_SECRET,
                     config.ACCESS_TOKEN, config.ACCESS_SECRET)
    r = TwitterPager(
        api, 'tweets/search/%s/:%s' % (PRODUCT, LABEL), {
            'query': SEARCH_TERM,
            'fromDate': '201512070000',
            'toDate': '201512132359',
            "maxResults": "100"
        }).get_iterator()
    with io.open('2015_before_nictr_attack.csv', 'w', encoding='UTF-8') as csvFile:
        csvWriter = csv.writer(csvFile)
        for item in r:
            csvWriter.writerow([
                item['created_at'], item["id_str"], item['user']['screen_name'],
                item['text'] if 'text' in item else item
            ])
# code to scrape Twitter API
import re
import os
import csv
import itertools
import collections
from TwitterAPI import TwitterAPI, TwitterPager

# Initialize Twitter API (placeholder credentials)
api = TwitterAPI('*key*', '*secret key*', '*access key*', '*secret access key')

r = TwitterPager(api, 'search/tweets', {
    'q': 'Die Hard Christmas-filter:retweets',
    'tweet_mode': 'extended'
})

# Write each tweet's full text as one CSV row.
# Fixes: the original `csv.writer(open(...))` leaked the file handle (never
# flushed/closed); csv files should also be opened with newline='' per the
# csv module docs to avoid doubled line endings on Windows.
with open("die-hard-tweets-no-retweets.csv", "w+", encoding="utf-8",
          newline='') as outfile:
    file = csv.writer(outfile)
    for item in r.get_iterator():
        row = item['full_text'] if 'full_text' in item else ''
        row = row.replace("\n", " ")
        print(row)
        file.writerow([row])
def photogallary(request):
    # Django view: on POST with a valid PhotoForm, look up cached photos by
    # tag; if none are cached, search Twitter for the tag, download any mp4
    # video variants found and store them as Photoa records, then render the
    # gallery.  On GET, render the empty search form.
    # NOTE(review): Twitter credentials are hardcoded here — move them to
    # settings/env; they are also now public and should be rotated.
    if request.method == "POST":
        form = PhotoForm(request.POST)
        if form.is_valid():
            photo = form.save(commit=False)
            photo.published_date = timezone.now()
            # Existing cached results for this tag, if any.
            post = Photoa.objects.all().filter(name=photo.tag)
            if not post:
                api = TwitterAPI(
                    'X2XG271rJPmdxtVVMz1ejtxMZ',
                    'jvGO68wXnnlC5gw3kWdpqZ7NkQwze2eDOxRFnIjZLQDY2iZ1vO',
                    '998624117602435073-8obGWt14pP19zD5ZtRjdbGuVZ7JTgCg',
                    'nZVJjHP1w1FZqr245PRcvdrYCpZeH2rKWxmaYsmcXUdLX')
                r = TwitterPager(api, 'search/tweets', {
                    'q': photo.tag,
                    'count': 10
                })
                list = []  # NOTE(review): shadows the builtin `list`
                for item in r.get_iterator(wait=20):
                    try:
                        if 'text' in item:
                            # Tweets without media raise KeyError here,
                            # which the except below deliberately skips.
                            images = item['extended_entities']['media']
                            for image in images:
                                if image['type'] == 'video':
                                    #imagelink = image['media_url']
                                    for video in image["video_info"][
                                            "variants"]:
                                        imagelink = video["url"]
                                        url = imagelink
                                        # Derive the bare filename and its
                                        # extension from the URL.
                                        p = url.split('/')[-1]
                                        l = p.split('?')[0]
                                        q = l.split('.')[-1]
                                        if q == 'mp4':
                                            # NOTE(review): extend() adds the
                                            # URL character-by-character —
                                            # append() was probably intended.
                                            list.extend(imagelink)
                                            req = urllib.request.Request(url)
                                            resp = urllib.request.urlopen(req)
                                            respData = resp.read()
                                            from django.core.files.base import ContentFile
                                            f2 = ContentFile(respData)
                                            fs = default_storage
                                            # Save under the URL's filename.
                                            filename = fs.save(
                                                url.split('/')[-1].split('?')
                                                [0], f2)
                                            pob = Photoa()
                                            pob.name = photo.tag
                                            pob.img.save(filename, f2, save=False)
                                            pob.url_img = url
                                            pob.save(True)
                                            photo.save()
                        elif 'message' in item and item['code'] == 88:
                            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' %
                                  item['message'])
                            break
                    except KeyError:
                        # Tweet carried no downloadable media — skip it.
                        pass
                # Re-query now that downloads have been stored.
                post = Photoa.objects.all().filter(name=photo.tag)
                return render(request, 'blog/display.html', {'list': post})
            # Cached results existed; list1 is built but never used.
            list1 = []
            for p in post:
                list1.extend(p.url_img)
            return render(request, 'blog/display.html', {'list': post})
    else:
        form = PhotoForm()
    return render(request, 'blog/photosearch.html', {'form': form})
# Print a user's timeline. This will get up to 3,200 tweets, which
# is the maximum the Twitter API allows.

from TwitterAPI import TwitterAPI, TwitterPager

SCREEN_NAME = 'TheTweetOfGod'

# Fix: the original passed the literal placeholders `<consumer key>` /
# `<consumer secret>`, which is a SyntaxError.  Use named string
# placeholders to be replaced with real credentials.
CONSUMER_KEY = 'YOUR_CONSUMER_KEY'
CONSUMER_SECRET = 'YOUR_CONSUMER_SECRET'

api = TwitterAPI(CONSUMER_KEY, CONSUMER_SECRET, auth_type='oAuth2')

pager = TwitterPager(api, 'statuses/user_timeline',
                     {'screen_name': SCREEN_NAME, 'count': 200})

count = 0
for item in pager.get_iterator(wait=3.5):
    if 'text' in item:
        count = count + 1
        print(count, item['text'])
    elif 'message' in item:
        print(item['message'])
        break
}, hydrate_tweets=True) for item in r: root = TreeNode(item) print(f'ROOT {root.id()}') # GET ALL REPLIES IN CONVERSATION # (RETURNED IN REVERSE CHRONOLOGICAL ORDER) pager = TwitterPager( api, 'tweets/search/recent', { 'query': f'conversation_id:{CONVERSATION_ID}', 'expansions': 'author_id', 'tweet.fields': 'author_id,conversation_id,created_at,referenced_tweets' }, hydrate_tweets=True) # "wait=2" means wait 2 seconds between each request. # The rate limit is 450 requests per 15 minutes, or # 15*60/450 = 2 seconds. orphans = [] for item in pager.get_iterator(wait=2): node = TreeNode(item) print(f'{node.id()} => {node.parent()}', item['author_id']['username'])
# Fetch and print the three most recent tweets from a fixed account, then
# build the root Tk window with a frame that fills it.
from tkinter import *
from TwitterAPI import TwitterAPI, TwitterPager

# NOTE(review): credentials are hardcoded (and now public — rotate them);
# they belong in configuration.
consumer_key = 'GtWhGAVYvrZeP7VmBtpRjIZVo'
consumer_secret = 'OWrWYT6tFZnYfY5f1rhg9vqmB0xTDqxH80wh08pDXQ2WgnLbhr'
SCREEN_NAME = 'ns_consumentzuil'

# App-only (oAuth2) auth is sufficient for reading a public timeline.
api = TwitterAPI(consumer_key, consumer_secret, auth_type='oAuth2')
tweets = TwitterPager(api, 'statuses/user_timeline', {
    'screen_name': SCREEN_NAME,
    'count': 3
})

count = 0
for t in tweets.get_iterator():
    if 'text' in t:
        count += 1
        print(count, t['text'])
    elif 'message' in t:
        # Error item from the API (e.g. rate limit) — report and stop.
        print(t['message'])
        break

root = Tk()
rt_frame = Frame(root)
rt_frame.pack(fill='both', expand=True)