def get_tweets_from_search(self, search_term, limit):
    """Note: the limit value is not honored by the scraper."""
    tweets = query_tweets(search_term, limit=limit)
    tweets.reverse()
    return tweet.Tweet.create_from_scraper_response(tweets)
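# A minimal sketch of the underlying twitterscraper call, assuming the package is
# installed; the search term and limit below are illustrative. query_tweets returns
# a list of scraper Tweet objects, and reversing it (as the method above does)
# flips the order in which the scraper yielded them.
from twitterscraper.query import query_tweets

raw = query_tweets("#opensource", limit=20)
raw.reverse()
for t in raw:
    print(t.timestamp, t.text[:80])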
def get_record():
    list_of_tweets = query.query_tweets(
        query=q,
        limit=num,
        begindate=datetime.date(year_bg, month_bg, day_bg),
        enddate=datetime.date(year_ed, month_ed, day_ed),
        poolsize=20,
        lang=lang)
    return list_of_tweets
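# A hedged sketch of the module-level globals get_record() relies on; the names
# match the function body, but the values and the import form shown here are
# illustrative assumptions, not taken from the original module.
import datetime
from twitterscraper import query

q = "climate change"
num = 500
lang = "en"
year_bg, month_bg, day_bg = 2019, 1, 1
year_ed, month_ed, day_ed = 2019, 12, 31

records = get_record()
print(len(records), "tweets collected")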
def main():
    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
    try:
        # =====================================================================
        # Single-company example kept for reference:
        # query1 = company[1]          # or: query1 = 'amazon'
        # limit1 = 1000
        # begindateString = '2006-03-21'
        # begindate1 = valid_date(begindateString)
        # enddate1 = dt.date.today()
        # poolsize1 = 20
        # lang1 = None
        # output = query1 + '.json'
        # tweets = query_tweets(query=query1, limit=limit1, begindate=begindate1,
        #                       enddate=enddate1, poolsize=poolsize1, lang=lang1)
        # if tweets:
        #     with open(output, "w") as output_file:
        #         json.dump(tweets, output_file, cls=JSONEncoder)
        # =====================================================================
        with open('coname_twitter_account_users.csv', 'r', newline='') as myFile:
            readInput = csv.reader(myFile)
            for i, company in enumerate(readInput):
                if i > 0 and company[1]:
                    print(company[1])
                    output = company[1] + '.json'
                    if isfile(output):
                        logging.error("Output file already exists! Aborting.")
                        # continue
                        sys.exit(-1)
                    query1 = company[1]  # e.g. 'amazon'
                    limit1 = 100000
                    begindateString = '2006-03-21'
                    begindate1 = valid_date(begindateString)
                    enddate1 = dt.date.today()
                    poolsize1 = 20
                    lang1 = 'en'
                    tweets = query_tweets(query=query1, limit=limit1,
                                          begindate=begindate1, enddate=enddate1,
                                          poolsize=poolsize1, lang=lang1)
                    if tweets:
                        with open(output, "w") as output_file:
                            json.dump(tweets, output_file, cls=JSONEncoder)
    except KeyboardInterrupt:
        logging.info("Program interrupted by user. Quitting...")
def hashtag_tweets(request, hashtag, limit):
    """
    Returns a list of tweets for a hashtag.

    :param hashtag: str, the hashtag to search for
    :param limit: int, optional, number of tweets to retrieve (default 30)
    """
    tweets = query_tweets(hashtag, limit)
    data = [format_tweet(tweet) for tweet in tweets[:limit]]
    return JsonResponse(data, safe=False)
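# A hedged sketch of how this Django view might be routed in urls.py; the URL
# pattern is an assumption, not taken from the original project, and format_tweet
# is assumed to be defined alongside the view.
from django.urls import path

urlpatterns = [
    path('hashtags/<str:hashtag>/<int:limit>/', hashtag_tweets, name='hashtag_tweets'),
]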
def get_tweet(query, limit=None, begindate=today, enddate=tomorrow, poolsize=20, lang=" "):
    list_tweet = []
    if enddate != tomorrow:
        enddate = enddate + dt.timedelta(days=1)
    list_text = query_tweets(query, limit=limit, begindate=begindate,
                             enddate=enddate, poolsize=poolsize, lang=lang)
    hour_per = dt.timezone(dt.timedelta(hours=-5))  # set the target timezone (UTC-5)
    for tweet in list_text:
        dict_ = {
            # user name & id
            "screen_name": tweet.screen_name,
            "username": tweet.username,
            "user_id": tweet.user_id,
            # tweet basic data
            "tweet_id": tweet.tweet_id,
            "tweet_url": tweet.tweet_url,
            "timestamp": tweet.timestamp.astimezone(hour_per),
            "timestamp_epochs": tweet.timestamp_epochs,
            # tweet text
            "text": tweet.text,
            "text_html": tweet.text_html,
            "links": tweet.links,
            "hashtags": tweet.hashtags,
            # tweet media
            "has_media": tweet.has_media,
            "img_urls": tweet.img_urls,
            "video_url": tweet.video_url,
            # tweet action counts
            "likes": tweet.likes,
            "retweets": tweet.retweets,
            "replies": tweet.replies,
            "is_replied": tweet.is_replied,
            # details of replies to other tweets
            "is_reply_to": tweet.is_reply_to,
            "parent_tweet_id": tweet.parent_tweet_id,
            "reply_to_users": tweet.reply_to_users
        }
        list_tweet.append(dict_)
    df = pd.DataFrame(list_tweet).sort_values(by="timestamp", ascending=False)
    df.drop_duplicates(subset=["user_id", "tweet_id"], keep="first", inplace=True)
    df.index = range(len(df))
    return df
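# Example usage sketch. It assumes the module-level names get_tweet relies on
# (pandas as pd, datetime as dt, query_tweets, and the "today"/"tomorrow" dates
# used as defaults); the query and dates below are illustrative.
df = get_tweet("machine learning", limit=200,
               begindate=dt.date(2020, 1, 1), enddate=dt.date(2020, 1, 7))
df.to_csv("machine_learning_tweets.csv", index=False)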
def collect_tweets(name, articleDate, delta=30):
    name = name.lower()
    articleDate = datetime.strptime(articleDate, '%m/%d/%y')
    beginDate = (articleDate - timedelta(days=delta)).date()
    endDate = (articleDate + timedelta(days=delta)).date()

    # Collect tweets with mentions in the form of "FirstName LastName"
    tweets = query.query_tweets(name, limit=None, begindate=beginDate,
                                enddate=endDate, poolsize=5, lang='en')
    tweets_serialized_pt1 = [tweet.__dict__ for tweet in tweets]

    # Collect tweets with mentions in the form of "FirstNameLastName"
    no_space_name = name.replace(' ', '')
    underline_name = name.replace(' ', '_')
    tweets = query.query_tweets(no_space_name, limit=None, begindate=beginDate,
                                enddate=endDate, poolsize=5, lang='en')
    tweets_serialized_pt2 = [tweet.__dict__ for tweet in tweets]

    tweets_serialized = tweets_serialized_pt1 + tweets_serialized_pt2
    print(tweets_serialized_pt1)

    outfile_str = underline_name + '_tweets' + '.json'
    with open(outfile_str, 'w') as outfile:
        json.dump(tweets_serialized, outfile, default=datetime_handler)
    print('tweets saved!')
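# A minimal usage sketch; the person name and article date are illustrative, and
# datetime_handler is assumed to be defined elsewhere in the module.
collect_tweets("Jane Doe", "03/15/19", delta=7)
# writes jane_doe_tweets.json next to the script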
def main():
    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
    try:
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter, description=__doc__)
        parser.add_argument("query", type=str)
        parser.add_argument("-o", "--output", type=str, default="tweets.json")
        parser.add_argument("-l", "--limit", type=int, default=None)
        parser.add_argument("-a", "--all", action='store_true')
        parser.add_argument("--lang", type=str, default=None)
        parser.add_argument("-d", "--dump", action="store_true")
        parser.add_argument("-bd", "--begindate", type=valid_date,
                            default="2017-01-01", metavar='\b')
        parser.add_argument("-ed", "--enddate", type=valid_date,
                            default=dt.date.today(), metavar='\b')
        # --poolsize was not defined but is referenced below; the default of 20
        # mirrors the scraper's usual default.
        parser.add_argument("-p", "--poolsize", type=int, default=20, metavar='\b')
        args = parser.parse_args()

        if isfile(args.output) and not args.dump:
            logging.error("Output file already exists. Aborting!")
            exit(-1)

        if args.all:
            args.begindate = dt.date(2007, 3, 1)

        tweets = query_tweets(query=args.query, limit=args.limit,
                              begindate=args.begindate, enddate=args.enddate,
                              poolsize=args.poolsize, lang=args.lang)

        if args.dump:
            print(json.dumps(tweets, cls=JSONEncoder))
        else:
            if tweets:
                with open(args.output, "w") as output:
                    json.dump(tweets, output, cls=JSONEncoder)
    except KeyboardInterrupt:
        logging.info("Program interrupted by user. Quitting...")
def scrape_tweets(query, start, end):
    # GUI variant kept for reference:
    # query = e1.get()
    # begin = dt.date(int(sYear.get()), int(sMonth.get()), int(sDay.get()))
    # end = dt.date(int(eYear.get()), int(eMonth.get()), int(eDay.get()))
    begin = start
    queried_tweets = query_tweets(query, 1000, begindate=begin, enddate=end,
                                  lang='english')
    # df = pd.DataFrame(t.__dict__ for t in queried_tweets)
    tweets = []
    for x in queried_tweets:
        cleaned_tweets = {}
        cleaned_tweets['text'] = x.text
        cleaned_tweets['sentiment'] = get_sentiment(x.text)
        tweets.append(cleaned_tweets)
    return tweets
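# Example call sketch. get_sentiment is assumed to be defined elsewhere in the
# module; the query and dates below are illustrative.
import datetime as dt

results = scrape_tweets("bitcoin", dt.date(2020, 1, 1), dt.date(2020, 1, 31))
print("collected", len(results), "tweets with sentiment labels")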
def main():
    with open("data2.json", "w") as f:
        f.write("Current time {} START!!!\n".format(datetime.now().ctime()))

    user = '******'
    json_object_array = []
    data = {}
    tweet_count_old = 0

    for tweet in query_tweets(user, limit=None,
                              begindate=dt.date(2019, 12, 10),
                              enddate=dt.date(2019, 12, 11),
                              poolsize=20, lang='english'):
        # keep plain strings: bytes from .encode() are not JSON-serializable
        data['screen_name'] = tweet.screen_name
        data['timestamp'] = tweet.timestamp.ctime()
        data['text'] = tweet.text
        json_dump = json.dumps(data)
        json_object_array.append(json.loads(json_dump))

    with open("data2.json", "a") as f:
        f.write("Got {} tweets from username {}\n".format(
            len(json_object_array) - tweet_count_old, user))
    tweet_count_old = len(json_object_array)

    with open('data2.json', 'a') as f:
        json.dump(json_object_array, f, indent=2)
def getTwitter(self, q, size):
    # Popularity thresholds:
    replies = 20    # minimum replies to a post
    faves = 150     # minimum favorites
    retweets = 20   # minimum retweets
    # beginning search date, default 2 years before today
    after = dt.date.today() - dt.timedelta(days=731)
    # end search date, default today
    before = dt.date.today()
    # number of threads in pool; higher numbers return more tweets but increase
    # runtime, recommended <= 4
    poolsize = 1
    # adapt query for exact match
    query = '"' + str(q) + '"'
    # max number of tweets found by each thread
    qsize = 10

    # build initial list of tweet objects via the scraper
    list_of_tweets = query_tweets(
        str(query) + ' min_replies:' + str(replies) +
        ' min_faves:' + str(faves) + ' min_retweets:' + str(retweets),
        qsize, after, before, poolsize)

    # sort tweet objects by their likes property
    sorted_list_of_tweets = sorted(list_of_tweets,
                                   key=lambda tweet: tweet.likes, reverse=True)

    # add unique tweets to the list
    TweetLinkList = []
    for tweet in sorted_list_of_tweets:
        if ('https://www.twitter.com' + tweet.tweet_url, tweet.likes) not in TweetLinkList:
            # print(tweet.tweet_url.encode('utf-8'))
            # print(tweet.likes)
            TweetLinkList.append(
                ('https://www.twitter.com' + tweet.tweet_url, tweet.likes))
    return TweetLinkList[:size]
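# Illustration of the advanced-search string the method above assembles; the
# thresholds mirror its defaults and the phrase is an example.
q = "climate strike"
search = '"{}" min_replies:20 min_faves:150 min_retweets:20'.format(q)
print(search)  # "climate strike" min_replies:20 min_faves:150 min_retweets:20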
def process(category):
    # Save the retrieved tweets to file:
    dict_list = []
    for tweet in query_tweets(category, limit=3000,
                              begindate=dt.date(2013, 1, 1),
                              enddate=dt.date(2014, 1, 1),
                              poolsize=100, lang='en'):
        # print(json.dumps(tweet, cls=JSONEncoder))
        # Eliminate unnecessary fields from the tweet
        optimised_data = optimiser(json.dumps(tweet, cls=JSONEncoder),
                                   unnecessary_field=['fullname', 'timestamp'])
        dict_list.append(optimised_data)

    # Sort tweets by rating (likes + replies + retweets)
    sorted_dict = sorted(dict_list, key=lambda d: d['rating'], reverse=True)

    timestr = time.strftime("%Y%m%d-%H%M%S")
    dictToCSV(category + "-data" + timestr + ".csv", sorted_dict[0].keys(), sorted_dict)
    return
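# A hedged sketch of the optimiser helper process() relies on (the real helper is
# defined elsewhere); per the comments above it drops the listed fields and adds
# the 'rating' key used for sorting. dictToCSV is likewise assumed to exist.
def optimiser(tweet_json, unnecessary_field=()):
    d = json.loads(tweet_json)
    for field in unnecessary_field:
        d.pop(field, None)
    # illustrative rating formula: likes + replies + retweets
    d['rating'] = d.get('likes', 0) + d.get('replies', 0) + d.get('retweets', 0)
    return d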
def scrape_tweets(query, begin_date, end_date):
    """
    :param query: user input query
    :param begin_date:
    :param end_date:
    :return: None if no matching keywords, else a pandas DataFrame of tweets
    """
    limit = None
    lang = 'english'
    filters = [
        'tweet_id', 'text', 'timestamp', 'likes',
        'retweets', 'user_id', 'screen_name'
    ]
    tweets = query_tweets(query, limit=limit, lang=lang,
                          begindate=begin_date, enddate=end_date)
    if len(tweets) > 0:
        data_frame = DataFrame(tweet.__dict__ for tweet in tweets)[filters]
        data_frame = data_frame.dropna()  # dropna() returns a new frame; keep it
        return data_frame
    else:
        return None
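# Example usage sketch; the query and dates are illustrative, and DataFrame is
# assumed to come from `from pandas import DataFrame` as used inside the function.
import datetime as dt

frame = scrape_tweets("electric vehicles", dt.date(2020, 6, 1), dt.date(2020, 6, 30))
if frame is not None:
    frame.to_csv("ev_tweets.csv", index=False)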
def query():
    list_of_tweets = query_tweets("秘密", 10, begindate=dt.date(2020, 3, 21))
    for te in list_of_tweets:
        print(te.text)
from twitterscraper.query import query_tweets
import csv
import datetime
import json
# The two imports below are required by the Watson calls further down.
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
# from ibm_watson.natural_language_understanding_v1 import Features, RelationsOptions
# from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions

keyword = 'covid'
f = open(keyword + '.csv', 'w', encoding='UTF-8-sig', newline='')
w = csv.writer(f, delimiter=',')

list_of_tweets = query_tweets(keyword,
                              begindate=datetime.date(2020, 7, 27),
                              enddate=datetime.date(2020, 8, 1),
                              limit=100)
for tweet in list_of_tweets:
    w.writerow([tweet.timestamp, tweet.text])
f.close()

authenticator = IAMAuthenticator('Fuxoqi_ltW0gcE6PZkYT-lMS8zsY0Xtd7AfaKzqesa_W')
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2019-07-12', authenticator=authenticator)
natural_language_understanding.set_service_url(
    'https://api.kr-seo.natural-language-understanding.watson.cloud.ibm.com/instances/f85b9cf9-3ab1-477c-8627-5dd173ced2c1'
)

f = open('covid.csv', 'r', encoding='UTF-8-sig', newline='')
rdr = csv.reader(f)
def main():
    try:
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter, description=__doc__)
        parser.add_argument("query", type=str, help="Advanced twitter query")
        parser.add_argument("-o", "--output", type=str, default="tweets.json",
                            help="Path to a JSON file to store the gathered tweets to.")
        parser.add_argument("-l", "--limit", type=int, default=None,
                            help="Number of minimum tweets to gather.")
        parser.add_argument("-a", "--all", action='store_true',
                            help="Set this flag if you want to get all tweets "
                                 "in the history of twitter. Begindate is set to 2006-03-01. "
                                 "This may take a while. You can increase the number of parallel "
                                 "processes depending on the computational power you have.")
        parser.add_argument("-c", "--csv", action='store_true',
                            help="Set this flag if you want to save the results in CSV format.")
        parser.add_argument("-u", "--user", action='store_true',
                            help="Set this flag if you want to scrape tweets from a specific user. "
                                 "The query should then consist of the profile name you want to scrape, without @.")
        parser.add_argument("--lang", type=str, default=None,
                            help="Set this flag if you want to query tweets in \na specific language. You can choose from:\n"
                                 "en (English)\nar (Arabic)\nbn (Bengali)\n"
                                 "cs (Czech)\nda (Danish)\nde (German)\nel (Greek)\nes (Spanish)\n"
                                 "fa (Persian)\nfi (Finnish)\nfil (Filipino)\nfr (French)\n"
                                 "he (Hebrew)\nhi (Hindi)\nhu (Hungarian)\n"
                                 "id (Indonesian)\nit (Italian)\nja (Japanese)\n"
                                 "ko (Korean)\nmsa (Malay)\nnl (Dutch)\n"
                                 "no (Norwegian)\npl (Polish)\npt (Portuguese)\n"
                                 "ro (Romanian)\nru (Russian)\nsv (Swedish)\n"
                                 "th (Thai)\ntr (Turkish)\nuk (Ukrainian)\n"
                                 "ur (Urdu)\nvi (Vietnamese)\n"
                                 "zh-cn (Chinese Simplified)\n"
                                 "zh-tw (Chinese Traditional)")
        parser.add_argument("-d", "--dump", action="store_true",
                            help="Set this flag if you want to dump the tweets \nto the console rather than outputting to a file")
        parser.add_argument("-bd", "--begindate", type=valid_date, default="2006-03-21",
                            help="Scrape for tweets starting from this date. Format YYYY-MM-DD. \nDefault value is 2006-03-21",
                            metavar='\b')
        parser.add_argument("-ed", "--enddate", type=valid_date, default=dt.date.today(),
                            help="Scrape for tweets until this date. Format YYYY-MM-DD. \nDefault value is the date of today.",
                            metavar='\b')
        parser.add_argument("-p", "--poolsize", type=int, default=20,
                            help="Specify the number of parallel processes you want to run. \n"
                                 "Default value is set to 20. \nYou can change this number if you have more computing power available. \n"
                                 "Set to 1 if you don't want to run any parallel processes.",
                            metavar='\b')
        args = parser.parse_args()

        if isfile(args.output) and not args.dump:
            logger.error("Output file already exists! Aborting.")
            exit(-1)

        if args.all:
            args.begindate = dt.date(2006, 3, 1)

        if args.user:
            tweets = query_tweets_from_user(user=args.query, limit=args.limit)
        else:
            tweets = query_tweets(query=args.query, limit=args.limit,
                                  begindate=args.begindate, enddate=args.enddate,
                                  poolsize=args.poolsize, lang=args.lang)

        if args.dump:
            print(json.dumps(tweets, cls=JSONEncoder))
        else:
            if tweets:
                with open(args.output, "w", encoding="utf-8") as output:
                    if args.csv:
                        f = csv.writer(output)
                        f.writerow(["user", "fullname", "tweet-id", "timestamp", "url",
                                    "likes", "replies", "retweets", "text", "html"])
                        for x in tweets:
                            f.writerow([x.user, x.fullname, x.id, x.timestamp, x.url,
                                        x.likes, x.replies, x.retweets, x.text, x.html])
                    else:
                        json.dump(tweets, output, cls=JSONEncoder)
    except KeyboardInterrupt:
        logger.info("Program interrupted by user. Quitting...")
def main():
    try:
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter, description=__doc__)
        parser.add_argument("query", type=str, help="Advanced twitter query")
        parser.add_argument("-o", "--output", type=str, default="tweets.json",
                            help="Path to a JSON file to store the gathered tweets to.")
        parser.add_argument("-l", "--limit", type=int, default=None,
                            help="Number of minimum tweets to gather.")
        parser.add_argument("-a", "--all", action='store_true',
                            help="Set this flag if you want to get all tweets "
                                 "in the history of twitter. Begindate is set to 2006-03-01. "
                                 "This may take a while. You can increase the number of parallel "
                                 "processes depending on the computational power you have.")
        parser.add_argument("-c", "--csv", action='store_true',
                            help="Set this flag if you want to save the results in CSV format.")
        parser.add_argument("--lang", type=str, default=None,
                            help="Set this flag if you want to query tweets in \na specific language. You can choose from:\n"
                                 "en (English)\nar (Arabic)\nbn (Bengali)\n"
                                 "cs (Czech)\nda (Danish)\nde (German)\nel (Greek)\nes (Spanish)\n"
                                 "fa (Persian)\nfi (Finnish)\nfil (Filipino)\nfr (French)\n"
                                 "he (Hebrew)\nhi (Hindi)\nhu (Hungarian)\n"
                                 "id (Indonesian)\nit (Italian)\nja (Japanese)\n"
                                 "ko (Korean)\nmsa (Malay)\nnl (Dutch)\n"
                                 "no (Norwegian)\npl (Polish)\npt (Portuguese)\n"
                                 "ro (Romanian)\nru (Russian)\nsv (Swedish)\n"
                                 "th (Thai)\ntr (Turkish)\nuk (Ukrainian)\n"
                                 "ur (Urdu)\nvi (Vietnamese)\n"
                                 "zh-cn (Chinese Simplified)\n"
                                 "zh-tw (Chinese Traditional)")
        parser.add_argument("-d", "--dump", action="store_true",
                            help="Set this flag if you want to dump the tweets \nto the console rather than outputting to a file")
        parser.add_argument("-bd", "--begindate", type=valid_date, default="2006-03-21",
                            help="Scrape for tweets starting from this date. Format YYYY-MM-DD. \nDefault value is 2006-03-21",
                            metavar='\b')
        parser.add_argument("-ed", "--enddate", type=valid_date, default=dt.date.today(),
                            help="Scrape for tweets until this date. Format YYYY-MM-DD. \nDefault value is the date of today.",
                            metavar='\b')
        parser.add_argument("-p", "--poolsize", type=int, default=20,
                            help="Specify the number of parallel processes you want to run. \n"
                                 "Default value is set to 20. \nYou can change this number if you have more computing power available. \n"
                                 "Set to 1 if you don't want to run any parallel processes.",
                            metavar='\b')
        args = parser.parse_args()

        if isfile(args.output) and not args.dump:
            logger.error("Output file already exists! Aborting.")
            exit(-1)

        if args.all:
            args.begindate = dt.date(2006, 3, 1)

        tweets = query_tweets(query=args.query, limit=args.limit,
                              begindate=args.begindate, enddate=args.enddate,
                              poolsize=args.poolsize, lang=args.lang)

        if args.dump:
            print(json.dumps(tweets, cls=JSONEncoder))
        else:
            if tweets:
                with open(args.output, "w") as output:
                    if args.csv:
                        f = csv.writer(output)
                        f.writerow(["user", "fullname", "tweet-id", "timestamp", "url",
                                    "likes", "replies", "retweets", "text", "html"])
                        for x in tweets:
                            f.writerow([x.user, x.fullname, x.id, x.timestamp, x.url,
                                        x.likes, x.replies, x.retweets, x.text, x.html])
                    else:
                        json.dump(tweets, output, cls=JSONEncoder)
    except KeyboardInterrupt:
        logger.info("Program interrupted by user. Quitting...")
from twitterscraper.query import query_tweets
from twitterscraper.tweet import Tweet
import datetime
import time
import json

# Twitter crawling
start = time.time()  # record the start time
list_of_tweets = query_tweets('총선',
                              begindate=datetime.date(2020, 4, 1),
                              enddate=datetime.date(2020, 4, 30))

# Build the JSON records
file_path = "./data_0401_0430.json"
data = []
count = 1
for tweet in list_of_tweets:
    data.append({"id": count, "content": tweet.text})
    count += 1

# Save to file
with open(file_path, "w", encoding="utf-8") as make_file:
    json.dump(data, make_file, indent="\t", ensure_ascii=False)

total_time = time.time() - start
hours = total_time // 3600
minutes = (total_time - (hours * 3600)) // 60
seconds = total_time - (hours * 3600) - (minutes * 60)
print("Elapsed time: {0} h {1} min {2} s".format(hours, minutes, seconds))
def main():
    try:
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter, description=__doc__)
        parser.add_argument("query", type=str, help="Advanced twitter query")
        parser.add_argument("-o", "--output", type=str, default="tweets.json",
                            help="Path to a JSON file to store the gathered tweets to.")
        parser.add_argument("-l", "--limit", type=int, default=None,
                            help="Number of minimum tweets to gather.")
        parser.add_argument("-a", "--all", action='store_true',
                            help="Set this flag if you want to get all tweets "
                                 "in the history of twitter. Begindate is set to 2006-03-01. "
                                 "This may take a while. You can increase the number of parallel "
                                 "processes depending on the computational power you have.")
        parser.add_argument("-c", "--csv", action='store_true',
                            help="Set this flag if you want to save the results in CSV format.")
        parser.add_argument("-u", "--user", action='store_true',
                            help="Set this flag if you want to scrape tweets from a specific user. "
                                 "The query should then consist of the profile name you want to scrape, without @.")
        parser.add_argument("--profiles", action='store_true',
                            help="Set this flag if you want to scrape profile info of all the users you "
                                 "have previously scraped from. After all of the tweets have been scraped it will start "
                                 "a new process of scraping profile pages.")
        parser.add_argument("--lang", type=str, default=None,
                            help="Set this flag if you want to query tweets in \na specific language. You can choose from:\n"
                                 "en (English)\nar (Arabic)\nbn (Bengali)\n"
                                 "cs (Czech)\nda (Danish)\nde (German)\nel (Greek)\nes (Spanish)\n"
                                 "fa (Persian)\nfi (Finnish)\nfil (Filipino)\nfr (French)\n"
                                 "he (Hebrew)\nhi (Hindi)\nhu (Hungarian)\n"
                                 "id (Indonesian)\nit (Italian)\nja (Japanese)\n"
                                 "ko (Korean)\nmsa (Malay)\nnl (Dutch)\n"
                                 "no (Norwegian)\npl (Polish)\npt (Portuguese)\n"
                                 "ro (Romanian)\nru (Russian)\nsv (Swedish)\n"
                                 "th (Thai)\ntr (Turkish)\nuk (Ukrainian)\n"
                                 "ur (Urdu)\nvi (Vietnamese)\n"
                                 "zh-cn (Chinese Simplified)\n"
                                 "zh-tw (Chinese Traditional)")
        parser.add_argument("-d", "--dump", action="store_true",
                            help="Set this flag if you want to dump the tweets \nto the console rather than outputting to a file")
        parser.add_argument("-ow", "--overwrite", action="store_true",
                            help="Set this flag if you want to overwrite the existing output file.")
        parser.add_argument("-bd", "--begindate", type=valid_date, default="2006-03-21",
                            help="Scrape for tweets starting from this date. Format YYYY-MM-DD. \nDefault value is 2006-03-21",
                            metavar='\b')
        parser.add_argument("-ed", "--enddate", type=valid_date, default=dt.date.today(),
                            help="Scrape for tweets until this date. Format YYYY-MM-DD. \nDefault value is the date of today.",
                            metavar='\b')
        parser.add_argument("-p", "--poolsize", type=int, default=20,
                            help="Specify the number of parallel processes you want to run. \n"
                                 "Default value is set to 20. \nYou can change this number if you have more computing power available. \n"
                                 "Set to 1 if you don't want to run any parallel processes.",
                            metavar='\b')
        parser.add_argument("--loglevel", type=valid_loglevel, default=logging.INFO,
                            help="Specify the level for logging. \n"
                                 "Must be a valid value from https://docs.python.org/2/library/logging.html#logging-levels. \n"
                                 "Default log level is set to INFO.")
        parser.add_argument("-dp", "--disableproxy", action="store_true", default=False,
                            help="Set this flag if you want to disable use of proxy servers when scraping tweets and user profiles.")
        args = parser.parse_args()

        logging.basicConfig()
        logger.setLevel(args.loglevel)

        if isfile(args.output) and not args.dump and not args.overwrite:
            logger.error("Output file already exists! Aborting.")
            exit(-1)

        if args.all:
            args.begindate = dt.date(2006, 3, 1)

        if args.user:
            tweets = query_tweets_from_user(user=args.query, limit=args.limit,
                                            use_proxy=not args.disableproxy)
        else:
            tweets = query_tweets(query=args.query, limit=args.limit,
                                  begindate=args.begindate, enddate=args.enddate,
                                  poolsize=args.poolsize, lang=args.lang,
                                  use_proxy=not args.disableproxy)

        if args.dump:
            pprint([tweet.__dict__ for tweet in tweets])
        else:
            if tweets:
                with open(args.output, "w", encoding="utf-8") as output:
                    if args.csv:
                        f = csv.writer(output, delimiter=";",
                                       quoting=csv.QUOTE_NONNUMERIC)
                        f.writerow(["screen_name", "username", "user_id", "tweet_id",
                                    "tweet_url", "timestamp", "timestamp_epochs",
                                    "text", "text_html", "links", "hashtags",
                                    "has_media", "img_urls", "video_url",
                                    "likes", "retweets", "replies", "is_replied",
                                    "is_reply_to", "parent_tweet_id", "reply_to_users"])
                        for t in tweets:
                            f.writerow([t.screen_name, t.username, t.user_id, t.tweet_id,
                                        t.tweet_url, t.timestamp, t.timestamp_epochs,
                                        t.text, t.text_html, t.links, t.hashtags,
                                        t.has_media, t.img_urls, t.video_url,
                                        t.likes, t.retweets, t.replies, t.is_replied,
                                        t.is_reply_to, t.parent_tweet_id, t.reply_to_users])
                    else:
                        json.dump(tweets, output, cls=JSONEncoder)

        if args.profiles and tweets:
            list_users = list(set([tweet.username for tweet in tweets]))
            list_users_info = [query_user_info(elem, not args.disableproxy)
                               for elem in list_users]
            filename = 'userprofiles_' + args.output
            with open(filename, "w", encoding="utf-8") as output:
                json.dump(list_users_info, output, cls=JSONEncoder)
    except KeyboardInterrupt:
        logger.info("Program interrupted by user. Quitting...")