def scrape_user_to_db(username):
    """Scrape a user and insert everything on them into the database. Will overwrite existing data!

    :param username: Twitter handle to scrape (compared case-insensitively to tweet authors).
    :return: None when a first-time scrape finds no tweets; 0 when the user was
             already checked within the past day; otherwise the number of tweets
             passed to INSERT (duplicates are silently skipped, so the count may
             exceed the rows actually added).
    """
    with db.get_db() as cursor:
        tweets = []
        # If we've haven't scraped this user before, do a full scrape. If we have, only get the tweets
        # we don't have yet.
        cursor.execute("SELECT * FROM analyzed_users WHERE username=%s", username)
        if cursor.fetchone() is None:
            # First visit: register the user now so a concurrent run sees them,
            # then pull their full history.
            cursor.execute(
                "INSERT INTO analyzed_users (username, checked) VALUES (%s, NOW())",
                username)
            cursor.connection.commit()
            tweets = query_tweets_from_user(username, limit=5000)
            if len(tweets) == 0:
                return None
        else:
            cursor.execute(
                "SELECT checked FROM analyzed_users WHERE username=%s", username)
            d = cursor.fetchone()[0]
            # A NULL "checked" falls back to the Unix epoch so every tweet
            # passes the recency filter below.
            d = d if d is not None else datetime.datetime.utcfromtimestamp(0)
            # If we've already checked this users's tweets within the past day, don't try it again
            # NOTE(review): naive local now() is compared against the DB timestamp —
            # assumes both are in the same timezone; confirm.
            if (datetime.datetime.now() - d).days == 0:
                return 0
            cursor.execute(
                "UPDATE analyzed_users SET checked=NOW() WHERE username=%s",
                username)
            tweets = query_tweets_from_user(username, limit=5000)
            # Keep only tweets newer than the last check.
            tweets = list(filter(lambda tw: d < tw.timestamp, tweets))
        sql = "INSERT INTO tweets (username, content, created, retweets, favorites, replies, is_retweet, id, sentiment) " \
              "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
        set_username = False
        for tweet in tweets:
            try:
                # Set the user's full name if it hasn't already been set.
                if not set_username and tweet.user.lower() == username.lower():
                    cursor.execute(
                        "UPDATE analyzed_users SET fullname=%s WHERE username=%s",
                        (tweet.fullname, username))
                    set_username = True
                # is_retweet is inferred from the author differing from `username`.
                cursor.execute(sql, (username, tweet.text, tweet.timestamp,
                                     tweet.retweets, tweet.likes, tweet.replies,
                                     tweet.user.lower() != username.lower(),
                                     tweet.id, get_text_sentiment(tweet.text)))
            except pymysql.err.IntegrityError:
                # Duplicate primary key (tweet id already stored): skip quietly.
                pass
        cursor.connection.commit()
        return len(tweets)
def get_profile_tweets(handle, filename):
    """Scrape a user's recent tweets, summarize any linked articles, and export a CSV.

    Dumps the tweets to `filename` (JSON), reloads them as a DataFrame, adds a
    `summary` column (article summary per linked URL, '[]' placeholder when
    there is no usable link) and an `is_summary` flag column, then writes a CSV
    next to the JSON file.

    Bug fix: the youtu.be branch previously appended nothing to the summary
    list, shifting every later summary onto the wrong tweet row.

    :param handle: Twitter handle to scrape.
    :param filename: JSON output path; assumed to end in '.json' (the CSV path
                     is derived by replacing that extension).
    """
    profile = query_tweets_from_user(handle, limit=10)
    print('Loading...')
    with open(filename, "w", encoding="utf-8") as output:
        json.dump(profile, output, cls=JSONEncoder)
    profile_dataframe = pd.read_json(filename, encoding='utf-8')

    summaries = []
    for val in profile_dataframe['links']:
        if str(val) == '[]':
            # No links on this tweet.
            summaries.append('[]')
        elif str(val[0][0:17]) == 'https://youtu.be/':
            # Video links cannot be summarized; a placeholder must still be
            # appended to keep summaries aligned with their tweet rows.
            summaries.append('[]')
        else:
            summaries.append(summary(val[0]))
    profile_dataframe['summary'] = pd.DataFrame(summaries)

    # Flag rows that got a real summary (anything longer than the '[]' marker).
    is_summary = [1 if len(str(s)) > 3 else 0
                  for s in profile_dataframe['summary']]
    profile_dataframe['is_summary'] = pd.DataFrame(is_summary)

    profile_dataframe.to_csv(filename[:-5] + ".csv")
    print('Loaded')
def get_profile_tweets(handle, filename):
    """Scrape a user's recent tweets, save them as JSON, and export a CSV copy.

    Bug fix: previously the function wrote the tweets to `filename` but then
    read back the hard-coded file 'my.json', silently ignoring its own output.

    :param handle: Twitter handle to scrape.
    :param filename: path of the JSON file to write (and read back).
    """
    profile = query_tweets_from_user(handle, limit=10)
    print('Loading...')
    with open(filename, "w", encoding="utf-8") as output:
        json.dump(profile, output, cls=JSONEncoder)
    # Read back the file we just wrote (was hard-coded to 'my.json').
    profile_dataframe = pd.read_json(filename, encoding='utf-8')
    profile_dataframe.to_csv('profile_tweets.csv')
    print('Loaded')
def create_data(user_name):
    """Fetch a user's tweets and persist them as a one-column CSV.

    Newlines inside each tweet are replaced with commas so every tweet
    occupies a single CSV row.

    :param user_name: Twitter handle to scrape.
    :return: tuple of (DataFrame with a 'tweet' column, path of the CSV written).
    """
    csv_path = "scripts/test_dta.csv"
    scraped = query_tweets_from_user(user_name, 100)
    flattened = [item.text.replace("\n", ",") for item in scraped]
    df = pd.DataFrame(flattened, columns=['tweet'])
    df.to_csv(csv_path, index=False, encoding="utf-8")
    return df, csv_path
def get_twitter_user_data(username):
    """Build a personality-profile dict for a Twitter user.

    Runs the user's recent tweets through the `fb` personality pipeline,
    parses the ratings string into a dict, and attaches the handle and the
    display name taken from the user's latest tweet.

    :param username: Twitter handle to profile.
    :return: dict of personality ratings plus 'user_id' and 'name' keys.
    """
    raw_tweets = get_tweets(username, 150)
    content = fb.content_info(raw_tweets)
    personality = fb.personality_data(content)
    ratings_text = fb.personality_ratings(personality)
    # The ratings come back as a Python-literal string; parse it safely.
    profile_dict = ast.literal_eval(ratings_text)
    profile_dict['user_id'] = username
    profile_dict['name'] = query_tweets_from_user(username, 5)[0].fullname
    return profile_dict
def user_tweets(request, username, limit):
    """ Returns a list of user's tweets param username: str, specifies a user optional limit: int, specifies the number of tweets to retrieve, default=30 """
    scraped = query_tweets_from_user(username, limit)
    payload = []
    for item in scraped[:limit]:
        payload.append(format_tweet(item))
    # safe=False lets Django serialize a top-level list instead of a dict.
    return JsonResponse(payload, safe=False)
def get(self, user):
    """Return the user's recent tweets as a single-element list of JSON dicts."""
    args = parser.parse_args()
    pages_limit = args.get('pages_limit', DEFAULT_PAGES_LIMIT)
    scraped = query_tweets_from_user(user=user, limit=pages_limit)
    transformed = []
    for item in scraped[:pages_limit]:
        transformed.append(_transform_to_json(item))
    return [transformed]
def get_user_info(twitter_user):
    """
    An example of using the query_user_info method

    Collects profile metadata for one Twitter user plus attribute lists
    extracted from their most recent tweets.

    :param twitter_user: the twitter user to capture user data
    :return: twitter_user_data: returns a dictionary of twitter user data
    """
    user_info = query_user_info(user=twitter_user)

    # Profile-level fields (each maps onto a bot-detection feature).
    twitter_user_data = {
        "user": user_info.user,               # feature: screen_name_length
        "fullname": user_info.full_name,      # user name
        "location": user_info.location,       # feature: location
        "blog": user_info.blog,               # feature: url
        "date_joined": user_info.date_joined, # feature: age
        "id": user_info.id,                   # twitter account id
        "num_tweets": user_info.tweets,       # feature: statuses_count
        "following": user_info.following,     # feature: friends_count
        "followers": user_info.followers,     # feature: followers_count
        "likes": user_info.likes,             # feature: favourites_count
        "lists": user_info.lists,             # feature: listed_count
        "description": user_info.description,
    }

    # Tweet-level attribute lists from the user's latest tweets.
    max_num_tweets = 10
    latest_tweets = query_tweets_from_user(twitter_user, limit=max_num_tweets)
    (tweets, tweets_html, hashtags, has_media, num_retweets, num_likes,
     links, num_replies, reply_to_users, timestamp_epochs,
     is_quoted_tweet, quoted_user, quoted_text) = get_tweet_attribute(latest_tweets)

    twitter_user_data["tweets"] = tweets
    twitter_user_data["tweets_html"] = tweets_html
    twitter_user_data["hashtags"] = hashtags
    twitter_user_data["has_media"] = has_media
    twitter_user_data["num_retweets"] = num_retweets
    twitter_user_data["num_likes"] = num_likes
    twitter_user_data['links'] = links
    twitter_user_data['num_replies'] = num_replies
    twitter_user_data['reply_to_users'] = reply_to_users
    twitter_user_data['timestamp_epochs'] = timestamp_epochs
    twitter_user_data['is_quoted_tweet'] = is_quoted_tweet
    twitter_user_data['quoted_user'] = quoted_user
    twitter_user_data['quoted_text'] = quoted_text
    return twitter_user_data
def queringTweets(username): filename = "{}.json".format(username) filename1 = "{}.txt".format(username) tweets = query_tweets_from_user(username) f = open(username + ".txt", "a") j = [] for t in tweets: t.timestamp = t.timestamp.isoformat() f.write(" Tweet ID:{} Tarih:{}: {} \n".format(t.tweet_id, t.timestamp, t.text)) #j.append(t.__dict__) f.close() """with open(filename, "w") as f:
def _download_tweeted_and_retweeted(self):
    """Fetch the user's timeline and split it into own tweets vs. retweets.

    Fills self.tweeted / self.retweeted (keyed by tweet id), counts retweets
    per original author, and ranks those authors most-retweeted first in
    self.users_retweeted.
    """
    print('Downloading tweeted and retweeted...')
    for tw in query_tweets_from_user(self.username):
        if tw.user == self.username:
            self.tweeted[tw.id] = self._serialize_tweet(tw)
        else:
            # Tweet authored by someone else: it's a retweet — tally the originator.
            count = self.num_retweets_by_originator.get(tw.user, 0)
            self.num_retweets_by_originator[tw.user] = count + 1
            self.retweeted[tw.id] = self._serialize_tweet(tw)
    ranked = sorted(self.num_retweets_by_originator.items(),
                    key=lambda kv: kv[1], reverse=True)
    self.users_retweeted = [originator for originator, _ in ranked]
def get_tweets(user):
    """ Scrape twitter to get tweets from user. """
    # Currently pulls 200. Can adjust number.
    fetched = query_tweets_from_user(user, 200)
    cleaned = []
    # filter out retweets and direct quotations from other people
    for tw in fetched:
        authored_by_user = tw.user == user
        contains_quote = "\u201c" in tw.text
        if authored_by_user and not contains_quote:
            # replace slanted apostrophes with normal ones
            cleaned.append(re.sub(u"([‘’])", "'", tw.text))
    return cleaned
def main():
    """Scrape recent tweets from a fixed list of users and log them to data.json.

    Bug fix: tweet text and screen name were passed through .encode('utf-8'),
    producing `bytes`, which json.dumps() rejects on Python 3 with a
    TypeError. The str values are serialized directly instead.

    Note: data.json intentionally mixes progress lines with a trailing JSON
    array, matching the original behavior — it is a log file, not valid JSON.
    """
    with open("data.json", "w") as f:
        f.write("Current time {} START!!!\n".format(datetime.now().ctime()))
    users = [
        'realDonaldTrump', 'RobinBew', 'TheEIU', 'TheEconomist', 'seanmdav',
        'erm3114', 'AgatheDemarais', 'john_c_ferguson', 'maxlambertson',
        'davidfrum', 'Lagarde', 'RobertAlanWard'
    ]
    json_object_array = []
    data = {}
    tweet_count_old = 0
    for user in users:
        for tweet in query_tweets_from_user(user, limit=10):
            data['screen_name'] = tweet.screen_name
            data['timestamp'] = tweet.timestamp.ctime()
            data['text'] = tweet.text
            # Round-trip through JSON to snapshot the reused `data` dict.
            json_object_array.append(json.loads(json.dumps(data)))
        with open("data.json", "a") as f:
            f.write("Got {} tweets from username {}\n".format(
                len(json_object_array) - tweet_count_old, user))
        tweet_count_old = len(json_object_array)
    with open('data.json', 'a') as f:
        json.dump(json_object_array, f, indent=2)
def download_user(user_id: str):
    """Dump a user's tweets to JSON, download each tweet's media in parallel,
    and append any video URLs to a per-user videos.txt.
    """
    output_root = '../build/outputs'
    user_dir = output_root + '/' + user_id
    os.makedirs(user_dir, exist_ok=True)
    tweet: Tweet
    tweets = query_tweets_from_user(user_id)
    with open(user_dir + "/tweets.json", "w", encoding="utf-8") as fh:
        json.dump(tweets, fh, ensure_ascii=False, cls=JSONEncoder)
    # Fan the per-tweet downloads out over a small thread pool.
    worker_count = 16
    workers = ThreadPool(worker_count)
    workers.map(partial(download_tw, user_dir=user_dir), tweets)
    video_lines = [tweet.video_url + "\n" for tweet in tweets if tweet.video_url]
    with open(user_dir + "/videos.txt", "a", encoding="utf-8") as fh:
        fh.writelines(video_lines)
def main():
    """CLI entry point: parse arguments and scrape tweets to JSON or CSV.

    Reconstructed from source whose string literals were broken across
    physical lines; behavior is unchanged.
    """
    try:
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter, description=__doc__)
        parser.add_argument("query", type=str, help="Advanced twitter query")
        parser.add_argument("-o", "--output", type=str, default="tweets.json",
                            help="Path to a JSON file to store the gathered "
                                 "tweets to.")
        parser.add_argument("-l", "--limit", type=int, default=None,
                            help="Number of minimum tweets to gather.")
        parser.add_argument("-a", "--all", action='store_true',
                            help="Set this flag if you want to get all tweets "
                                 "in the history of twitter. Begindate is set to 2006-03-01."
                                 "This may take a while. You can increase the number of parallel"
                                 "processes depending on the computational power you have.")
        parser.add_argument("-c", "--csv", action='store_true',
                            help="Set this flag if you want to save the results to a CSV format.")
        parser.add_argument("-u", "--user", action='store_true',
                            help="Set this flag to if you want to scrape tweets from a specific user"
                                 "The query should then consist of the profilename you want to scrape without @")
        parser.add_argument("--lang", type=str, default=None,
                            help="Set this flag if you want to query tweets in \na specific language. You can choose from:\n"
                                 "en (English)\nar (Arabic)\nbn (Bengali)\n"
                                 "cs (Czech)\nda (Danish)\nde (German)\nel (Greek)\nes (Spanish)\n"
                                 "fa (Persian)\nfi (Finnish)\nfil (Filipino)\nfr (French)\n"
                                 "he (Hebrew)\nhi (Hindi)\nhu (Hungarian)\n"
                                 "id (Indonesian)\nit (Italian)\nja (Japanese)\n"
                                 "ko (Korean)\nmsa (Malay)\nnl (Dutch)\n"
                                 "no (Norwegian)\npl (Polish)\npt (Portuguese)\n"
                                 "ro (Romanian)\nru (Russian)\nsv (Swedish)\n"
                                 "th (Thai)\ntr (Turkish)\nuk (Ukranian)\n"
                                 "ur (Urdu)\nvi (Vietnamese)\n"
                                 "zh-cn (Chinese Simplified)\n"
                                 "zh-tw (Chinese Traditional)")
        parser.add_argument("-d", "--dump", action="store_true",
                            help="Set this flag if you want to dump the tweets \nto the console rather than outputting to a file")
        parser.add_argument("-bd", "--begindate", type=valid_date, default="2006-03-21",
                            help="Scrape for tweets starting from this date. Format YYYY-MM-DD. \nDefault value is 2006-03-21",
                            metavar='\b')
        parser.add_argument("-ed", "--enddate", type=valid_date, default=dt.date.today(),
                            help="Scrape for tweets until this date. Format YYYY-MM-DD. \nDefault value is the date of today.",
                            metavar='\b')
        parser.add_argument("-p", "--poolsize", type=int, default=20,
                            help="Specify the number of parallel process you want to run. \n"
                                 "Default value is set to 20. \nYou can change this number if you have more computing power available. \n"
                                 "Set to 1 if you dont want to run any parallel processes.",
                            metavar='\b')
        args = parser.parse_args()

        # Refuse to clobber an existing output file unless we are only dumping.
        if isfile(args.output) and not args.dump:
            logger.error("Output file already exists! Aborting.")
            exit(-1)

        if args.all:
            args.begindate = dt.date(2006, 3, 1)

        if args.user:
            tweets = query_tweets_from_user(user=args.query, limit=args.limit)
        else:
            tweets = query_tweets(query=args.query, limit=args.limit,
                                  begindate=args.begindate, enddate=args.enddate,
                                  poolsize=args.poolsize, lang=args.lang)

        if args.dump:
            print(json.dumps(tweets, cls=JSONEncoder))
        else:
            if tweets:
                with open(args.output, "w", encoding="utf-8") as output:
                    if args.csv:
                        f = csv.writer(output)
                        f.writerow(["user", "fullname", "tweet-id", "timestamp",
                                    "url", "likes", "replies", "retweets",
                                    "text", "html"])
                        for x in tweets:
                            f.writerow([x.user, x.fullname, x.id, x.timestamp,
                                        x.url, x.likes, x.replies, x.retweets,
                                        x.text, x.html])
                    else:
                        json.dump(tweets, output, cls=JSONEncoder)
    except KeyboardInterrupt:
        logger.info("Program interrupted by user. Quitting...")
def get_user_tweets(self, username, limit):
    """Return the user's tweets in oldest-first order, wrapped in our Tweet model."""
    scraped = query_tweets_from_user(username, limit=limit)
    # Scraper returns newest-first; callers expect chronological order.
    scraped.reverse()
    return tweet.Tweet.create_from_scraper_response(scraped)
def main():
    """CLI entry point: scrape tweets (optionally via proxies) to JSON or CSV,
    and optionally scrape the profiles of every author found.

    Reconstructed from source whose string literals were broken across
    physical lines; behavior is unchanged.
    """
    try:
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter, description=__doc__)
        parser.add_argument("query", type=str, help="Advanced twitter query")
        parser.add_argument("-o", "--output", type=str, default="tweets.json",
                            help="Path to a JSON file to store the gathered "
                                 "tweets to.")
        parser.add_argument("-l", "--limit", type=int, default=None,
                            help="Number of minimum tweets to gather.")
        parser.add_argument("-a", "--all", action='store_true',
                            help="Set this flag if you want to get all tweets "
                                 "in the history of twitter. Begindate is set to 2006-03-01."
                                 "This may take a while. You can increase the number of parallel"
                                 "processes depending on the computational power you have.")
        parser.add_argument("-c", "--csv", action='store_true',
                            help="Set this flag if you want to save the results to a CSV format.")
        parser.add_argument("-u", "--user", action='store_true',
                            help="Set this flag to if you want to scrape tweets from a specific user"
                                 "The query should then consist of the profilename you want to scrape without @")
        parser.add_argument("--profiles", action='store_true',
                            help="Set this flag to if you want to scrape profile info of all the users where you"
                                 "have previously scraped from. After all of the tweets have been scraped it will start"
                                 "a new process of scraping profile pages.")
        parser.add_argument("--lang", type=str, default=None,
                            help="Set this flag if you want to query tweets in \na specific language. You can choose from:\n"
                                 "en (English)\nar (Arabic)\nbn (Bengali)\n"
                                 "cs (Czech)\nda (Danish)\nde (German)\nel (Greek)\nes (Spanish)\n"
                                 "fa (Persian)\nfi (Finnish)\nfil (Filipino)\nfr (French)\n"
                                 "he (Hebrew)\nhi (Hindi)\nhu (Hungarian)\n"
                                 "id (Indonesian)\nit (Italian)\nja (Japanese)\n"
                                 "ko (Korean)\nmsa (Malay)\nnl (Dutch)\n"
                                 "no (Norwegian)\npl (Polish)\npt (Portuguese)\n"
                                 "ro (Romanian)\nru (Russian)\nsv (Swedish)\n"
                                 "th (Thai)\ntr (Turkish)\nuk (Ukranian)\n"
                                 "ur (Urdu)\nvi (Vietnamese)\n"
                                 "zh-cn (Chinese Simplified)\n"
                                 "zh-tw (Chinese Traditional)")
        parser.add_argument("-d", "--dump", action="store_true",
                            help="Set this flag if you want to dump the tweets \nto the console rather than outputting to a file")
        parser.add_argument("-ow", "--overwrite", action="store_true",
                            help="Set this flag if you want to overwrite the existing output file.")
        parser.add_argument("-bd", "--begindate", type=valid_date, default="2006-03-21",
                            help="Scrape for tweets starting from this date. Format YYYY-MM-DD. \nDefault value is 2006-03-21",
                            metavar='\b')
        parser.add_argument("-ed", "--enddate", type=valid_date, default=dt.date.today(),
                            help="Scrape for tweets until this date. Format YYYY-MM-DD. \nDefault value is the date of today.",
                            metavar='\b')
        parser.add_argument("-p", "--poolsize", type=int, default=20,
                            help="Specify the number of parallel process you want to run. \n"
                                 "Default value is set to 20. \nYou can change this number if you have more computing power available. \n"
                                 "Set to 1 if you dont want to run any parallel processes.",
                            metavar='\b')
        parser.add_argument("--loglevel", type=valid_loglevel, default=logging.INFO,
                            help="Specify the level for logging. \n"
                                 "Must be a valid value from https://docs.python.org/2/library/logging.html#logging-levels. \n"
                                 "Default log level is set to INFO.")
        parser.add_argument("-dp", "--disableproxy", action="store_true", default=False,
                            help="Set this flag if you want to disable use of proxy servers when scrapping tweets and user profiles. \n")
        args = parser.parse_args()

        logging.basicConfig()
        logger.setLevel(args.loglevel)

        # Refuse to clobber an existing output file unless dumping or explicitly overwriting.
        if isfile(args.output) and not args.dump and not args.overwrite:
            logger.error("Output file already exists! Aborting.")
            exit(-1)

        if args.all:
            args.begindate = dt.date(2006, 3, 1)

        if args.user:
            tweets = query_tweets_from_user(user=args.query, limit=args.limit,
                                            use_proxy=not args.disableproxy)
        else:
            tweets = query_tweets(query=args.query, limit=args.limit,
                                  begindate=args.begindate, enddate=args.enddate,
                                  poolsize=args.poolsize, lang=args.lang,
                                  use_proxy=not args.disableproxy)

        if args.dump:
            pprint([tweet.__dict__ for tweet in tweets])
        else:
            if tweets:
                with open(args.output, "w", encoding="utf-8") as output:
                    if args.csv:
                        f = csv.writer(output, delimiter=";",
                                       quoting=csv.QUOTE_NONNUMERIC)
                        f.writerow(["screen_name", "username", "user_id",
                                    "tweet_id", "tweet_url", "timestamp",
                                    "timestamp_epochs", "text", "text_html",
                                    "links", "hashtags", "has_media",
                                    "img_urls", "video_url", "likes",
                                    "retweets", "replies", "is_replied",
                                    "is_reply_to", "parent_tweet_id",
                                    "reply_to_users"])
                        for t in tweets:
                            f.writerow([t.screen_name, t.username, t.user_id,
                                        t.tweet_id, t.tweet_url, t.timestamp,
                                        t.timestamp_epochs, t.text, t.text_html,
                                        t.links, t.hashtags, t.has_media,
                                        t.img_urls, t.video_url, t.likes,
                                        t.retweets, t.replies, t.is_replied,
                                        t.is_reply_to, t.parent_tweet_id,
                                        t.reply_to_users])
                    else:
                        json.dump(tweets, output, cls=JSONEncoder)

        if args.profiles and tweets:
            list_users = list(set([tweet.username for tweet in tweets]))
            list_users_info = [query_user_info(elem, not args.disableproxy)
                               for elem in list_users]
            filename = 'userprofiles_' + args.output
            with open(filename, "w", encoding="utf-8") as output:
                json.dump(list_users_info, output, cls=JSONEncoder)
    except KeyboardInterrupt:
        logger.info("Program interrupted by user. Quitting...")
def get_tweets(username, limit):
    """Given a Twitter username, return a list of their most recent tweets."""
    return [item.text for item in query_tweets_from_user(username, limit)]
def get_user_tweets():
    """Fetch up to `tweets_num` tweets for the module-level query handle `q`."""
    return query.query_tweets_from_user(user=q, limit=tweets_num)
def main():
    """CLI entry point: scrape tweets to JSON or CSV, optionally followed by
    the profile info of every author found.

    Reconstructed from source whose string literals were broken across
    physical lines; behavior is unchanged.
    """
    try:
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter, description=__doc__)
        parser.add_argument("query", type=str, help="Advanced twitter query")
        parser.add_argument("-o", "--output", type=str, default="tweets.json",
                            help="Path to a JSON file to store the gathered "
                                 "tweets to.")
        parser.add_argument("-l", "--limit", type=int, default=None,
                            help="Number of minimum tweets to gather.")
        parser.add_argument("-a", "--all", action='store_true',
                            help="Set this flag if you want to get all tweets "
                                 "in the history of twitter. Begindate is set to 2006-03-01."
                                 "This may take a while. You can increase the number of parallel"
                                 "processes depending on the computational power you have.")
        parser.add_argument("-c", "--csv", action='store_true',
                            help="Set this flag if you want to save the results to a CSV format.")
        parser.add_argument("-u", "--user", action='store_true',
                            help="Set this flag to if you want to scrape tweets from a specific user"
                                 "The query should then consist of the profilename (user) you want to scrape without @")
        parser.add_argument("--profiles", action='store_true',
                            help="Set this flag to if you want to scrape profile info of all the users where you"
                                 "have previously scraped from. After all of the tweets have been scraped it will start"
                                 "a new process of scraping profile pages.")
        parser.add_argument("--lang", type=str, default=None,
                            help="Set this flag if you want to query tweets in \na specific language. You can choose from:\n"
                                 "en (English)\nar (Arabic)\nbn (Bengali)\n"
                                 "cs (Czech)\nda (Danish)\nde (German)\nel (Greek)\nes (Spanish)\n"
                                 "fa (Persian)\nfi (Finnish)\nfil (Filipino)\nfr (French)\n"
                                 "he (Hebrew)\nhi (Hindi)\nhu (Hungarian)\n"
                                 "id (Indonesian)\nit (Italian)\nja (Japanese)\n"
                                 "ko (Korean)\nmsa (Malay)\nnl (Dutch)\n"
                                 "no (Norwegian)\npl (Polish)\npt (Portuguese)\n"
                                 "ro (Romanian)\nru (Russian)\nsv (Swedish)\n"
                                 "th (Thai)\ntr (Turkish)\nuk (Ukranian)\n"
                                 "ur (Urdu)\nvi (Vietnamese)\n"
                                 "zh-cn (Chinese Simplified)\n"
                                 "zh-tw (Chinese Traditional)")
        parser.add_argument("-d", "--dump", action="store_true",
                            help="Set this flag if you want to dump the tweets \nto the console rather than outputting to a file")
        parser.add_argument("-bd", "--begindate", type=valid_date, default="2006-03-21",
                            help="Scrape for tweets starting from this date. Format YYYY-MM-DD. \nDefault value is 2006-03-21",
                            metavar='\b')
        parser.add_argument("-ed", "--enddate", type=valid_date, default=dt.date.today(),
                            help="Scrape for tweets until this date. Format YYYY-MM-DD. \nDefault value is the date of today.",
                            metavar='\b')
        parser.add_argument("-p", "--poolsize", type=int, default=20,
                            help="Specify the number of parallel process you want to run. \n"
                                 "Default value is set to 20. \nYou can change this number if you have more computing power available. \n"
                                 "Set to 1 if you dont want to run any parallel processes.",
                            metavar='\b')
        args = parser.parse_args()

        # Refuse to clobber an existing output file unless we are only dumping.
        if isfile(args.output) and not args.dump:
            logger.error("Output file already exists! Aborting.")
            exit(-1)

        if args.all:
            args.begindate = dt.date(2006, 3, 1)
            args.enddate = dt.date.today()

        if args.user:
            tweets = query_tweets_from_user(user=args.query, limit=args.limit)
        else:
            tweets = query_tweets(query=args.query, limit=args.limit,
                                  begindate=args.begindate, enddate=args.enddate,
                                  poolsize=args.poolsize, lang=args.lang)

        if args.dump:
            print(json.dumps(tweets, cls=JSONEncoder))
        else:
            if tweets:
                with open(args.output, "w", encoding="utf-8") as output:
                    if args.csv:
                        f = csv.writer(output)
                        f.writerow(["user", "fullname", "tweet-id", "timestamp",
                                    "url", "likes", "replies", "retweets",
                                    "text", "html"])
                        for x in tweets:
                            f.writerow([x.user, x.fullname, x.id, x.timestamp,
                                        x.url, x.likes, x.replies, x.retweets,
                                        x.text, x.html])
                    else:
                        json.dump(tweets, output, cls=JSONEncoder)

        # Optionally scrape profile info for every distinct author seen.
        if args.profiles and tweets:
            list_users = list(set([tweet.user for tweet in tweets]))
            filename = 'userprofiles_' + args.output
            with open(filename, "w", encoding="utf-8") as output:
                if args.csv:
                    f = csv.writer(output)
                    f.writerow(["user", "fullname", "location", "blog",
                                "date_joined", "id", "num_tweets", "following",
                                "followers", "likes", "lists"])
                    for elem in list_users:
                        u = query_user_info(elem)
                        if u is None:
                            # Profile could not be fetched; skip it.
                            continue
                        else:
                            f.writerow([u.user, u.full_name, u.location,
                                        u.blog, u.date_joined, u.id, u.tweets,
                                        u.following, u.followers, u.likes,
                                        u.lists])
                else:
                    for elem in list_users:
                        u = query_user_info(elem)
                        if u is None:
                            continue
                        else:
                            json.dump(u, output, cls=JSONEncoder, indent=2)
    except KeyboardInterrupt:
        logger.info("Program interrupted by user. Quitting...")
def get_tweet():
    """Scrape the target account's tweets and run the analysis step on each."""
    for item in query_tweets_from_user(target_account_id, page_limit):
        analysis_tweet(item)
def get_user_tweets(user, limit):
    """Thin wrapper: fetch up to `limit` tweets for `user` from the scraper."""
    result = query_tweets_from_user(user, limit=limit)
    return result
import pandas as pd import numpy as np import os from twitterscraper import query_tweets from twitterscraper.query import query_tweets_from_user username = "******" c.Output = r"data_scraping/data_retweet/test_csv.csv" try: os.remove("data_scraping/data_retweet/test_csv.csv") except: pass # CSV Fieldnames list_of_tweets = query_tweets_from_user(username, limit=10) #print the retrieved tweets to the screen: for tweet in list_of_tweets: print(tweet.from_soup()) #Or save the retrieved tweets to file: # with open("data_scraping/data_retweet/test.txt",'w') as file: # for tweet in query_tweets("Trump OR Clinton", 10): # file.write(tweet.encode('utf-8')) # file.close() if __name__ == '__main__': # recieve list of participant id in seed group pro_colname = "Pro-Israeli sources on Twitter" anti_colname = "Anti Israeli sources on Twitter" seed_list = pd.read_csv(r"data_scraping/twitter_seeds.csv", header=0)