def scrape_data():
    """Scrape tweets matching the requested topic/hashtag and render a summary page.

    Reads ``topic``, ``hashtag``, ``startdate`` and ``enddate`` from the
    request query string, scrapes matching English top tweets with Scweet,
    builds a word cloud and a daily-tweet chart, then renders an HTML table
    of a small sample of the scraped data.
    """
    topic = request.args.get('topic')
    hashtag = request.args.get('hashtag')
    startdate = request.args.get('startdate')
    enddate = request.args.get('enddate')
    # Base name shared by the generated images; also passed to
    # word_cloud()/tweet_daily(), which presumably save their output under
    # static/images/ using this base — TODO confirm against those helpers.
    filename = f'{topic}_{startdate}_{enddate}'
    print(topic, hashtag, startdate, enddate)
    data = scrap(words=topic, hashtag=hashtag, start_date=startdate,
                 max_date=enddate, from_account=None, interval=1,
                 headless=True, display_type="Top", save_images=False,
                 resume=False, filter_replies=True, proximity=False,
                 lang='en')
    # users = ['nagouzil', '@yassineaitjeddi', 'TahaAlamIdrissi',
    #          '@Nabila_Gl', 'geceeekusuu', '@pabu232', '@av_ahmet', '@x_born_to_die_x']
    # users_info = get_user_information(users, headless=True)
    word_cloud(data.Text, filename)
    tweet_daily(data, filename)
    sample = data.head().drop(
        columns=['UserScreenName', 'Embedded_text', 'Tweet URL', 'Image link'],)
    sample.Timestamp = pd.to_datetime(
        sample['Timestamp']).dt.strftime('%d/%m/%Y')
    # to preserve memory remove data
    data = None
    # BUG FIX: the two image paths previously contained a literal mangled
    # placeholder instead of interpolating the computed ``filename`` base,
    # so the template could never reference the images generated above.
    return render_template('dataframe.html',
                           tables=[sample.to_html(classes='data', index=False)],
                           wordcloud=f'static/images/{filename}_wordcloud.png',
                           tweet_daily=f'static/images/{filename}_tweet_daily.png')
def get_tweets(self, start, end, resume=False):
    """Scrape this account's top tweets between *start* and *end*.

    Delegates to Scweet's Selenium-based ``scrap`` in headless mode,
    optionally resuming a previously interrupted scrape.
    """
    scrape_options = {
        'start_date': start,
        'max_date': end,
        'from_account': self.username,
        'interval': 5,
        'headless': True,
        'display_type': "Top",
        'hashtag': None,
        'save_images': False,
        'show_images': False,
        'resume': resume,
    }
    scrap(**scrape_options)
# Example usage of Scweet's scraping API.
from Scweet.scweet import scrap
from Scweet.user import get_user_information, get_users_following, get_users_followers

# scrape top tweets with the words 'covid','covid19' in proximity and without replies.
# the process is slower as the interval is smaller (choose an interval that can
# divide the period of time betwee, start and max date)
data = scrap(words=['covid', 'covid19'], start_date="2020-04-01",
             max_date="2020-04-15", from_account=None, interval=1,
             headless=True, display_type="Top", save_images=False,
             resume=False, filter_replies=True, proximity=True)

# scrape top tweets of with the hashtag #covid19, in proximity and without replies.
# the process is slower as the interval is smaller (choose an interval that can
# divide the period of time betwee, start and max date)
data = scrap(hashtag="covid19", start_date="2020-04-01",
             max_date="2020-04-15", from_account=None, interval=1,
             headless=True, display_type="Top", save_images=False,
             resume=False, filter_replies=True,
# NOTE(review): this call is truncated at the end of the visible chunk —
# the remaining arguments and closing parenthesis continue past this view.