def scrape_data():
    """Scrape tweets for the requested query and render a summary page.

    Reads ``topic``, ``hashtag``, ``startdate`` and ``enddate`` from the
    request's query string, forwards them to ``scrap``, hands the result to
    ``word_cloud`` and ``tweet_daily``, and renders ``dataframe.html`` with
    an HTML table of the first rows plus the two image paths.

    Returns:
        The rendered ``dataframe.html`` template.
    """
    # Function-scope import: the file's import block is not visible from here.
    import re

    topic = request.args.get('topic')
    hashtag = request.args.get('hashtag')
    startdate = request.args.get('startdate')
    enddate = request.args.get('enddate')

    # The query-string values are untrusted and this name is embedded in the
    # image paths below, so every run of characters other than word
    # characters, dots and dashes (path separators, spaces, ...) is collapsed
    # to "_" and leading dots are removed. Names built from plain words and
    # ISO dates are left unchanged.
    filename = re.sub(r'[^\w.-]+', '_', f'{topic}_{startdate}_{enddate}')
    filename = filename.lstrip('.')

    print(topic, hashtag, startdate, enddate)
    data = scrap(
        words=topic,
        hashtag=hashtag,
        start_date=startdate,
        max_date=enddate,
        from_account=None,
        interval=1,
        headless=True,
        display_type="Top",
        save_images=False,
        resume=False,
        filter_replies=True,
        proximity=False,
        lang='en',
    )

    # NOTE(review): presumably these helpers write
    # static/images/<filename>_wordcloud.png and
    # static/images/<filename>_tweet_daily.png, which the template references
    # below -- confirm against their definitions.
    word_cloud(data.Text, filename)
    tweet_daily(data, filename)

    # Preview only the first rows, without the bulky/link columns.
    sample = data.head().drop(
        columns=['UserScreenName', 'Embedded_text', 'Tweet URL', 'Image link'])
    sample['Timestamp'] = pd.to_datetime(
        sample['Timestamp']).dt.strftime('%d/%m/%Y')

    # Drop the reference to the full DataFrame before rendering to save memory.
    data = None
    return render_template(
        'dataframe.html',
        tables=[sample.to_html(classes='data', index=False)],
        wordcloud=f'static/images/{filename}_wordcloud.png',
        tweet_daily=f'static/images/{filename}_tweet_daily.png',
    )
 def get_tweets(self, start, end, resume=False):
     """Run ``scrap`` (Selenium-based, per the original note) for this account.

     The search is restricted to tweets from ``self.username`` between
     ``start`` and ``end``; ``resume`` is forwarded unchanged. Whatever
     ``scrap`` returns is discarded, so this method returns ``None``.
     """
     options = dict(
         start_date=start,
         max_date=end,
         from_account=self.username,
         interval=5,
         headless=True,
         display_type="Top",
         hashtag=None,
         save_images=False,
         show_images=False,
         resume=resume,
     )
     scrap(**options)
# Example no. 3
0
from Scweet.scweet import scrap
from Scweet.user import get_user_information, get_users_following, get_users_followers

# Scrape top tweets with the words 'covid', 'covid19' in proximity and without replies.
# The process is slower when the interval is smaller (choose an interval that evenly divides the period between the start and max dates).

# Keyword search from 2020-04-01 to 2020-04-15; the result is bound to `data`.
data = scrap(
    # What to search for, and over which period.
    words=['covid', 'covid19'], proximity=True, filter_replies=True,
    start_date="2020-04-01", max_date="2020-04-15", interval=1,
    from_account=None, display_type="Top",
    # Run settings.
    headless=True, save_images=False, resume=False,
)

# Scrape top tweets with the hashtag #covid19, in proximity and without replies.
# The process is slower when the interval is smaller (choose an interval that evenly divides the period between the start and max dates).

data = scrap(hashtag="covid19",
             start_date="2020-04-01",
             max_date="2020-04-15",
             from_account=None,
             interval=1,
             headless=True,
             display_type="Top",
             save_images=False,
             resume=False,
             filter_replies=True,