예제 #1
0
def test_runner_exceptions():
    """Expect ``ScrapBatchBadResponse`` when the search endpoint keeps failing.

    A stub web client replaces real HTTP traffic: the first search request
    is answered with status 429, every later search request with status 400,
    and any other request with a 200 response carrying a guest-token payload.
    """
    search_url = 'https://api.twitter.com/2/search/adaptive.json'
    activate_url = 'https://api.twitter.com/1.1/guest/activate.json'

    class TokenExpiryExceptionWebClient(st.WebClient):
        # Number of requests seen per URL. Only these two URLs are known;
        # a request to any other URL raises KeyError in run_request.
        count_dict = {search_url: 0, activate_url: 0}

        def run_request(
            self, params: st.http_request.RequestDetails
        ) -> st.http_request.RequestResponse:
            """Return a canned response chosen by URL and request count."""
            requested_url = params.url
            self.count_dict[requested_url] += 1
            if requested_url != search_url:
                return st.http_request.RequestResponse(
                    200, '{"guest_token":"1350356785648062465"}')
            if self.count_dict[requested_url] == 1:
                return st.http_request.RequestResponse(429, None)
            return st.http_request.RequestResponse(400, '')

    with pytest.raises(ScrapBatchBadResponse):
        task = st.SearchTweetsTask(all_words='#koronawirus')
        runner = st.TweetSearchRunner(
            search_tweets_task=task,
            tweet_outputs=[],
            web_client=TokenExpiryExceptionWebClient(),
            auth_token_provider_factory=st.auth.SimpleAuthTokenProviderFactory(),
        )
        runner.run()
예제 #2
0
def _scrap_tweets_with_count_assert(count: int):
    """Search ``#covid19`` with ``tweets_limit=count`` and check the total.

    Asserts that the collector ends up holding exactly ``count`` tweets.
    """
    task = st.SearchTweetsTask(all_words='#covid19', tweets_limit=count)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    collected = collector.get_scrapped_tweets()
    assert len(collected) == count
예제 #3
0
def test_return_tweets_from_user():
    """Search by ``from_username`` and check the author of collected tweets.

    The per-tweet check is delegated to ``tweet_list_assert_condition``
    (defined elsewhere), which receives the collected tweets and a predicate.
    """
    username = '******'

    def _written_by_user(tweet):
        return tweet.user_name == username

    task = st.SearchTweetsTask(from_username=username, tweets_limit=100)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_scrapped_tweets(),
                                _written_by_user)
예제 #4
0
def test_exact_words():
    """Search by ``exact_words`` and check the phrase in each tweet's text.

    Both the phrase and the tweet text go through ``to_base_text`` (defined
    elsewhere) before the containment check.
    """
    exact_phrase = 'duda kaczyński kempa'

    def _contains_phrase(tweet):
        normalized_phrase = to_base_text(exact_phrase)
        return normalized_phrase in to_base_text(tweet.full_text)

    task = st.SearchTweetsTask(exact_words=exact_phrase)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_raw_list(), _contains_phrase)
예제 #5
0
def _run_test_between_dates(since: Arrow, until: Arrow):
    """Search within a date window and check each tweet's creation time.

    Args:
        since: Lower bound passed to the search and used (inclusively) in
            the check.
        until: Upper bound passed to the search and used (inclusively) in
            the check.
    """
    def _created_within_window(tweet):
        return since <= tweet.created_at <= until

    task = st.SearchTweetsTask(any_word="#koronawirus #covid19",
                               since=since,
                               until=until)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_scrapped_tweets(),
                                _created_within_window)
예제 #6
0
def test_search_to_username():
    """Search by ``to_username`` and check the name appears in tweet text.

    Both the username and the tweet text go through ``to_base_text``
    (defined elsewhere) before the containment check.
    """
    username = '******'

    def _mentions_user(tweet):
        return to_base_text(username) in to_base_text(tweet.full_text)

    task = st.SearchTweetsTask(to_username=username, tweets_limit=100)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_scrapped_tweets(),
                                _mentions_user)
def _run_search_test_covid_tweets_in_language(language: st.Language):
    """Search ``#covid19`` in one language and check each tweet's ``lang``.

    Args:
        language: Language passed to the search; its ``short_value`` is
            compared for equality with every collected tweet's ``lang``.
    """
    def _has_requested_language(tweet):
        return tweet.lang == language.short_value

    task = st.SearchTweetsTask(all_words='#covid19',
                               tweets_limit=100,
                               language=language)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_scrapped_tweets(),
                                _has_requested_language)
예제 #8
0
def get_tweets_to_tweet_output_test(tweet_output: List[st.TweetOutput]):
    """Run a ``#koronawirus`` search and feed the given outputs.

    Args:
        tweet_output: Outputs handed to the runner as ``tweet_outputs``.
            Nothing is returned; callers inspect the outputs afterwards.
    """
    task = st.SearchTweetsTask(all_words='#koronawirus', tweets_limit=200)
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=tweet_output)
    runner.run()
예제 #9
0
def run_test_for_single_language(language: st.Language):
    """Search ``#covid19`` in one language and check each raw tweet's ``lang``.

    Args:
        language: Language passed to the search and used in the check.
    """
    def _lang_matches(tweet):
        # NOTE(review): this is a membership test (`in`), not equality.
        # Presumably deliberate so a shorter tweet code can match a longer
        # short_value -- confirm before tightening it to `==`.
        return tweet.lang in language.short_value

    task = st.SearchTweetsTask(all_words='#covid19',
                               tweets_limit=10,
                               language=language)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_raw_list(), _lang_matches)
예제 #10
0
def search_by_hashtag():
    """Run a ``#koronawirus`` search and inspect the raw tweets' text.

    NOTE(review): the final assertion only looks at tweets that already
    contain the hashtag, so as written it cannot fail. Decide what the
    intended check is (e.g. every tweet, case-insensitive) and tighten it.
    """
    hashtag = '#koronawirus'
    task = st.SearchTweetsTask(all_words=hashtag, tweets_limit=200)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweets_with_hashtag = [
        tweet for tweet in collector.get_raw_list()
        if hashtag in tweet.full_text
    ]
    assert all(hashtag in tweet.full_text for tweet in tweets_with_hashtag)
def test_using_proxy_client():
    """Run a ``#covid19`` search through a proxy-configured web client.

    The same local proxy address is used for both HTTP and HTTPS. Asserts
    that the runner returns a ``SearchTweetsResult`` and that the number of
    collected tweets equals the task's ``tweets_limit``.
    """
    proxy_url = 'http://localhost:3128'
    task = st.SearchTweetsTask(all_words='#covid19', tweets_limit=200)
    proxy_config = st.RequestsWebClientProxyConfig(http_proxy=proxy_url,
                                                   https_proxy=proxy_url)
    proxy_client = st.RequestsWebClient(proxy_config)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector],
                                  web_client=proxy_client)
    result = runner.run()
    collected = collector.get_scrapped_tweets()
    assert isinstance(result, st.SearchTweetsResult)
    assert len(collected) == task.tweets_limit
예제 #12
0
def test_any_word():
    """Search by ``any_word`` and check each raw tweet against the phrase.

    A tweet passes when ``contains_any_word`` (defined elsewhere) is truthy
    for its text, its author's full name, or its author's user name.
    """
    any_phrase = 'kaczynski tusk'

    def _matches_any_word(tweet):
        # Fields are read lazily and in this order, stopping at the first
        # truthy result, exactly like an `or` chain.
        found = None
        for field in ('full_text', 'user_full_name', 'user_name'):
            found = contains_any_word(any_phrase, getattr(tweet, field))
            if found:
                break
        return found

    task = st.SearchTweetsTask(any_word=any_phrase, tweets_limit=100)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()

    tweet_list_assert_condition(collector.get_raw_list(), _matches_any_word)
예제 #13
0
def twitter_report():
    """Render a tweet report page for the submitted ``twitteruser`` field.

    Reads ``twitteruser`` from the request form. An empty value renders
    ``error.html``. Otherwise a fixed ``#covid19`` search is run, its tweets
    are collected in memory and also written to ``output_file.csv``, and
    ``tweets.html`` is rendered with the tweets and the username.

    NOTE(review): the username is only passed to the template; it does not
    influence the search itself. Confirm whether that is intended.
    """
    username = request.form['twitteruser']
    if not username:
        return render_template("error.html")

    # NOTE(review): this call passes `tweets_count`, while the other
    # SearchTweetsTask calls in this listing pass `tweets_limit` -- confirm
    # which keyword the installed library version accepts.
    task = st.SearchTweetsTask(all_words='#covid19', tweets_count=20)
    collector = st.CollectorTweetOutput()
    outputs = [collector, st.CsvTweetOutput('output_file.csv')]
    st.TweetSearchRunner(search_tweets_task=task, tweet_outputs=outputs).run()

    return render_template("tweets.html",
                           tweets=collector.get_scrapped_tweets(),
                           username=username)
예제 #14
0
def test_return_tweets_objects():
    """Run a ``#koronawirus`` search and check the runner's result object.

    Asserts that the result is a ``SearchTweetsResult``, that its
    ``downloaded_count`` equals the number of collected tweets, and that at
    least one tweet was downloaded.

    NOTE(review): the final hashtag assertion only looks at tweets that
    already contain the hashtag, so as written it cannot fail.
    """
    hashtag = '#koronawirus'
    task = st.SearchTweetsTask(all_words=hashtag, tweets_limit=200)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    result = runner.run()
    collected = collector.get_scrapped_tweets()

    assert isinstance(result, st.SearchTweetsResult)
    assert result.downloaded_count == len(collected)
    assert result.downloaded_count > 0

    tweets_with_hashtag = [
        tweet for tweet in collected if hashtag in tweet.full_text
    ]
    assert all(hashtag in tweet.full_text for tweet in tweets_with_hashtag)
예제 #15
0
def test_search_as_replay():
    """Search with ``ONLY_REPLIES`` and check each raw tweet has reply data.

    A tweet passes when at least one of ``in_reply_to_status_id_str`` and
    ``in_reply_to_user_id_str`` is non-empty. The per-tweet check is
    delegated to ``tweet_list_assert_condition`` (defined elsewhere).

    The name says "replay", presumably meaning "reply"; it is kept unchanged
    so existing test ids keep working.
    """
    def _has_reply_reference(tweet):
        # Truthiness replaces `len(a + b) > 0`: the result is the same for
        # two strings, and a field that is None counts as empty instead of
        # raising TypeError on the concatenation.
        return bool(tweet.in_reply_to_status_id_str
                    or tweet.in_reply_to_user_id_str)

    search_tweets_task = st.SearchTweetsTask(
        all_words='#covid19',
        tweets_limit=500,
        replies_filter=st.RepliesFilter.ONLY_REPLIES
    )
    tweets_collector = st.CollectorTweetOutput()
    st.TweetSearchRunner(
        search_tweets_task=search_tweets_task,
        tweet_outputs=[tweets_collector]
    ).run()
    tweet_list_assert_condition(
        tweets_collector.get_raw_list(),
        _has_reply_reference
    )
예제 #16
0
def get_tweets() -> List[st.Tweet]:
    """Run a ``#covid19`` search (``tweets_limit=100``) and return the tweets.

    Returns:
        Whatever the in-memory collector reports via
        ``get_scrapped_tweets()`` after the runner finishes.
    """
    collector = st.CollectorTweetOutput()
    search_task = st.SearchTweetsTask(all_words="#covid19", tweets_limit=100)
    runner = st.TweetSearchRunner(search_task, [collector])
    runner.run()
    return collector.get_scrapped_tweets()