Example #1
0
def test_scrap_tweet_with_double_media():
    """Fetch one tweet by id and expect it to carry exactly two media items."""
    requested_ids = ['1115978039534297088']
    output = st.CollectorTweetOutput()
    task = st.TweetsByIdsTask(requested_ids)
    runner = st.TweetsByIdsRunner(task, [output])
    runner.run()
    scrapped = output.get_scrapped_tweets()
    assert len(scrapped) == 1
    only_tweet = scrapped[0]
    assert len(only_tweet.media) == 2
Example #2
0
def test_scrap_tweet_with_single_media():
    """Fetch one tweet by id and expect it to carry exactly one media item."""
    requested_ids = ['1357358278746005508']
    output = st.CollectorTweetOutput()
    task = st.TweetsByIdsTask(requested_ids)
    st.TweetsByIdsRunner(task, [output]).run()
    scrapped = output.get_scrapped_tweets()
    # Exactly one tweet is expected back, holding a single media entry.
    assert len(scrapped) == 1
    assert len(scrapped[0].media) == 1
Example #3
0
def test_csv_serialization():
    """Send tweets to a CSV output and a collector, then compare the file content with the collector."""
    csv_path = get_temp_test_file_name('csv')
    collector = st.CollectorTweetOutput()
    outputs = [st.CsvTweetOutput(csv_path), collector]
    get_tweets_to_tweet_output_test(outputs)
    # Read the file back and compare it with what the collector received.
    restored = st.read_tweets_from_csv_file(csv_path)
    collected = collector.get_raw_list()
    two_lists_assert_equal(restored, collected)
Example #4
0
def test_file_json_lines_serialization():
    """Send tweets to a JSON-lines output and a collector, then compare the file content with the collector."""
    jl_path = get_temp_test_file_name('jl')
    collector = st.CollectorTweetOutput()
    outputs = [st.JsonLineFileTweetOutput(jl_path), collector]
    get_tweets_to_tweet_output_test(outputs)
    # Read the file back and compare it with what the collector received.
    restored = st.read_tweets_from_json_lines_file(jl_path)
    collected = collector.get_raw_list()
    two_lists_assert_equal(restored, collected)
Example #5
0
def _scrap_tweets_with_count_assert(count: int):
    """Search '#covid19' with a limit of ``count`` and assert exactly ``count`` tweets were collected.

    Args:
        count: value passed as ``tweets_limit`` and the expected number of
            collected tweets.
    """
    task = st.SearchTweetsTask(all_words='#covid19', tweets_limit=count)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    collected = collector.get_scrapped_tweets()
    assert len(collected) == count
Example #6
0
def test_print_all_tweet_output():
    """Check that the print output emits one 'Tweet(' marker per collected tweet.

    Standard output is captured with ``contextlib.redirect_stdout`` so that the
    previous ``sys.stdout`` is always restored, even when the scraping helper
    raises, and so that an outer capture (e.g. the test runner's) is put back
    instead of being replaced by ``sys.__stdout__``.
    """
    from contextlib import redirect_stdout

    captured_output = StringIO()
    with redirect_stdout(captured_output):
        # Outputs are created inside the redirect, as in the original flow
        # where stdout was swapped before they were constructed.
        tweets_collector = st.CollectorTweetOutput()
        get_tweets_to_tweet_output_test(
            [st.PrintTweetOutput(), tweets_collector])
    printed_count = captured_output.getvalue().count('Tweet(')
    assert printed_count == len(tweets_collector.get_raw_list())
Example #7
0
def test_return_tweets_from_user():
    """Search with ``from_username`` and require every collected tweet to have that ``user_name``."""
    username = '******'

    def is_from_user(tweet):
        return tweet.user_name == username

    task = st.SearchTweetsTask(from_username=username, tweets_limit=100)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_scrapped_tweets(), is_from_user)
Example #8
0
def test_exact_words():
    """Search by exact phrase and require the normalised phrase in every tweet's text.

    Both the phrase and each tweet's ``full_text`` are passed through
    ``to_base_text`` before the containment check.
    """
    # The phrase previously contained a mis-encoded character in place of
    # the Polish letter 'ń'; it is restored here so the search uses real words.
    exact_phrase = 'duda kaczyński kempa'
    search_tweets_task = st.SearchTweetsTask(exact_words=exact_phrase)
    tweets_collector = st.CollectorTweetOutput()
    st.TweetSearchRunner(search_tweets_task=search_tweets_task,
                         tweet_outputs=[tweets_collector]).run()
    # Normalise the phrase once instead of once per tweet.
    base_phrase = to_base_text(exact_phrase)
    tweet_list_assert_condition(
        tweets_collector.get_raw_list(),
        lambda tweet: base_phrase in to_base_text(tweet.full_text))
Example #9
0
def test_search_to_username():
    """Search with ``to_username`` and require the normalised username in every tweet's text."""
    username = '******'

    def mentions_user(tweet):
        # Both sides go through to_base_text before the containment check.
        return to_base_text(username) in to_base_text(tweet.full_text)

    task = st.SearchTweetsTask(to_username=username, tweets_limit=100)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_scrapped_tweets(), mentions_user)
def _run_search_test_covid_tweets_in_language(language: st.Language):
    """Search '#covid19' in ``language`` and require each tweet's ``lang`` to equal its short value.

    Args:
        language: language passed to the search task; its ``short_value`` is
            compared for equality with ``tweet.lang``.
    """
    def has_expected_language(tweet):
        return tweet.lang == language.short_value

    task = st.SearchTweetsTask(all_words='#covid19',
                               tweets_limit=100,
                               language=language)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_scrapped_tweets(),
                                has_expected_language)
Example #11
0
def _run_test_between_dates(since: Arrow, until: Arrow):
    """Search within a date range and require every tweet's ``created_at`` to lie in it (inclusive).

    Args:
        since: lower bound passed to the search task and used in the check.
        until: upper bound passed to the search task and used in the check.
    """
    def is_within_range(tweet):
        return since <= tweet.created_at <= until

    task = st.SearchTweetsTask(any_word="#koronawirus #covid19",
                               since=since,
                               until=until)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_scrapped_tweets(),
                                is_within_range)
Example #12
0
def test_get_not_existing_tweet():
    """Run a by-id scrape with a 404 response configured and expect nothing to be downloaded."""
    requested_ids = ['1337071849772093442']
    task = st.TweetsByIdTask(requested_ids)
    collector = st.CollectorTweetOutput()
    # The adapter is given a 404 response with an empty body for the
    # syndication URL.
    not_found = RequestResponse(404, '')
    web_client = CustomAdapter(
        [('https://cdn.syndication.twimg.com/tweet', not_found)])
    runner = st.TweetsByIdRunner(task, [collector], web_client=web_client)
    outcome = runner.run()
    assert outcome.downloaded_count == 0
    assert len(outcome.tweet_ids_not_scrapped) == 1
Example #13
0
def run_test_for_single_language(language: st.Language):
    """Search '#covid19' in ``language`` and require each tweet's ``lang`` to be contained in its short value.

    Args:
        language: language passed to the search task; the check is the
            containment test ``tweet.lang in language.short_value``.
    """
    def lang_matches(tweet):
        # Containment (not equality) is the check the test relies on.
        return tweet.lang in language.short_value

    task = st.SearchTweetsTask(all_words='#covid19',
                               tweets_limit=10,
                               language=language)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_raw_list(), lang_matches)
Example #14
0
def test_get_tweets_by_ids():
    """Request two tweet ids and expect one downloaded and one reported as not scrapped."""
    requested_ids = ['1337071849772093442', '1337067073051238400']
    task = st.TweetsByIdsTask(requested_ids)
    collector = st.CollectorTweetOutput()
    outcome = st.TweetsByIdsRunner(task, [collector]).run()
    # One id string per collected tweet; its length is the collected count.
    scrapped_ids = [tweet.id_str for tweet in collector.get_scrapped_tweets()]
    assert outcome.downloaded_count == 1
    assert len(scrapped_ids) == 1
    assert len(outcome.tweet_ids_not_scrapped) == 1
Example #15
0
def search_by_hashtag():
    """Search for '#koronawirus' and check the hashtag on the tweets whose text contains it."""
    hashtag = '#koronawirus'
    task = st.SearchTweetsTask(all_words=hashtag, tweets_limit=200)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    # NOTE(review): the filter below is the same predicate as the checked
    # expression, so every checked value is True and this assertion cannot
    # fail. Behaviour is kept as-is; confirm what was meant to be verified.
    with_hashtag = [
        tweet for tweet in collector.get_raw_list()
        if hashtag in tweet.full_text
    ]
    assert all(hashtag in tweet.full_text for tweet in with_hashtag)
Example #16
0
def test_get_not_existing_tweet_in_twitter():
    """Run a by-id scrape with a 200 'no tweets' response configured and expect nothing to be downloaded."""
    requested_ids = ['1337071849772093442']
    task = st.TweetsByIdTask(requested_ids)
    collector = st.CollectorTweetOutput()
    # The adapter is given a 200 response whose body is the module's
    # _TWITTER_JSON_NO_TWEETS payload for the adaptive search URL.
    empty_result = RequestResponse(200, _TWITTER_JSON_NO_TWEETS)
    web_client = CustomAdapter(
        [('https://api.twitter.com/2/search/adaptive.json', empty_result)])
    runner = st.TweetsByIdRunner(task, [collector], web_client=web_client)
    outcome = runner.run()
    assert outcome.downloaded_count == 0
    assert len(outcome.tweet_ids_not_scrapped) == 1
def test_using_proxy_client():
    """Run a '#covid19' search through a proxy-configured web client and expect the full limit of tweets."""
    task = st.SearchTweetsTask(all_words='#covid19', tweets_limit=200)
    proxy_url = 'http://localhost:3128'
    proxy_config = st.RequestsWebClientProxyConfig(http_proxy=proxy_url,
                                                   https_proxy=proxy_url)
    proxy_client = st.RequestsWebClient(proxy_config)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector],
                                  web_client=proxy_client)
    outcome = runner.run()
    collected = collector.get_scrapped_tweets()
    assert isinstance(outcome, st.SearchTweetsResult)
    assert len(collected) == task.tweets_limit
Example #18
0
def test_tweet_csv_read_iterator():
    """Read a CSV tweet file back in chunks and compare with the collected tweets.

    Tweets are sent both to a collector and to a CSV file; the file is then
    consumed through ``st.TweetCsvFileIterator`` (second argument 4) and
    the concatenated chunks are compared with the collector's raw list.
    """
    file_name = get_temp_test_file_name('csv')
    collector = st.CollectorTweetOutput()
    get_tweets_to_tweet_output_test([collector, st.CsvTweetOutput(file_name)])
    iterator = st.TweetCsvFileIterator(file_name, 4)
    list_from_iterator = []
    iterator.open()
    try:
        while True:
            try:
                chunk = next(iterator)
            except StopIteration:
                break
            list_from_iterator.extend(chunk)
    finally:
        # The iterator was previously left open; close it even when reading
        # fails, matching the JSON-lines iterator test.
        iterator.close()
    two_lists_assert_equal(list_from_iterator, collector.get_raw_list())
Example #19
0
def test_get_tweets_by_ids():
    """Request two tweet ids through an intercepted web client; expect one downloaded, one not scrapped."""
    requested_ids = ['1337071849772093442', '1337067073051238400']
    task = st.TweetsByIdTask(requested_ids)
    collector = st.CollectorTweetOutput()
    # Interceptors are passed in this order: logging first, then auth.
    interceptors = [
        LoggingRequestsWebClientInterceptor(),
        TwitterAuthWebClientInterceptor(),
    ]
    web_client = RequestsWebClient(interceptors=interceptors)
    outcome = st.TweetsByIdRunner(task, [collector],
                                  web_client=web_client).run()
    # One id string per collected tweet; its length is the collected count.
    scrapped_ids = [tweet.id_str for tweet in collector.get_raw_list()]
    assert outcome.downloaded_count == 1
    assert len(scrapped_ids) == 1
    assert len(outcome.tweet_ids_not_scrapped) == 1
Example #20
0
def test_any_word():
    """Search with ``any_word`` and require a match in each tweet's text, user full name or user name."""
    any_phrase = 'kaczynski tusk'

    def matches_any_word(tweet):
        # Checked in order: tweet text, user's full name, user's name.
        return (contains_any_word(any_phrase, tweet.full_text)
                or contains_any_word(any_phrase, tweet.user_full_name)
                or contains_any_word(any_phrase, tweet.user_name))

    task = st.SearchTweetsTask(any_word=any_phrase, tweets_limit=100)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_raw_list(), matches_any_word)
Example #21
0
def test_tweet_json_lines_read_iterator():
    """Read a JSON-lines tweet file back in chunks and compare with the collected tweets.

    Tweets are sent both to a collector and to a JSON-lines file; the file is
    then consumed through ``st.TweetJsonLineFileIterator`` (second argument
    4) and the concatenated chunks are compared with the collector's raw list.
    """
    file_name = get_temp_test_file_name('jl')
    collector = st.CollectorTweetOutput()
    get_tweets_to_tweet_output_test(
        [collector, st.JsonLineFileTweetOutput(file_name)])
    iterator = st.TweetJsonLineFileIterator(file_name, 4)
    list_from_iterator = []
    iterator.open()
    try:
        while True:
            try:
                chunk = next(iterator)
            except StopIteration:
                break
            list_from_iterator.extend(chunk)
    finally:
        # Close the iterator even if reading raises something other than
        # StopIteration, so the underlying file is not left open.
        iterator.close()
    two_lists_assert_equal(list_from_iterator, collector.get_raw_list())
Example #22
0
def test_search_as_replay():
    """Search '#covid19' with the ONLY_REPLIES filter and require reply metadata on every tweet."""
    def has_reply_reference(tweet):
        # Non-empty when either the replied-to status id or user id is set.
        combined = tweet.in_reply_to_status_id_str + tweet.in_reply_to_user_id_str
        return len(combined) > 0

    task = st.SearchTweetsTask(all_words='#covid19',
                               tweets_limit=500,
                               replies_filter=st.RepliesFilter.ONLY_REPLIES)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    runner.run()
    tweet_list_assert_condition(collector.get_raw_list(), has_reply_reference)
Example #23
0
def twitter_report():
    """Render a tweet report for the submitted 'twitteruser' form field.

    Renders "error.html" when the submitted value is empty. Otherwise runs a
    '#covid19' search, writes the tweets to 'output_file.csv', and renders
    "tweets.html" with the collected tweets and the username.
    """
    username = request.form['twitteruser']
    if not username:
        return render_template("error.html")

    # NOTE(review): the search is fixed to '#covid19'; ``username`` is only
    # forwarded to the template. Confirm whether it should drive the search.
    task = st.SearchTweetsTask(all_words='#covid19', tweets_count=20)
    collector = st.CollectorTweetOutput()
    outputs = [collector, st.CsvTweetOutput('output_file.csv')]
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=outputs)
    runner.run()

    collected = collector.get_scrapped_tweets()
    return render_template("tweets.html", tweets=collected, username=username)
Example #24
0
def test_return_tweets_objects():
    """Search for '#koronawirus' and check the result type and that its count matches the collector."""
    hashtag = '#koronawirus'
    task = st.SearchTweetsTask(all_words=hashtag, tweets_limit=200)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(search_tweets_task=task,
                                  tweet_outputs=[collector])
    outcome = runner.run()
    collected = collector.get_scrapped_tweets()
    assert isinstance(outcome, st.SearchTweetsResult)
    assert outcome.downloaded_count == len(collected)
    assert outcome.downloaded_count > 0
    # NOTE(review): the filter below is the same predicate as the checked
    # expression, so every checked value is True and this assertion cannot
    # fail. Behaviour is kept as-is; confirm what was meant to be verified.
    with_hashtag = [
        tweet for tweet in collected if hashtag in tweet.full_text
    ]
    assert all(hashtag in tweet.full_text for tweet in with_hashtag)
Example #25
0
def get_tweets() -> List[st.Tweet]:
    """Run a '#covid19' search limited to 100 tweets and return what the collector gathered.

    Returns:
        The list returned by the collector's ``get_scrapped_tweets()``.
    """
    collector = st.CollectorTweetOutput()
    search_task = st.SearchTweetsTask(all_words="#covid19", tweets_limit=100)
    runner = st.TweetSearchRunner(search_task, [collector])
    runner.run()
    return collector.get_scrapped_tweets()