def test_render_v2_generic_api_error():
    """A v2 API error stored in the tarfile renders as error.genericApiErrorV2."""

    def write_api_error():
        # Error body + metadata, exactly as the fetcher stored them.
        return _temp_json_lz4(
            "API-ERROR.lz4",
            {
                "title": "bad-request",
                "errors": [{"message": "a message from Twitter"}],
            },
            {
                "cjw:apiEndpoint": "2/tweets/search/recent",
                "cjw:apiParams": "expansions=author_id%2Cin_reply_to_user_id%2Creferenced_tweets.id.author_id&max_results=100&query=science&tweet.fields=id%2Ctext%2Cauthor_id%2Ccreated_at%2Cin_reply_to_user_id%2Cpublic_metrics%2Csource%2Clang%2Creferenced_tweets&user.fields=id%2Cdescription%2Cusername%2Cname",
                "cjw:httpStatus": "400",
            },
        )

    with _temp_tarfile([write_api_error]) as archive_path:
        expected_errors = [
            i18n_message(
                "error.genericApiErrorV2",
                {"title": "bad-request", "message": "a message from Twitter"},
            )
        ]
        _assert_render(
            twitter.FetchResult(archive_path, []),
            P(accumulate=False),
            None,  # no table output on API error
            expected_errors,
        )
def test_render_undefined_language_is_null():
    """A tweet with an undefined language should get a null "lang", not a string."""
    # Background on tweet language metadata:
    # https://blog.twitter.com/developer/en_us/a/2013/introducing-new-metadata-for-tweets.html

    def write_tweet():
        return _temp_json_path_lz4(
            "1088215462867959800.json.lz4",
            Path("tests/files/1_1_one_undefined_lang.json"),
            {"cjw:apiEndpoint": "1.1/statuses/user_timeline.json"},
        )

    expected_table = pa.table(
        {
            "screen_name": ["workbenchdata"],
            "created_at": pa.array(
                [dt("Wed Jan 23 23:22:39 +0000 2019")], pa.timestamp("ns")
            ),
            "text": ["🤖 https://t.co/FOhOfZT9MZ"],
            "retweet_count": [0],
            "favorite_count": [1],
            "in_reply_to_screen_name": pa.nulls(1, pa.utf8()),
            "retweeted_status_screen_name": pa.nulls(1, pa.utf8()),
            "user_description": [
                "Scrape, clean and analyze data without code. Create reproducible data workflows that can be shared with others"
            ],
            "source": ["Twitter for iPhone"],
            # "lang" is the key data point we're testing
            "lang": pa.nulls(1, pa.utf8()),
            "id": [1088215462867959800],
        }
    )
    with _temp_tarfile([write_tweet]) as archive_path:
        _assert_render(
            twitter.FetchResult(archive_path, []),
            P(accumulate=False),
            expected_table,
            [],
        )
def test_render_v1_1_generic_api_error():
    """A v1.1 API error stored in the tarfile renders as error.genericApiErrorV1_1."""

    def write_api_error():
        return _temp_json_lz4(
            "API-ERROR.lz4",
            {"error": "a message from Twitter"},
            {
                "cjw:apiEndpoint": "1.1/statuses/user_timeline",
                "cjw:apiParams": "count=200&screen_name=adamhooper",
                "cjw:httpStatus": "500",
            },
        )

    with _temp_tarfile([write_api_error]) as archive_path:
        expected_errors = [
            i18n_message(
                "error.genericApiErrorV1_1",
                {"httpStatus": "500", "error": "a message from Twitter"},
            )
        ]
        _assert_render(
            twitter.FetchResult(archive_path, []),
            P(accumulate=False),
            None,  # no table output on API error
            expected_errors,
        )
def test_render_v0_add_retweet_status_screen_name():
    # Migration test: an old stored table may lack the
    # "retweeted_status_screen_name" column. Rendering such a table should
    # add the column, treating every old row as having None in it.
    stored_table = pa.table(
        {
            "screen_name": ["TheTweepyTester", "TheTweepyTester"],
            "created_at": pa.array(
                [dt("2016-11-05T21:38:46Z"), dt("2016-11-05T21:37:13Z")],
                pa.timestamp("ns"),
            ),
            "text": ["Hello", "testing 1000 https://t.co/3vt8ITRQ3w"],
            "retweet_count": [0, 0],
            "favorite_count": [0, 0],
            "in_reply_to_screen_name": pa.array([None, None], pa.utf8()),
            "user_description": ["", ""],
            "source": ["Twitter Web Client", "Tweepy dev"],
            "lang": ["en", "en"],
            "id": [795017539831103489, 795017147651162112],
        }
    )
    # Same table, with the missing column inserted (all-null) at position 6.
    expected_table = stored_table.add_column(
        6, "retweeted_status_screen_name", pa.array([None, None], pa.utf8())
    )
    with _temp_parquet_file(stored_table) as stored_path:
        _assert_render(
            twitter.FetchResult(stored_path, []),
            P(accumulate=False),
            expected_table,
            [],
        )
def test_render_retweeted_status_full_text_twitter_api_v1():
    """An extended retweet from the v1.1 API should render its full text."""

    def write_tweet():
        return _temp_json_path_lz4(
            "1105492514289512400.json.lz4",
            Path("tests/files/1_1_one_extended_retweet.json"),
            {"cjw:apiEndpoint": "1.1/statuses/user_timeline.json"},
        )

    expected_table = pa.table(
        {
            "screen_name": ["workbenchdata"],
            "created_at": pa.array(
                [dt("Tue Mar 12 15:35:29 +0000 2019")], pa.timestamp("ns")
            ),
            # "text" is the key data point we're testing
            "text": [
                "RT @JacopoOttaviani: ⚡️ I'm playing with @workbenchdata: absolutely mindblowing. It's like a fusion between ScraperWiki, OpenRefine and Datawrapper. All of it online in the cloud and for free 👉🏽 https://t.co/fleqjI1qCI https://t.co/mmWHJLDjT2 #ddj #dataviz"
            ],
            "retweet_count": [7],
            "favorite_count": [0],
            "in_reply_to_screen_name": pa.nulls(1, pa.utf8()),
            "retweeted_status_screen_name": ["JacopoOttaviani"],
            "user_description": [
                "Scrape, clean and analyze data without code. Create reproducible data workflows that can be shared with others"
            ],
            "source": ["Twitter for iPhone"],
            "lang": ["en"],
            "id": [1105492514289512400],
        }
    )
    with _temp_tarfile([write_tweet]) as archive_path:
        _assert_render(
            twitter.FetchResult(archive_path, []),
            P(accumulate=False),
            expected_table,
            [],
        )
def test_render_v0_truncate_fetch_results():
    # Render should truncate the stored table: expected output is only the
    # first of the two stored rows. (The row limit is presumably configured
    # elsewhere in this test module — confirm.)
    stored_table = pa.table(
        {
            "screen_name": ["TheTweepyTester", "TheTweepyTester"],
            "created_at": pa.array(
                [dt("2016-11-05T21:38:46Z"), dt("2016-11-05T21:37:13Z")],
                pa.timestamp("ns"),
            ),
            "text": ["Hello", "testing 1000 https://t.co/3vt8ITRQ3w"],
            "retweet_count": [0, 0],
            "favorite_count": [0, 0],
            "in_reply_to_screen_name": pa.array([None, None], pa.utf8()),
            "retweeted_status_screen_name": pa.array([None, None], pa.utf8()),
            "user_description": ["", ""],
            "source": ["Twitter Web Client", "Tweepy dev"],
            "lang": ["en", "en"],
            "id": [795017539831103489, 795017147651162112],
        }
    )
    with _temp_parquet_file(stored_table) as stored_path:
        _assert_render(
            twitter.FetchResult(stored_path, []),
            P(accumulate=False),
            stored_table.slice(0, 1),  # first row only
            [],
        )
def test_render_v0_recover_after_bug_160258591():
    # https://www.pivotaltracker.com/story/show/160258591
    # 'id', 'retweet_count' and 'favorite_count' were stored with the wrong
    # (string) type after accumulating an empty table. The bad data is in
    # our database; render must convert those columns back to integers.
    stored_table = pa.table(
        {
            "screen_name": ["TheTweepyTester", "TheTweepyTester"],
            "created_at": pa.array(
                [dt("2016-11-05T21:38:46Z"), dt("2016-11-05T21:37:13Z")],
                pa.timestamp("ns"),
            ),
            "text": ["Hello", "testing 1000 https://t.co/3vt8ITRQ3w"],
            "retweet_count": ["0", "0"],
            "favorite_count": ["0", "0"],
            "in_reply_to_screen_name": pa.array([None, None], pa.utf8()),
            "retweeted_status_screen_name": pa.array([None, None], pa.utf8()),
            "user_description": ["", ""],
            "source": ["Twitter Web Client", "Tweepy dev"],
            "lang": ["en", "en"],
            "id": ["795017539831103489", "795017147651162112"],
        }
    )
    # Same table, with the three mistyped columns converted to integers.
    expected_table = stored_table.set_column(3, "retweet_count", pa.array([0, 0]))
    expected_table = expected_table.set_column(4, "favorite_count", pa.array([0, 0]))
    expected_table = expected_table.set_column(
        10, "id", pa.array([795017539831103489, 795017147651162112])
    )
    with _temp_parquet_file(stored_table) as stored_path:
        _assert_render(
            twitter.FetchResult(stored_path, []),
            P(accumulate=False),
            expected_table,
            [],
        )
def test_render_v0_empty_table_accumulated():
    """An empty stored table with accumulate=True yields the accumulated schema."""
    with _temp_parquet_file(twitter.ARROW_SCHEMA.empty_table()) as stored_path:
        _assert_render(
            twitter.FetchResult(stored_path, []),
            P(accumulate=True),
            twitter.ACCUMULATED_SCHEMA.empty_table(),
            [],
        )
def test_render_empty_tarfile():
    # Before any fetch there is no data; render an empty table with the
    # standard schema (for consistency with other modules).
    with _temp_tarfile([]) as archive_path:
        _assert_render(
            twitter.FetchResult(archive_path, []),
            P(accumulate=False),
            twitter.ARROW_SCHEMA.empty_table(),
            [],
        )
def test_render_empty_file():
    # A zero-byte fetch result behaves like "never fetched": an empty table
    # with the standard schema, no errors (for consistency with other modules).
    with tempfile.NamedTemporaryFile() as empty_file:
        _assert_render(
            twitter.FetchResult(Path(empty_file.name), []),
            P(accumulate=False),
            twitter.ARROW_SCHEMA.empty_table(),
            [],
        )
def test_render_fetch_generated_error():
    # fetch() records errors when params are invalid; render passes them
    # through without producing a table.
    fetch_errors = [twitter.RenderError(i18n_message("error.invalidUsername"))]
    _assert_render(
        twitter.FetchResult(Path("unused"), fetch_errors),
        P(accumulate=False),
        None,
        [i18n_message("error.invalidUsername")],
    )
def test_render_v0_zero_column_search_result():
    # An empty table might be stored as zero-column. That was a bug, but
    # real stored data looks like this; render must still output the full
    # tweet-table schema.
    with _temp_parquet_file(pa.table({})) as stored_path:
        _assert_render(
            twitter.FetchResult(stored_path, []),
            P(accumulate=False),
            twitter.ARROW_SCHEMA.empty_table(),
            [],
        )
def test_render_http_429():
    # Twitter's 429 response body isn't JSON. Render must still detect the
    # rate-limit status and report a friendly error.
    def stored_rate_limit_file():
        return contextlib.nullcontext(
            (
                "API-ERROR.lz4",
                lz4.frame.compress(b"Rate limit exceeded"),
                {
                    "cjw:apiEndpoint": "2/tweets/search/recent",
                    "cjw:httpStatus": "429",
                },
            )
        )

    with _temp_tarfile([stored_rate_limit_file]) as archive_path:
        _assert_render(
            twitter.FetchResult(archive_path, []),
            P(accumulate=False),
            None,
            [i18n_message("error.tooManyRequests")],
        )
def test_render_http_401_user_tweets_are_private():
    """HTTP 401 on a user timeline renders as error.userTweetsArePrivate."""

    def write_api_error():
        return _temp_json_lz4(
            "API-ERROR.lz4",
            {"error": "doesn't really matter"},
            {
                "cjw:apiEndpoint": "1.1/statuses/user_timeline",
                "cjw:apiParams": "count=200&screen_name=elizabeth1",
                "cjw:httpStatus": "401",
            },
        )

    with _temp_tarfile([write_api_error]) as archive_path:
        _assert_render(
            twitter.FetchResult(archive_path, []),
            P(accumulate=False),
            None,
            [
                # NOTE(review): "******" looks like a redacted fixture value —
                # the stored params say screen_name=elizabeth1; confirm the
                # expected username argument.
                i18n_message("error.userTweetsArePrivate", {"username": "******"})
            ],
        )
def test_render_http_404_username_not_found():
    """HTTP 404 on a user timeline renders as error.userDoesNotExist."""

    def write_api_error():
        return _temp_json_lz4(
            "API-ERROR.lz4",
            {"error": "doesn't really matter"},
            {
                "cjw:apiEndpoint": "1.1/statuses/user_timeline",
                "cjw:apiParams": "count=200&screen_name=doesnotexistnoreally",
                "cjw:httpStatus": "404",
            },
        )

    with _temp_tarfile([write_api_error]) as archive_path:
        _assert_render(
            twitter.FetchResult(archive_path, []),
            P(accumulate=False),
            None,
            [
                # NOTE(review): "******" looks like a redacted fixture value —
                # the stored params say screen_name=doesnotexistnoreally;
                # confirm the expected username argument.
                i18n_message("error.userDoesNotExist", {"username": "******"})
            ],
        )
def test_render_network_error():
    """A stored NETWORK-ERROR file renders as the stored cjwmodule http error."""

    def write_network_error():
        return _temp_json_lz4(
            "NETWORK-ERROR.json.lz4",
            {
                "id": "http.errors.HttpErrorGeneric",
                "arguments": {"type": "NotImplemented"},
                "source": "cjwmodule",
            },
            {"cjw:apiEndpoint": "2/tweets/search/recent"},
        )

    with _temp_tarfile([write_network_error]) as archive_path:
        _assert_render(
            twitter.FetchResult(archive_path, []),
            P(accumulate=False),
            None,
            [
                cjwmodule_i18n_message(
                    "http.errors.HttpErrorGeneric", {"type": "NotImplemented"}
                )
            ],
        )