Ejemplo n.º 1
0
def test_render_v2_generic_api_error():
    """An HTTP 400 API-ERROR entry from a v2 endpoint yields the generic v2 error."""
    response_body = {
        "title": "bad-request",
        "errors": [{"message": "a message from Twitter"}],
    }
    response_metadata = {
        "cjw:apiEndpoint": "2/tweets/search/recent",
        "cjw:apiParams": "expansions=author_id%2Cin_reply_to_user_id%2Creferenced_tweets.id.author_id&max_results=100&query=science&tweet.fields=id%2Ctext%2Cauthor_id%2Ccreated_at%2Cin_reply_to_user_id%2Cpublic_metrics%2Csource%2Clang%2Creferenced_tweets&user.fields=id%2Cdescription%2Cusername%2Cname",
        "cjw:httpStatus": "400",
    }

    def api_error_entry():
        return _temp_json_lz4("API-ERROR.lz4", response_body, response_metadata)

    # The expected message carries the title and the first error's message
    # from the response body above.
    expected_errors = [
        i18n_message(
            "error.genericApiErrorV2",
            {"title": "bad-request", "message": "a message from Twitter"},
        )
    ]

    with _temp_tarfile([api_error_entry]) as tar_path:
        fetch_result = twitter.FetchResult(tar_path, [])
        _assert_render(fetch_result, P(accumulate=False), None, expected_errors)
Ejemplo n.º 2
0
def test_render_undefined_language_is_null():
    """A v1.1 tweet from the undefined-language fixture renders with a null "lang"."""
    # https://blog.twitter.com/developer/en_us/a/2013/introducing-new-metadata-for-tweets.html
    fixture_path = Path("tests/files/1_1_one_undefined_lang.json")
    metadata = {"cjw:apiEndpoint": "1.1/statuses/user_timeline.json"}

    def timeline_entry():
        return _temp_json_path_lz4(
            "1088215462867959800.json.lz4", fixture_path, metadata
        )

    null_text = pa.nulls(1, pa.utf8())
    created_at = pa.array(
        [dt("Wed Jan 23 23:22:39 +0000 2019")], pa.timestamp("ns")
    )
    expected_table = pa.table({
        "screen_name": ["workbenchdata"],
        "created_at": created_at,
        "text": ["🤖 https://t.co/FOhOfZT9MZ"],
        "retweet_count": [0],
        "favorite_count": [1],
        "in_reply_to_screen_name": null_text,
        "retweeted_status_screen_name": null_text,
        "user_description": [
            "Scrape, clean and analyze data without code. Create reproducible data workflows that can be shared with others"
        ],
        "source": ["Twitter for iPhone"],
        # "lang" is the key data point under test: it must come out null.
        "lang": null_text,
        "id": [1088215462867959800],
    })

    with _temp_tarfile([timeline_entry]) as tar_path:
        fetch_result = twitter.FetchResult(tar_path, [])
        _assert_render(fetch_result, P(accumulate=False), expected_table, [])
Ejemplo n.º 3
0
def test_render_v1_1_generic_api_error():
    """An HTTP 500 API-ERROR entry from a v1.1 endpoint yields the generic v1.1 error."""
    response_body = {"error": "a message from Twitter"}
    response_metadata = {
        "cjw:apiEndpoint": "1.1/statuses/user_timeline",
        "cjw:apiParams": "count=200&screen_name=adamhooper",
        "cjw:httpStatus": "500",
    }

    def api_error_entry():
        return _temp_json_lz4("API-ERROR.lz4", response_body, response_metadata)

    # The expected message carries the HTTP status from the metadata and the
    # error text from the response body.
    expected_errors = [
        i18n_message(
            "error.genericApiErrorV1_1",
            {"httpStatus": "500", "error": "a message from Twitter"},
        )
    ]

    with _temp_tarfile([api_error_entry]) as tar_path:
        fetch_result = twitter.FetchResult(tar_path, [])
        _assert_render(fetch_result, P(accumulate=False), None, expected_errors)
Ejemplo n.º 4
0
def test_render_v0_add_retweet_status_screen_name():
    """A stored table lacking "retweeted_status_screen_name" gains it as all-null."""
    # Migration case: an old stored table has no retweeted_status_screen_name
    # column. Rendering should treat every row as having None in that column.
    # NOTE(review): the original comment talks about accumulating tweets, but
    # the params below use accumulate=False -- confirm that is intended.
    created_at = pa.array(
        [dt("2016-11-05T21:38:46Z"), dt("2016-11-05T21:37:13Z")],
        pa.timestamp("ns"),
    )
    null_pair = pa.array([None, None], pa.utf8())
    input_table = pa.table({
        "screen_name": ["TheTweepyTester", "TheTweepyTester"],
        "created_at": created_at,
        "text": ["Hello", "testing 1000 https://t.co/3vt8ITRQ3w"],
        "retweet_count": [0, 0],
        "favorite_count": [0, 0],
        "in_reply_to_screen_name": null_pair,
        "user_description": ["", ""],
        "source": ["Twitter Web Client", "Tweepy dev"],
        "lang": ["en", "en"],
        "id": [795017539831103489, 795017147651162112],
    })
    # The new column lands at index 6, right after in_reply_to_screen_name.
    expected_table = input_table.add_column(
        6, "retweeted_status_screen_name", pa.array([None, None], pa.utf8())
    )

    with _temp_parquet_file(input_table) as parquet_path:
        fetch_result = twitter.FetchResult(parquet_path, [])
        _assert_render(fetch_result, P(accumulate=False), expected_table, [])
Ejemplo n.º 5
0
def test_render_retweeted_status_full_text_twitter_api_v1():
    """A v1.1 extended retweet renders with the full, untruncated retweet text."""
    fixture_path = Path("tests/files/1_1_one_extended_retweet.json")
    metadata = {"cjw:apiEndpoint": "1.1/statuses/user_timeline.json"}

    def timeline_entry():
        return _temp_json_path_lz4(
            "1105492514289512400.json.lz4", fixture_path, metadata
        )

    # "text" is the key data point under test.
    full_text = "RT @JacopoOttaviani: ⚡️ I'm playing with @workbenchdata: absolutely mindblowing. It's like a fusion between ScraperWiki, OpenRefine and Datawrapper. All of it online in the cloud and for free 👉🏽 https://t.co/fleqjI1qCI https://t.co/mmWHJLDjT2 #ddj #dataviz"
    created_at = pa.array(
        [dt("Tue Mar 12 15:35:29 +0000 2019")], pa.timestamp("ns")
    )
    expected_table = pa.table({
        "screen_name": ["workbenchdata"],
        "created_at": created_at,
        "text": [full_text],
        "retweet_count": [7],
        "favorite_count": [0],
        "in_reply_to_screen_name": pa.nulls(1, pa.utf8()),
        "retweeted_status_screen_name": ["JacopoOttaviani"],
        "user_description": [
            "Scrape, clean and analyze data without code. Create reproducible data workflows that can be shared with others"
        ],
        "source": ["Twitter for iPhone"],
        "lang": ["en"],
        "id": [1105492514289512400],
    })

    with _temp_tarfile([timeline_entry]) as tar_path:
        fetch_result = twitter.FetchResult(tar_path, [])
        _assert_render(fetch_result, P(accumulate=False), expected_table, [])
Ejemplo n.º 6
0
def test_render_v0_truncate_fetch_results():
    """A two-row stored table is expected to render as only its first row."""
    # NOTE(review): nothing in this test sets the row limit that would cause
    # truncation to one row; presumably it is configured elsewhere (fixture,
    # decorator or module setting) -- confirm.
    created_at = pa.array(
        [dt("2016-11-05T21:38:46Z"), dt("2016-11-05T21:37:13Z")],
        pa.timestamp("ns"),
    )
    all_rows = pa.table({
        "screen_name": ["TheTweepyTester", "TheTweepyTester"],
        "created_at": created_at,
        "text": ["Hello", "testing 1000 https://t.co/3vt8ITRQ3w"],
        "retweet_count": [0, 0],
        "favorite_count": [0, 0],
        "in_reply_to_screen_name": pa.array([None, None], pa.utf8()),
        "retweeted_status_screen_name": pa.array([None, None], pa.utf8()),
        "user_description": ["", ""],
        "source": ["Twitter Web Client", "Tweepy dev"],
        "lang": ["en", "en"],
        "id": [795017539831103489, 795017147651162112],
    })
    first_row_only = all_rows.slice(0, 1)

    with _temp_parquet_file(all_rows) as parquet_path:
        fetch_result = twitter.FetchResult(parquet_path, [])
        _assert_render(fetch_result, P(accumulate=False), first_row_only, [])
Ejemplo n.º 7
0
def test_render_v0_recover_after_bug_160258591():
    """Stored string-typed counts and ids are rendered back as integers."""
    # https://www.pivotaltracker.com/story/show/160258591
    # 'id', 'retweet_count' and 'favorite_count' had the wrong type after
    # accumulating an empty table. That bad data is now in our database, so
    # rendering must convert it back to the type we want.
    created_at = pa.array(
        [dt("2016-11-05T21:38:46Z"), dt("2016-11-05T21:37:13Z")],
        pa.timestamp("ns"),
    )
    input_table = pa.table({
        "screen_name": ["TheTweepyTester", "TheTweepyTester"],
        "created_at": created_at,
        "text": ["Hello", "testing 1000 https://t.co/3vt8ITRQ3w"],
        "retweet_count": ["0", "0"],
        "favorite_count": ["0", "0"],
        "in_reply_to_screen_name": pa.array([None, None], pa.utf8()),
        "retweeted_status_screen_name": pa.array([None, None], pa.utf8()),
        "user_description": ["", ""],
        "source": ["Twitter Web Client", "Tweepy dev"],
        "lang": ["en", "en"],
        "id": ["795017539831103489", "795017147651162112"],
    })

    # Same table, with the three string columns swapped for integer arrays
    # at their existing positions (3, 4 and 10).
    expected_table = input_table.set_column(
        3, "retweet_count", pa.array([0, 0])
    )
    expected_table = expected_table.set_column(
        4, "favorite_count", pa.array([0, 0])
    )
    expected_table = expected_table.set_column(
        10, "id", pa.array([795017539831103489, 795017147651162112])
    )

    with _temp_parquet_file(input_table) as parquet_path:
        fetch_result = twitter.FetchResult(parquet_path, [])
        _assert_render(fetch_result, P(accumulate=False), expected_table, [])
Ejemplo n.º 8
0
def test_render_v0_empty_table_accumulated():
    """With accumulate=True, an empty stored table renders as an empty accumulated-schema table."""
    stored_table = twitter.ARROW_SCHEMA.empty_table()
    expected_table = twitter.ACCUMULATED_SCHEMA.empty_table()

    with _temp_parquet_file(stored_table) as parquet_path:
        fetch_result = twitter.FetchResult(parquet_path, [])
        _assert_render(fetch_result, P(accumulate=True), expected_table, [])
Ejemplo n.º 9
0
def test_render_empty_tarfile():
    """A tarfile with no entries renders as an empty ARROW_SCHEMA table, no errors."""
    # NOTE(review): the original comment said that when we haven't fetched we
    # shouldn't show any columns (for consistency with other modules), yet
    # the expectation is twitter.ARROW_SCHEMA.empty_table() -- confirm which
    # is intended.
    expected_table = twitter.ARROW_SCHEMA.empty_table()

    with _temp_tarfile([]) as tar_path:
        fetch_result = twitter.FetchResult(tar_path, [])
        _assert_render(fetch_result, P(accumulate=False), expected_table, [])
Ejemplo n.º 10
0
def test_render_empty_file():
    """A zero-byte fetch file renders as an empty ARROW_SCHEMA table, no errors."""
    # NOTE(review): the original comment said that when we haven't fetched we
    # shouldn't show any columns (for consistency with other modules), yet
    # the expectation is twitter.ARROW_SCHEMA.empty_table() -- confirm which
    # is intended.
    expected_table = twitter.ARROW_SCHEMA.empty_table()

    with tempfile.NamedTemporaryFile() as empty_file:
        empty_path = Path(empty_file.name)
        fetch_result = twitter.FetchResult(empty_path, [])
        _assert_render(fetch_result, P(accumulate=False), expected_table, [])
Ejemplo n.º 11
0
def test_render_fetch_generated_error():
    """Errors already attached to the FetchResult are rendered, with no table."""
    # fetch() generates errors when params are invalid; the path is never
    # meant to be read in that case, hence the placeholder name.
    fetch_errors = [twitter.RenderError(i18n_message("error.invalidUsername"))]
    fetch_result = twitter.FetchResult(Path("unused"), fetch_errors)
    expected_errors = [i18n_message("error.invalidUsername")]

    _assert_render(fetch_result, P(accumulate=False), None, expected_errors)
Ejemplo n.º 12
0
def test_render_v0_zero_column_search_result():
    """A stored zero-column table renders as an empty ARROW_SCHEMA table."""
    # An empty table might have been stored with zero columns. That is a bug,
    # but real data like this exists, so it must be handled: the output
    # should have all the same columns as a tweet table.
    zero_column_table = pa.table({})
    expected_table = twitter.ARROW_SCHEMA.empty_table()

    with _temp_parquet_file(zero_column_table) as parquet_path:
        fetch_result = twitter.FetchResult(parquet_path, [])
        _assert_render(fetch_result, P(accumulate=False), expected_table, [])
Ejemplo n.º 13
0
def test_render_http_429():
    """A non-JSON HTTP 429 API-ERROR entry renders as error.tooManyRequests."""
    # Twitter's 429 error body isn't JSON, so the entry is built by hand from
    # raw compressed bytes instead of going through a JSON helper.
    compressed_body = lz4.frame.compress(b"Rate limit exceeded")
    response_metadata = {
        "cjw:apiEndpoint": "2/tweets/search/recent",
        "cjw:httpStatus": "429",
    }

    def api_error_entry():
        return contextlib.nullcontext(
            ("API-ERROR.lz4", compressed_body, response_metadata)
        )

    expected_errors = [i18n_message("error.tooManyRequests")]

    with _temp_tarfile([api_error_entry]) as tar_path:
        fetch_result = twitter.FetchResult(tar_path, [])
        _assert_render(fetch_result, P(accumulate=False), None, expected_errors)
Ejemplo n.º 14
0
def test_render_http_401_user_tweets_are_private():
    """An HTTP 401 from the v1.1 user timeline renders as error.userTweetsArePrivate."""
    with _temp_tarfile([
            lambda: _temp_json_lz4(
                "API-ERROR.lz4",
                # The body is irrelevant here: the expected message is built
                # from the HTTP status and request params in the metadata.
                {"error": "doesn't really matter"},
                {
                    "cjw:apiEndpoint": "1.1/statuses/user_timeline",
                    "cjw:apiParams": "count=200&screen_name=elizabeth1",
                    "cjw:httpStatus": "401",
                },
            )
    ]) as tar_path:
        _assert_render(
            twitter.FetchResult(tar_path, []),
            P(accumulate=False),
            None,
            [
                # The username must match the screen_name in cjw:apiParams
                # above; the previous value was a masked "******" placeholder.
                i18n_message("error.userTweetsArePrivate",
                             {"username": "elizabeth1"})
            ],
        )
Ejemplo n.º 15
0
def test_render_http_404_username_not_found():
    """An HTTP 404 from the v1.1 user timeline renders as error.userDoesNotExist."""
    with _temp_tarfile([
            lambda: _temp_json_lz4(
                "API-ERROR.lz4",
                # The body is irrelevant here: the expected message is built
                # from the HTTP status and request params in the metadata.
                {"error": "doesn't really matter"},
                {
                    "cjw:apiEndpoint": "1.1/statuses/user_timeline",
                    "cjw:apiParams":
                    "count=200&screen_name=doesnotexistnoreally",
                    "cjw:httpStatus": "404",
                },
            )
    ]) as tar_path:
        _assert_render(
            twitter.FetchResult(tar_path, []),
            P(accumulate=False),
            None,
            [
                # The username must match the screen_name in cjw:apiParams
                # above; the previous value was a masked "******" placeholder.
                i18n_message("error.userDoesNotExist",
                             {"username": "doesnotexistnoreally"})
            ],
        )
Ejemplo n.º 16
0
def test_render_network_error():
    """A NETWORK-ERROR entry renders as the cjwmodule message it describes."""
    # The entry's "id" and "arguments" mirror the expected cjwmodule message.
    stored_error = {
        "id": "http.errors.HttpErrorGeneric",
        "arguments": {"type": "NotImplemented"},
        "source": "cjwmodule",
    }
    metadata = {"cjw:apiEndpoint": "2/tweets/search/recent"}

    def network_error_entry():
        return _temp_json_lz4("NETWORK-ERROR.json.lz4", stored_error, metadata)

    expected_errors = [
        cjwmodule_i18n_message(
            "http.errors.HttpErrorGeneric", {"type": "NotImplemented"}
        )
    ]

    with _temp_tarfile([network_error_entry]) as tar_path:
        fetch_result = twitter.FetchResult(tar_path, [])
        _assert_render(fetch_result, P(accumulate=False), None, expected_errors)