def test_execute_exception_internal_server_error(tmp_path: Path) -> None:
    """A 500 response from Twitter is recorded as an exception in the meta file."""
    # Answer the robots.txt request (presumably fetched before searching —
    # confirm against the crawler implementation) so that only the search
    # request itself fails.
    responses.add(
        responses.GET,
        "https://mobile.twitter.com/robots.txt",
        body="Crawl-delay: 1",
    )
    # The search request is answered with 500 Internal Server Error.
    responses.add(
        responses.GET,
        "https://mobile.twitter.com/search",
        match_querystring=False,
        status=HTTPStatus.INTERNAL_SERVER_ERROR.value,
    )

    batch = Batch()
    batch.append(Search("trump", max_tweets=50))
    # Execution must report overall failure...
    assert not batch.execute(tmp_path)

    # ...and the persisted meta file must carry the serialized exception.
    entry = batch[0]
    assert entry == read_json(tmp_path / entry.meta_file_name, BatchEntry)
    assert entry.exception is not None
    assert entry.exception.type == "UnexpectedStatusCodeException"
def test_execute_retrying_after_exception(
    tmp_path: Path, caplog: LogCaptureFixture
) -> None:
    """An entry with a stored exception is retried and its meta file rewritten."""
    batch = Batch()
    batch.append(Search("trump", max_tweets=50))
    entry = batch[0]

    # Persist the entry together with a serialized exception, simulating a
    # previously failed execution.
    stored_exception = _make_json_serialized_exception()
    entry.exception = stored_exception
    meta_file = tmp_path / entry.meta_file_name
    write_json(meta_file, entry)
    entry.exception = None

    stat_before = meta_file.stat()

    caplog.clear()
    assert batch.execute(tmp_path)

    # Exactly one log record announces the retry and echoes the old exception.
    retry_records = [
        record
        for record in caplog.records
        if "Retrying" in record.msg and str(stored_exception) in record.msg
    ]
    assert 1 == len(retry_records)

    _assert_results_dir_structure(tmp_path, list(batch))

    # The meta file must have been rewritten after the successful retry.
    stat_after = meta_file.stat()
    assert stat_before.st_mtime_ns < stat_after.st_mtime_ns
def test_query_word_single(word: str) -> None:
    """Every tweet returned for a single-word query mentions that word."""
    results = list(Search(word, max_tweets=50).request())
    assert 50 == len(results)
    for result in results:
        # Case-insensitive containment check over the full JSON serialization.
        assert word.lower() in json.dumps(result.to_json()).lower()
def test_max_tweets(max_tweets: int) -> None:
    """Exactly max_tweets unique tweets are returned."""
    # batch_size=100 speeds up these larger requests; accuracy to the query
    # does not matter for this test.
    results = list(
        Search("trump", max_tweets=max_tweets, batch_size=100).request()
    )
    assert max_tweets == len(results)
    # No duplicate tweet IDs among the results.
    assert len(results) == len({tweet.id for tweet in results})
def test_special_msg_coronavirus() -> None:
    """The LATEST filter still returns a full page for the 'coronavirus' query."""
    search = Search("coronavirus", max_tweets=50, filter_=SearchFilter.LATEST)
    assert 50 == len(list(search.request()))
def test_lang_invalid() -> None:
    """An unknown language code yields no results."""
    results = Search("trump", lang="INVALID", max_tweets=50).request()
    assert not list(results)
def test_lang_de() -> None:
    """Searching with lang='de' returns a full page of results."""
    results = list(Search("trump", lang="de", max_tweets=50).request())
    assert 50 == len(results)
def test_filter_top() -> None:
    """The TOP filter returns a full page of results."""
    results = Search("trump", filter_=SearchFilter.TOP, max_tweets=50).request()
    assert 50 == len(list(results))
def test_date_future() -> None:
    """A since-date in the future can match no tweets."""
    next_week = date.today() + timedelta(days=7)
    assert not list(Search("trump", since=next_week).request())
def test_date_range(args: Tuple[date, date]) -> None:
    """All returned tweets fall within the half-open [since, until) interval."""
    since, until = args
    results = list(
        Search("trump", since=since, until=until, max_tweets=40).request()
    )
    assert 40 == len(results)
    for result in results:
        assert since <= result.created_at.date() < until
def test_query_user_from(user: str) -> None:
    """A from:@user query only returns tweets authored by that user."""
    results = list(Search("from:@" + user, max_tweets=50).request())
    assert 0 < len(results) <= 50
    for result in results:
        assert user.lower() == result.user.screen_name.lower()
def test_lang_invalid_latest() -> None:
    """An unknown language code yields no results under the LATEST filter, too.

    Renamed from ``test_lang_invalid``: a second definition under that name
    would shadow the earlier filter-less variant in the same module, so pytest
    would collect and run only one of the two tests.
    """
    assert not list(
        Search(
            "trump", lang="INVALID", max_tweets=50, filter_=SearchFilter.LATEST
        ).request()
    )
def test_search_into_daily_requests_illegal_args(search: Search) -> None:
    """Splitting an unsplittable search into daily requests raises ValueError."""
    with pytest.raises(ValueError):
        search.to_daily_requests()
"until": date(2010, 1, 1) }), ], ids=lambda args: args[0].__name__ + ": " + repr(args[1]), ) def test_illegal_args( args: Tuple[Type[Request], Mapping[str, object]]) -> None: type_, kwargs = args with pytest.raises(ValueError): type_(**kwargs) @pytest.mark.parametrize( "request_", [ Search("q"), Replies("332308211321425920", max_tweets=None), Thread("332308211321425920", max_tweets=123, batch_size=456), ], ids=repr, ) def test_json_conversion(request_: Request) -> None: assert request_ == request_.from_json(request_.to_json()) @pytest.mark.parametrize( "search", [Search("q", since=date(2010, 1, 1), until=date(2010, 2, 1))], ids=repr, ) def test_search_to_daily_requests(search: Search) -> None:
def test_json_conversion_completed_at() -> None:
    """A BatchEntry with completed_at set survives a JSON round trip."""
    entry = BatchEntry(
        Search("q"), id_="id", completed_at=datetime.now(), exception=None
    )
    assert entry == entry.from_json(entry.to_json())
from _pytest.logging import LogCaptureFixture
from _pytest.monkeypatch import MonkeyPatch

from nasty._util.io_ import read_file, read_lines_file, write_file
from nasty._util.json_ import JsonSerializedException, read_json, write_json
from nasty._util.typing_ import checked_cast
from nasty.batch.batch import Batch
from nasty.batch.batch_entry import BatchEntry
from nasty.batch.batch_results import BatchResults
from nasty.request.replies import Replies
from nasty.request.request import Request
from nasty.request.search import Search, SearchFilter
from nasty.request.thread import Thread

# Representative requests of every request type and several configurations,
# shared by the tests in this module.
REQUESTS: Sequence[Request] = [
    Search("q"),
    Search("q", filter_=SearchFilter.PHOTOS, lang="de"),
    Search("q", since=date(2009, 1, 20), until=date(2017, 1, 20)),
    Replies("332308211321425920"),
    Replies("332308211321425920", max_tweets=50),
    Thread("332308211321425920"),
    Thread("332308211321425920", batch_size=100),
]


def _make_json_serialized_exception() -> JsonSerializedException:
    """Return a JsonSerializedException that carries a real stack trace.

    The ValueError is raised and immediately caught so that the serialized
    exception contains an actual traceback rather than a synthetic one.
    """
    # Collect exception with trace.
    try:
        raise ValueError("Test Error.")
    except ValueError as e:
        return JsonSerializedException.from_exception(e)