def run(self) -> None:
    """Emit Tweet IDs for stored batch results or for Tweets read from stdin.

    With ``self.in_dir`` set, the results in that directory are handed to
    ``BatchResults.idify``, targeting ``self.out_dir`` or, when that is
    unset, ``self.in_dir`` itself. Otherwise every line of stdin is parsed
    as one JSON-encoded Tweet and its ID is written to stdout, one per line.
    """
    if not self.in_dir:
        # Streaming mode: one JSON document per input line.
        for raw_line in sys.stdin:
            tweet = Tweet(json.loads(raw_line))
            sys.stdout.write(str(tweet.id) + "\n")
        return

    stored_results = BatchResults(self.in_dir)
    # Fall back to the input directory when no output directory was given.
    target_dir = self.out_dir or self.in_dir
    stored_results.idify(target_dir)
def _assert_results_dir_structure(
    results_dir: Path,
    batch_entries: Sequence[BatchEntry],
    *,
    allow_empty: bool = False,
) -> None:
    """Assert that ``results_dir`` holds completed results for ``batch_entries``.

    Checks that the directory exists, that the entries loaded from it match
    ``batch_entries`` (compared by entry ID), and that every loaded entry has
    a completion time in the past and no recorded exception. For each entry
    the stored Tweets must not exceed the request's ``max_tweets`` and every
    Tweet's JSON must contain the request's query, ignoring case.

    Args:
        results_dir: Directory from which ``BatchResults`` is loaded.
        batch_entries: The entries expected in the directory; must be non-empty.
        allow_empty: If false (the default), every entry must have at least
            one Tweet.
    """
    assert results_dir.exists()
    assert len(batch_entries) != 0

    stored_results = BatchResults(results_dir)

    # BatchEntry is not hashable, so sets are out; compare ID-keyed dicts instead.
    expected_by_id = {entry.id: entry for entry in batch_entries}
    stored_by_id = {entry.id: entry for entry in stored_results}
    assert expected_by_id == stored_by_id

    for entry in stored_results:
        assert entry.completed_at is not None
        assert entry.completed_at < datetime.now()
        assert entry.exception is None

        tweets = list(stored_results.tweets(entry))
        if not allow_empty:
            assert len(tweets) != 0

        tweet_limit = checked_cast(int, entry.request.max_tweets)
        assert tweet_limit >= len(tweets)

        for tweet in tweets:
            # The cast stays inside the loop so that it is only performed for
            # entries that actually have Tweets.
            query = checked_cast(Search, entry.request).query.lower()
            tweet_json = json.dumps(tweet.to_json()).lower()
            assert query in tweet_json
def run(self) -> None:
    """Turn Tweet IDs back into Tweets for stored results or for stdin.

    With ``self.in_dir`` set, the results in that directory are handed to
    ``BatchResults.unidify`` together with the Twitter API settings,
    targeting ``self.out_dir`` or, when that is unset, ``self.in_dir``
    itself. Otherwise every stripped line of stdin is treated as a Tweet ID,
    looked up via ``statuses_lookup``, and each Tweet that is found is
    written to stdout as one line of JSON.
    """
    if not self.in_dir:
        # Lazy on purpose: IDs are consumed from stdin as the lookup asks for them.
        tweet_ids = (TweetId(raw_line.strip()) for raw_line in sys.stdin)
        for tweet in statuses_lookup(tweet_ids, self.settings.twitter_api):
            if tweet is None:
                # The lookup produced no Tweet for this ID; emit nothing for it.
                continue
            sys.stdout.write(json.dumps(tweet.to_json()) + "\n")
        return

    stored_results = BatchResults(self.in_dir)
    # Fall back to the input directory when no output directory was given.
    stored_results.unidify(self.settings.twitter_api, self.out_dir or self.in_dir)
def test_unidify_fail_and_restart(
    requests: Iterable[Request],
    settings: NastySettings,
    monkeypatch: MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Check that ``unidify`` can be re-run after a failed attempt.

    The batch is executed and idified. A first ``unidify`` runs against a
    patched lookup that lacks one Tweet and must raise ``KeyError``, leaving
    fewer entries in the output directory than the batch has. A second run
    against a complete lookup must then yield every entry and exactly the
    Tweets of the original results.
    """
    batch = Batch()
    for request in requests:
        batch.append(request)

    results = batch.execute()
    assert results is not None

    all_tweets = {
        tweet.id: tweet for entry in results for tweet in results.tweets(entry)
    }
    # Same Tweets minus one, so that the first lookup fails part-way through.
    incomplete_tweets = all_tweets.copy()
    incomplete_tweets.pop(TweetId("1115690615612825601"))

    idify_dir = tmp_path / "idify"
    idified = results.idify(idify_dir)
    assert idified is not None

    unidify_dir = tmp_path / "unidify"

    monkeypatch.setattr(
        nasty.batch.batch_results,
        nasty.batch.batch_results.statuses_lookup.__name__,  # type: ignore
        _mock_statuses_lookup(incomplete_tweets),
    )

    # The Tweet missing from incomplete_tweets must surface as a KeyError.
    with pytest.raises(KeyError):
        idified.unidify(settings.twitter_api, unidify_dir)
    partially_unidified = BatchResults(unidify_dir)
    assert len(batch) > len(partially_unidified)

    monkeypatch.setattr(
        nasty.batch.batch_results,
        nasty.batch.batch_results.statuses_lookup.__name__,  # type: ignore
        _mock_statuses_lookup(all_tweets),
    )

    # Second attempt into the same output directory, now with every Tweet available.
    unidified = idified.unidify(settings.twitter_api, unidify_dir)
    assert unidified is not None
    assert len(batch) == len(unidified)

    restored_tweets = {
        tweet.id: tweet for entry in unidified for tweet in unidified.tweets(entry)
    }
    assert all_tweets == restored_tweets
def _assert_tweet_texts(results: BatchResults) -> None:
    """Assert that the first entry of ``results`` has the three expected Tweet texts.

    The texts are compared as an ordered list against the ``text`` attribute
    of every Tweet returned by ``results.tweets(results[0])``.
    """
    expected_texts = [
        "We encourage submissions of new, previously, or concurrently published "
        "research. The event should be a forum for researchers to exchange ideas, "
        "discuss work, and get feedback. We hope you'll consider submitting your work.",
        "We'll have talks from research leaders on the latest advances in NLP. "
        "@NandoDF will be giving the keynote and more speakers will be announced soon. "
        "https://t.co/SB3URxn6ab",
        "Registration will open soon. In the meantime, we'll hope you'll save the date "
        "and consider joining us for what should be a fun day of listening to "
        "stimulating talks, mingling with like-minded people, exchanging ideas, and "
        "maybe even striking up a collaboration.",
    ]
    first_entry = results[0]
    actual_texts = [tweet.text for tweet in results.tweets(first_entry)]
    assert expected_texts == actual_texts
def _assert_tweet_ids(results: BatchResults) -> None:
    """Assert that the first entry of ``results`` has the three expected Tweet IDs.

    The IDs are compared as an ordered list against whatever
    ``results.tweet_ids(results[0])`` yields.
    """
    expected_ids = [
        TweetId(raw_id)
        for raw_id in (
            "1115690002233556993",
            "1115690615612825601",
            "1115691710657499137",
        )
    ]
    first_entry = results[0]
    actual_ids = list(results.tweet_ids(first_entry))
    assert expected_ids == actual_ids