async def test_run__passes_correct_blocklists_to_subscriptions(mock_client):
    """
    Run the watcher over a single submission and check that each subscription is handed the blocklist query
    belonging to its own destination (an empty AndQuery when that destination has no blocklist entry).
    """
    submission = MockSubmission("12322")
    watcher = SubscriptionWatcher(MockExportAPI().with_submission(submission), mock_client)
    new_results_stub = MockMethod([submission])
    watcher._get_new_results = new_results_stub.async_call
    watcher.BACK_OFF = 1
    # Destination 156 has two blocked terms; destination -232 has no entry (-200 is not subscribed here)
    watcher.blocklists = {156: {"test", "ych"}, -200: {"example"}}
    blocked_sub = MockSubscription("deer", 156)
    unblocked_sub = MockSubscription("dog", -232)
    watcher.subscriptions = [blocked_sub, unblocked_sub]

    # watcher_killer is defined elsewhere; presumably it stops the watcher so that run() returns
    loop = asyncio.get_event_loop()
    killer_task = loop.create_task(watcher_killer(watcher))
    # Run watcher
    await watcher.run()
    await killer_task

    # The subscription with a blocklist gets one query, negating both blocked terms
    assert submission in blocked_sub.submissions_checked
    assert len(blocked_sub.blocklists) == 1
    not_test = NotQuery(WordQuery("test"))
    not_ych = NotQuery(WordQuery("ych"))
    # The blocklist is stored as a set, so either ordering of the two terms is accepted
    assert blocked_sub.blocklists[0] in [
        AndQuery([not_test, not_ych]),
        AndQuery([not_ych, not_test]),
    ]

    # The subscription without a blocklist gets one empty query
    assert submission in unblocked_sub.submissions_checked
    assert len(unblocked_sub.blocklists) == 1
    assert unblocked_sub.blocklists[0] == AndQuery([])

    assert new_results_stub.called
def test_connectors():
    """Check how parse_query handles explicit and implied AND/OR connectors."""
    first = WordQuery("first")
    doc = WordQuery("doc")
    document = WordQuery("document")

    assert parse_query("first and document") == AndQuery([first, document])
    assert parse_query("first or document") == OrQuery([first, document])

    # The AND pair is grouped first, whether the AND is written out or implied by adjacency
    and_inside_or = OrQuery([AndQuery([first, doc]), document])
    assert parse_query("first AND doc OR document") == and_inside_or
    assert parse_query("first doc OR document") == and_inside_or
def test_fields():
    """Check field-prefixed queries: `keyword:word`, `keyword:"phrase"`, and the `@keyword word` form."""
    keyword_first_then_document = AndQuery([
        WordQuery("first", KeywordField()),
        WordQuery("document"),
    ])

    # The field prefix applies to the next word only
    assert parse_query("keyword:first document") == keyword_first_then_document

    phrase_in_keywords = PhraseQuery("first document", KeywordField())
    assert parse_query("keyword:\"first document\"") == phrase_in_keywords

    # A bracketed clause after a field prefix is rejected by this parser
    with pytest.raises(InvalidQueryException):
        parse_query("keyword:(first and document)")

    assert parse_query("@keyword first document") == keyword_first_then_document
def test_brackets():
    """Check bracket grouping, including the error raised for an unclosed bracket."""
    first = WordQuery("first")
    assert parse_query("(first)") == first

    doc_or_document = OrQuery([WordQuery("doc"), WordQuery("document")])
    first_and_group = AndQuery([first, doc_or_document])
    # The same tree is expected with an explicit "and" and with plain adjacency
    assert parse_query("first and (doc or document)") == first_and_group
    assert parse_query("first (doc or document)") == first_and_group

    # Missing closing bracket
    with pytest.raises(InvalidQueryException):
        parse_query("first (doc or document")
def test_matches_mature_rating():
    """Both `rating:mature` and `rating:questionable` should match a MATURE-rated submission."""
    subscriptions = [
        Subscription(query, 12432)
        for query in ("deer rating:mature", "deer rating:questionable")
    ]
    submission = SubmissionBuilder(
        title="Deer plays in woods",
        rating=Rating.MATURE,
    ).build_full_submission()

    results = [sub.matches_result(submission, AndQuery([])) for sub in subscriptions]

    assert results[0]
    assert results[1]
def test_matches_general_rating():
    """Both `rating:general` and `rating:safe` should match a GENERAL-rated submission."""
    subscriptions = [
        Subscription(query, 12432)
        for query in ("deer rating:general", "deer rating:safe")
    ]
    submission = SubmissionBuilder(
        title="Deer plays in woods",
        rating=Rating.GENERAL,
    ).build_full_submission()

    results = [sub.matches_result(submission, AndQuery([])) for sub in subscriptions]

    assert results[0]
    assert results[1]
def test_matches_explicit_rating():
    """Both `rating:adult` and `rating:explicit` should match an ADULT-rated submission."""
    subscriptions = [
        Subscription(query, 12432)
        for query in ("deer rating:adult", "deer rating:explicit")
    ]
    submission = SubmissionBuilder(
        title="Deer plays in woods",
        rating=Rating.ADULT,
    ).build_full_submission()

    results = [sub.matches_result(submission, AndQuery([])) for sub in subscriptions]

    assert results[0]
    assert results[1]
def test_negation():
    """Check every supported spelling of negation, alone and after another term."""
    not_first = NotQuery(WordQuery("first"))
    # All of these spellings should parse to the same negated word
    for text in ("-first", "!first", "! first", "not first", "NOT first"):
        assert parse_query(text) == not_first

    # A negated term following another term is AND-ed with it
    first_without_second = AndQuery([WordQuery("first"), NotQuery(WordQuery("second"))])
    assert parse_query("first not second") == first_without_second
def test_matches_result__prefix_matches():
    """A trailing-wildcard query ("deer*") should match a title word that starts with "deer"."""
    sub = Subscription("deer*", 12432)
    builder = SubmissionBuilder(title="deertaur plays in woods")
    submission = builder.build_full_submission()

    assert sub.matches_result(submission, AndQuery([]))
def test_matches_result__suffix_doesnt_match_term():
    """A leading-wildcard query ("*taur") is expected not to match the bare word "taur"."""
    sub = Subscription("*taur", 12432)
    builder = SubmissionBuilder(title="taur plays in woods")
    submission = builder.build_full_submission()

    assert not sub.matches_result(submission, AndQuery([]))
def test_matches_result__regex_matches_case_insensitive():
    """A mid-word wildcard query ("d*taur") should match "DeerTaur" regardless of letter case."""
    sub = Subscription("d*taur", 12432)
    builder = SubmissionBuilder(title="DeerTaur plays in woods")
    submission = builder.build_full_submission()

    assert sub.matches_result(submission, AndQuery([]))
def test_matches_not_negative_rating():
    """A negated rating term ("-rating:general") should exclude a GENERAL-rated submission."""
    sub = Subscription("deer -rating:general", 12432)
    submission = SubmissionBuilder(
        title="Deer plays in woods",
        rating=Rating.GENERAL,
    ).build_full_submission()

    assert not sub.matches_result(submission, AndQuery([]))
def test_matches_result__keywords_not_phrase():
    """
    A quoted phrase should not match when its words only appear apart: separated in the description,
    and as two distinct keywords.
    """
    sub = Subscription("\"hello world\"", 12432)
    submission = SubmissionBuilder(
        title="Test submission",
        description="this submission says hello to the world",
        keywords=["example", "hello", "world"],
    ).build_full_submission()

    assert not sub.matches_result(submission, AndQuery([]))
def test_matches_result__two_words_in_description_matches():
    """A two-word query should match when both words occur in the description, in any order."""
    sub = Subscription("example submission", 12432)
    submission = SubmissionBuilder(
        title="test",
        description="this submission is just an example",
        keywords=["example", "keywords"],
    ).build_full_submission()

    assert sub.matches_result(submission, AndQuery([]))
def test_matches_result__case_insensitive_query():
    """An upper-case query should match lower-case text in the submission."""
    sub = Subscription("SUBMISSION", 12432)
    submission = SubmissionBuilder(
        title="test submission",
        description="this submission is just an example",
        keywords=["example", "submission", "keywords"],
    ).build_full_submission()

    assert sub.matches_result(submission, AndQuery([]))
def test_matches_result__doesnt_match_except_quote():
    """A word with a quoted `except` phrase should not match when the word only occurs inside that phrase."""
    sub = Subscription("taur except \"no taur\"", 12432)
    submission = SubmissionBuilder(
        title="Test submission",
        description="this submission contains no taur",
        keywords=["example", "submission", "keywords"],
    ).build_full_submission()

    assert not sub.matches_result(submission, AndQuery([]))
def test_matches_word_in_tag():
    """A word wrapped in HTML markup in the description should still be matched."""
    sub = Subscription("deer", 12432)
    submission = SubmissionBuilder(
        title="Test submission",
        description="this submission is <b>deer</b>",
        keywords=["example", "submission", "keywords"],
    ).build_full_submission()

    assert sub.matches_result(submission, AndQuery([]))
def test_matches_result__does_not_match_query_with_applicable_negated_query():
    """A query with a negated term should not match a submission containing that negated term."""
    sub = Subscription("test -example", 12432)
    submission = SubmissionBuilder(
        title="Test submission",
        description="this submission is just an example",
        keywords=["example", "submission", "keywords"],
    ).build_full_submission()

    assert not sub.matches_result(submission, AndQuery([]))
def test_matches_result__keyword_field():
    """A `keywords:` query should not match when the word is only in the description, not the keywords."""
    sub = Subscription("keywords:deer", 12432)
    submission = SubmissionBuilder(
        title="Test submission",
        description="this submission has no deer and will not be tagged deer",
        keywords=["example", "submission", "keywords"],
    ).build_full_submission()

    assert not sub.matches_result(submission, AndQuery([]))
def test_matches_result__matches_query_with_hyphen():
    """A hyphenated query word should match the same hyphenated word in the description."""
    sub = Subscription("an-example", 12432)
    submission = SubmissionBuilder(
        title="Test submission",
        description="this submission is just an-example",
        keywords=["example", "submission", "keywords"],
    ).build_full_submission()

    assert sub.matches_result(submission, AndQuery([]))
def test_matches_result__substring_in_keywords_no_match():
    """A query word that is only a substring of a keyword ("keyword" vs "keywords") should not match."""
    sub = Subscription("keyword", 12432)
    submission = SubmissionBuilder(
        title="test submission",
        description="this submission is just an example",
        keywords=["example", "submission", "keywords"],
    ).build_full_submission()

    assert not sub.matches_result(submission, AndQuery([]))
async def run(self):
    """
    This method is launched as a task, it reads the browse endpoint for new submissions, and checks if they match
    existing subscriptions.

    It sets `self.running` to True and keeps looping until something else sets it back to False. Each pass:
      1. fetches new results via `self._get_new_results()`;
      2. fetches the full submission for each result and tests it against a copy of `self.subscriptions`,
         each combined with the blocklist of that subscription's destination;
      3. sends updates for any matching subscriptions and records the result via `self._update_latest_ids`;
      4. waits `self.BACK_OFF` (through `self._wait_while_running`) before the next pass.

    Failures to fetch results or an individual submission are logged and skipped rather than raised.
    """
    self.running = True
    while self.running:
        try:
            new_results = await self._get_new_results()
        except Exception as e:
            logger.error("Failed to get new results", exc_info=e)
            # Back off before retrying. Without this, a persistent failure turns into a tight retry loop
            # which floods the log and, if the call fails without suspending, never yields to the event
            # loop (so nothing else could ever set self.running to False).
            await self._wait_while_running(self.BACK_OFF)
            continue
        heartbeat.update_heartbeat(heartbeat_app_name)
        # Check for subscription updates
        for count, result in enumerate(new_results, start=1):
            # Try and get the full data
            try:
                full_result = await self.api.get_full_submission(result.submission_id)
                logger.debug("Got full data for submission %s", result.submission_id)
            except PageNotFound:
                logger.warning("Submission %s, disappeared before I could check it.", result.submission_id)
                continue
            except Exception as e:
                logger.error("Failed to get submission %s", result.submission_id, exc_info=e)
                continue
            # Copy subscriptions, to avoid "changed size during iteration" issues
            subscriptions = self.subscriptions.copy()
            # Check which subscriptions match
            matching_subscriptions = []
            for subscription in subscriptions:
                # A destination with no blocklist entry gets an empty AndQuery
                blocklist = self.blocklists.get(subscription.destination, set())
                blocklist_query = AndQuery([NotQuery(self._get_blocklist_query(block)) for block in blocklist])
                if subscription.matches_result(full_result, blocklist_query):
                    matching_subscriptions.append(subscription)
            logger.debug(
                "Submission %s matches %s subscriptions",
                result.submission_id,
                len(matching_subscriptions)
            )
            if matching_subscriptions:
                await self._send_updates(matching_subscriptions, full_result)
            # Update latest ids with the submission we just checked, and save config
            self._update_latest_ids([result])
            # Every UPDATE_PER_HEARTBEAT submissions, update heartbeat
            if count % self.UPDATE_PER_HEARTBEAT == 0:
                heartbeat.update_heartbeat(heartbeat_app_name)
                logger.debug("Heartbeat")
        # Wait
        await self._wait_while_running(self.BACK_OFF)
    logger.info("Subscription watcher shutting down")
def test_matches_result__doesnt_match_except_bracket_clause():
    """
    A prefix query with a bracketed `except` clause should not match when every prefix hit in the text
    ("multitude", "multicoloured") is covered by one of the exceptions.
    """
    sub = Subscription("multi* except (multicol* or multitude)", 12432)
    submission = SubmissionBuilder(
        title="Test submission",
        description="this submission is just an example of a multitude of multicoloured things",
        keywords=["example", "submission", "keywords"],
    ).build_full_submission()

    assert not sub.matches_result(submission, AndQuery([]))
def test_matches_result__except_matches_other_match():
    """
    A prefix query with an `except` term should still match when the text has another prefix hit
    ("multiple") besides the excepted word ("multitude").
    """
    sub = Subscription("multi* except multitude", 12432)
    submission = SubmissionBuilder(
        title="Test submission",
        description="this submission is just an example of a multitude of multiple things",
        keywords=["example", "submission", "keywords"],
    ).build_full_submission()

    assert sub.matches_result(submission, AndQuery([]))
def test_matches_result__doesnt_match_except_field():
    """
    An `except` query scoped to the keywords field should not match when every "multi*" keyword is covered
    by an exception, even though the description has an uncovered hit ("multiplication").
    """
    sub = Subscription("keywords:(multi* except (multitude multiple multicol*))", 12432)
    submission = SubmissionBuilder(
        title="Test submission",
        description="this submission is about multiplication but not tagged like that",
        keywords=["multitude", "multiple", "multicoloured", "multicolors"],
    ).build_full_submission()

    assert not sub.matches_result(submission, AndQuery([]))
def test_matches_result__not_when_paused():
    """A paused subscription should not match, even when its query matches the submission text."""
    sub = Subscription("test", 12432)
    sub.paused = True
    submission = SubmissionBuilder(
        title="test submission",
        description="this submission is just an example",
        keywords=["example", "submission", "keywords"],
    ).build_full_submission()

    assert not sub.matches_result(submission, AndQuery([]))
def test_matches_result__doesnt_match_blocklisted_rating():
    """A submission matching the query should be rejected when the blocklist negates its rating."""
    sub = Subscription("an-example", 12432)
    submission = SubmissionBuilder(
        title="Test submission",
        description="this submission is just an-example",
        keywords=["example", "submission", "keywords"],
        rating=Rating.ADULT,
    ).build_full_submission()
    # Blocklist excluding both adult and mature ratings; the submission above is adult
    blocked_ratings = (Rating.ADULT, Rating.MATURE)
    blocklist = AndQuery([NotQuery(RatingQuery(rating)) for rating in blocked_ratings])

    assert not sub.matches_result(submission, blocklist)
def whoosh_to_custom(q: Query) -> 'Query':
    """
    Recursively convert a parsed query tree into this project's own query objects.

    Handled node types (presumably the whoosh query classes of the same names - confirm against the imports):
      - Or / And: converted to OrQuery / AndQuery, with every subquery converted recursively
      - Term: a RatingQuery when the field is "rating", otherwise a WordQuery on the named field
      - Prefix: a PrefixQuery on the named field
      - Wildcard: a RegexQuery, with the wildcard pattern translated to a regex by fnmatch.translate
      - Phrase: a PhraseQuery whose text is the phrase's words joined by single spaces

    :param q: The query node to convert
    :return: The equivalent custom query object
    :raises KeyError: If a rating term's text is not a key of rating_dict
    :raises NotImplementedError: If the node is of any other type
    """
    if isinstance(q, Or):
        return OrQuery([whoosh_to_custom(w) for w in q.subqueries])
    if isinstance(q, And):
        return AndQuery([whoosh_to_custom(w) for w in q.subqueries])
    if isinstance(q, Term):
        # Ratings are not looked up as a text field, they have their own query type
        if q.fieldname == "rating":
            return RatingQuery(rating_dict[q.text])
        return WordQuery(q.text, get_field_for_name(q.fieldname))
    if isinstance(q, Prefix):
        return PrefixQuery(q.text, get_field_for_name(q.fieldname))
    if isinstance(q, Wildcard):
        field = get_field_for_name(q.fieldname)
        # Shell-style wildcard pattern -> regular expression source
        regex = fnmatch.translate(q.text)
        return RegexQuery(regex, field)
    if isinstance(q, Phrase):
        field = get_field_for_name(q.fieldname)
        return PhraseQuery(" ".join(q.words), field)
    # Name the offending type, so an unsupported query can be diagnosed from the traceback alone
    raise NotImplementedError("Unsupported query type: " + type(q).__name__)
def test_parser():
    """Check the simplest parses: a single word, and two adjacent words combined with an implied AND."""
    first = WordQuery("first")
    assert parse_query("first") == first

    both_words = AndQuery([first, WordQuery("document")])
    assert parse_query("first document") == both_words
def matches_result(self, result: FASubmissionFull, blocklist_query: Query) -> bool:
    """
    Decide whether the given submission matches this subscription.

    :param result: The full submission to test
    :param blocklist_query: Query which the submission must also satisfy, AND-ed with this subscription's query
    :return: False whenever the subscription is paused; otherwise whether the combined query matches
    """
    if not self.paused:
        combined_query = AndQuery([self.query, blocklist_query])
        return combined_query.matches_submission(result)
    # Paused subscriptions never match, whatever the submission contains
    return False