Example #1
0
    def test_not(self):
        # Every negated query below is expected to return exactly these titles.
        expected_titles = {
            'A Clash of Kings',
            'A Game of Thrones',
            'A Storm of Swords',
            'Foundation',
            'Learning Python',
            'The Hobbit',
            'The Two Towers',
            'The Fellowship of the Ring',
            'The Return of the King',
            'The Rust Programming Language',
            'Two Scoops of Django 1.11',
            'Programming Rust',
        }

        # Negation spelled with the explicit Not() wrapper.
        hits = self.backend.search(
            Not(PlainText('javascript')),
            models.Book.objects.all(),
        )
        found_titles = {book.title for book in hits}
        self.assertSetEqual(found_titles, expected_titles)

        # Same negation spelled with the ~ operator.
        hits = self.backend.search(
            ~PlainText('javascript'),
            models.Book.objects.all(),
        )
        found_titles = {book.title for book in hits}
        self.assertSetEqual(found_titles, expected_titles)

        # Negating a multi-word query.
        hits = self.backend.search(
            ~PlainText('javascript the'),
            models.Book.objects.all(),
        )
        found_titles = {book.title for book in hits}
        self.assertSetEqual(found_titles, expected_titles)
Example #2
0
    def test_not(self):
        # The three negated queries below must all produce this same title set.
        expected = {
            "A Clash of Kings",
            "A Game of Thrones",
            "A Storm of Swords",
            "Foundation",
            "Learning Python",
            "The Hobbit",
            "The Two Towers",
            "The Fellowship of the Ring",
            "The Return of the King",
            "The Rust Programming Language",
            "Two Scoops of Django 1.11",
            "Programming Rust",
        }

        def matched_titles(query):
            # Search all books with ``query`` and collect the titles of the hits.
            hits = self.backend.search(query, models.Book.objects.all())
            return {hit.title for hit in hits}

        # Explicit Not() wrapper.
        self.assertSetEqual(matched_titles(Not(PlainText("javascript"))), expected)

        # Operator form of the same negation.
        self.assertSetEqual(matched_titles(~PlainText("javascript")), expected)

        # Tests multiple words
        self.assertSetEqual(matched_titles(~PlainText("javascript the")), expected)
Example #3
0
def normalize(search_query: SearchQuery) -> SearchQuery:
    """
    Return a normalized version of ``search_query`` with negations hoisted.

    The SQLite FTS5 module doesn't support a unary NOT operator, so a negated
    query has to be expressed as "everything matched by A that is not matched
    by B". This function rewrites And/Or combinations that contain Not
    subqueries into that binary AndNot form.

    For example, And([Not(PlainText("Arepa")), PlainText("Crepe")]) is turned
    into AndNot(And([PlainText("Crepe")]), Or([PlainText("Arepa")])), i.e.
    "Crepe AND NOT Arepa".

    Rules applied:

    * Phrase, PlainText and MatchAll are returned unchanged.
    * Not(q) becomes Not(normalize(q)).
    * And(P..., Not(N)...) becomes AndNot(And(P), Or(N)); MatchAll operands
      are dropped as redundant, and a negated MatchAll collapses the whole
      conjunction to Not(MatchAll()) because nothing can match.
    * Or(P..., Not(N)...) becomes AndNot(MatchAll(), AndNot(And(N), Or(P)))
      by De Morgan's law; when there are no positive operands this reduces to
      AndNot(MatchAll(), And(N)).

    Any other query type is not handled: the function falls off the end and
    returns None, exactly as it did before this rewrite.
    """
    if isinstance(search_query, (Phrase, PlainText)):
        return search_query  # Leaf queries: nothing to normalize.

    if isinstance(search_query, And):
        normalized_subqueries: List[SearchQuery] = [
            normalize(subquery) for subquery in search_query.subqueries
        ]

        # Inner queries of every negated operand (the Not wrapper is removed).
        negated_subqueries = [
            subquery.subquery
            for subquery in normalized_subqueries
            if isinstance(subquery, Not)
        ]
        # Positive operands; MatchAll is the identity of AND, so it is dropped.
        not_negated_subqueries = [
            subquery
            for subquery in normalized_subqueries
            if not isinstance(subquery, (Not, MatchAll))
        ]

        if not negated_subqueries:
            # Nothing to hoist: plain And(), minus the redundant MatchAll operands.
            return And(not_negated_subqueries)

        if any(isinstance(subquery, MatchAll) for subquery in negated_subqueries):
            # "... AND NOT everything" can never match anything.
            return Not(MatchAll())

        return AndNot(And(not_negated_subqueries), Or(negated_subqueries))

    if isinstance(search_query, Or):
        normalized_subqueries: List[SearchQuery] = [
            normalize(subquery) for subquery in search_query.subqueries
        ]

        negated_subqueries = [
            subquery.subquery
            for subquery in normalized_subqueries
            if isinstance(subquery, Not)
        ]
        if not negated_subqueries:
            # Nothing to hoist: return an Or() of the normalized operands.
            return Or(normalized_subqueries)

        not_negated_subqueries = [
            subquery
            for subquery in normalized_subqueries
            if not isinstance(subquery, Not)
        ]
        if any(isinstance(subquery, MatchAll) for subquery in not_negated_subqueries):
            # "everything OR ..." matches everything.
            return MatchAll()

        # Not(MatchAll()) matches nothing, so it contributes nothing to an OR.
        negated_subqueries = [
            subquery
            for subquery in negated_subqueries
            if not isinstance(subquery, MatchAll)
        ]
        if not negated_subqueries:
            if not_negated_subqueries:
                return Or(not_negated_subqueries)
            return Not(MatchAll())  # Every operand was "NOT everything".

        # De Morgan: P OR NOT N1 OR NOT N2  ==  NOT (N1 AND N2 AND NOT P).
        excluded = And(negated_subqueries)
        if not_negated_subqueries:
            # NOTE(review): this nests an AndNot as the excluded side of the
            # outer AndNot; confirm the backend accepts that shape. The inner
            # AndNot(And(...), Or(...)) is the same shape the And branch emits.
            excluded = AndNot(excluded, Or(not_negated_subqueries))
        return AndNot(MatchAll(), excluded)

    if isinstance(search_query, Not):
        # Normalize the subquery, then invert it.
        return Not(normalize(search_query.subquery))

    if isinstance(search_query, MatchAll):
        return search_query  # We can't normalize a MatchAll.
Example #4
0
    def test_not(self):
        # Titles that every negated 'javascript' query below must return.
        base_titles = {
            "A Clash of Kings",
            "A Game of Thrones",
            "A Storm of Swords",
            "Foundation",
            "Learning Python",
            "The Hobbit",
            "The Two Towers",
            "The Fellowship of the Ring",
            "The Return of the King",
            "The Rust Programming Language",
            "Two Scoops of Django 1.11",
            "Programming Rust",
        }
        definitive_guide = "JavaScript: The Definitive Guide"
        good_parts = "JavaScript: The good parts"

        # Negation through the explicit Not() wrapper.
        hits = self.backend.search(
            Not(PlainText("javascript")), models.Book.objects.all()
        )
        found = {hit.title for hit in hits}
        self.assertSetEqual(found, base_titles)

        # Negation through the ~ operator.
        hits = self.backend.search(
            ~PlainText("javascript"), models.Book.objects.all()
        )
        found = {hit.title for hit in hits}
        self.assertSetEqual(found, base_titles)

        # Tests multiple words
        hits = self.backend.search(
            ~PlainText("javascript the"), models.Book.objects.all()
        )
        found = {hit.title for hit in hits}
        # NOTE: This is where the method differs from the parent one. NOT
        # 'javascript the' should exclude every entry containing both words,
        # but MySQL doesn't index stopwords in FULLTEXT indexes by default.
        # Since 'the' is missing from the index, the JavaScript books don't
        # match the inner query, so both of them are returned.
        self.assertSetEqual(found, base_titles | {definitive_guide, good_parts})

        # Tests multiple words too, but this time the second word is not a stopword
        hits = self.backend.search(
            ~PlainText("javascript parts"), models.Book.objects.all()
        )
        found = {hit.title for hit in hits}
        self.assertSetEqual(found, base_titles | {definitive_guide})