Ejemplo n.º 1
0
 def test_fuzz(self):
     """
     Check that a fuzz factor shows up as a ``~N`` suffix on the rendered
     query, and that asking for a fuzz factor of 7 raises ValueError.
     """
     fuzzy = Query("hello bruno").fuzz(2)
     expected = '"hello bruno"~2'
     self.assertEqual(expected, str(fuzzy))

     # The already-fuzzed query object is reused for the rejection check.
     self.assertRaises(ValueError, fuzzy.fuzz, 7)
Ejemplo n.º 2
0
 def test_for_single_field(self):
     """
     Check how a query renders once it is restricted to a single field.

     A named field is expected to prefix the phrase as ``field:``; an
     empty field name is expected to leave the phrase unprefixed.
     """
     cases = (
         ('id', 'id:"hello bruno"'),
         ('', '"hello bruno"'),
     )
     for field, expected in cases:
         scoped = Query("hello bruno").for_single_field(field)
         self.assertEqual(expected, str(scoped))
Ejemplo n.º 3
0
    def test_sanitation(self):
        """
        Exercise Query construction with sanitize=True.
        """
        # NOTE(review): this first case is only printed, never asserted, so
        # it cannot fail on wrong output -- consider pinning the expected
        # sanitized string.
        sentence = "The quick brown fox jumped over 12 lazy dogs"
        sanitized = Query(sentence, sanitize=True)
        rendered = str(sanitized)
        print(rendered)

        # A query made up only of garbage words is expected to fall back to
        # the original string.
        fallback = Query('and and but but', sanitize=True)
        self.assertEqual('"and and but but"', str(fallback))
Ejemplo n.º 4
0
 def test_for_fields(self):
     """
     Check that a dict of field names to boost values expands the query
     into OR-ed per-field clauses, and that a non-dict argument raises
     ValueError.
     """
     boosts = {'id': 1, 'name': 10}
     query = Query("hello bruno").for_fields(boosts)
     expected = (
         '"hello bruno" OR id:("hello bruno")^1 OR name:("hello bruno")^10'
     )
     self.assertEqual(expected, str(query))

     # A plain string is not an acceptable fields argument.
     with self.assertRaises(ValueError):
         query.for_fields("not clean")
Ejemplo n.º 5
0
    def test_init(self):
        """
        Test initializing a Query.

        Covers the default phrase form, as_phrase=False, and escape=True.
        """
        query_str = "hello"
        query = Query(query_str)
        self.assertEqual('"hello"', str(query))

        query = Query(query_str, as_phrase=False)
        self.assertEqual('hello', str(query))

        # Raw string on purpose: "\:", "\(" and "\)" are not valid Python
        # escape sequences, so a plain literal triggers an invalid-escape
        # warning. The runtime value is unchanged.
        query = Query('wow:wow()', escape=True)
        self.assertEqual(r'"wow\:wow\(\)"', str(query))
Ejemplo n.º 6
0
def build_getdocument_query(doc_id, base_kwargs):
    """
    Build the query string and parameters used to look up the document
    identified by doc_id.

    The id is matched as given, with a "/" appended, and with trailing "/"
    characters stripped, so a missing or extra trailing slash still matches.

    Returns a (query string, kwargs) tuple, where kwargs is a copy of
    base_kwargs with 'default_field' set to 'id'.
    """
    def id_clause(value):
        # Escaped, non-phrase match against the 'id' field.
        return Query(value, as_phrase=False, escape=True).for_single_field('id')

    params = base_kwargs.copy()

    # Each alternative is built immediately before it is OR-ed in.
    query = id_clause(doc_id)
    query = query.select_or(id_clause(doc_id + '/'))
    query = query.select_or(id_clause(doc_id.rstrip('/')))

    params['default_field'] = 'id'
    return (str(query), params)
Ejemplo n.º 7
0
 def test_boost_importance(self):
     """
     Check that boosting a query wraps it in parentheses and appends a
     ``^N`` suffix.
     """
     boosted = Query("hello bruno").boost_importance(5)
     self.assertEqual('("hello bruno")^5', str(boosted))
Ejemplo n.º 8
0
    def test_selects(self):
        """
        Check the string form produced by the AND, OR and REQUIRE selectors.
        """
        left = Query("hello bruno")
        right = Query("bye bruno")

        # AND joins both phrases with the AND keyword.
        self.assertEqual('"hello bruno" AND "bye bruno"',
                         str(left.select_and(right)))

        # OR: the left operand is rebuilt, the right operand is reused.
        left = Query("hello bruno")
        self.assertEqual('"hello bruno" OR "bye bruno"',
                         str(left.select_or(right)))

        # REQUIRE appends each term prefixed with "+".
        required = Query("hello bruno").select_require(["hack", "wack"])
        self.assertEqual('"hello bruno"+hack+wack', str(required))

        # An empty term list is expected to leave the query as it was.
        untouched = Query("hello bruno").select_require([])
        self.assertEqual('"hello bruno"', str(untouched))
Ejemplo n.º 9
0
def build_search_query(core, query_str, base_kwargs):
    """
    Build the search query string and parameters for the given core from
    the user's query.

    For the known cores ('genericPage', 'courseItem', 'redditPost') the
    result is a fuzzy phrase query OR-ed with a sanitized, escaped terms
    query spread over that core's boosted fields. Any other core gets a
    plain phrase query and no extra parameters.

    See https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
    for more information about Apache Lucene query syntax.

    Returns a (query string, kwargs) tuple, where kwargs is a copy of
    base_kwargs, extended with 'default_field' and 'highlight_fields' for
    the known cores.
    """
    kwargs = base_kwargs.copy()

    # Each branch only selects tuning values; the queries themselves are
    # assembled once below. None means "skip that step".
    if core == 'genericPage':
        field_boosts = {
            'id': 1,
            'name': 8,
            'siteName': 5,
            'description': 5,
            'content': 8
        }
        phrase_fuzz, phrase_boost, terms_fuzz = 2, 5, 1
        default_field = 'content'
        highlight_fields = 'content,description'
    elif core == 'courseItem':
        field_boosts = {
            'id': 1,
            'name': 9,
            'description': 8,
            'subjectData': 5,
        }
        phrase_fuzz, phrase_boost, terms_fuzz = 2, None, None
        default_field = 'name'
        highlight_fields = 'description'
    elif core == 'redditPost':
        field_boosts = {
            'id': 1,
            'name': 7,
            'description': 10,
            'comments': 6,
        }
        phrase_fuzz, phrase_boost, terms_fuzz = 1, None, None
        default_field = 'name'
        highlight_fields = 'description,comments'
    else:
        # Unrecognised core: plain phrase query, parameters left as copied.
        return (str(Query(query_str)), kwargs)

    # Whole-phrase match, fuzzed and (for some cores) boosted.
    phrase_query = Query(query_str).fuzz(phrase_fuzz)
    if phrase_boost is not None:
        phrase_query = phrase_query.boost_importance(phrase_boost)

    # Term match spread over the core's fields.
    terms_query = Query(query_str, as_phrase=False, escape=True, sanitize=True)
    if terms_fuzz is not None:
        terms_query = terms_query.fuzz(terms_fuzz)
    terms_query = terms_query.for_fields(field_boosts)

    combined = phrase_query.select_or(terms_query)
    kwargs['default_field'] = default_field
    kwargs['highlight_fields'] = highlight_fields

    return (str(combined), kwargs)