def test_fuzz(self):
    """Check that fuzz() appends a ``~N`` suffix and that a factor of 7 is rejected."""
    fuzzy = Query("hello bruno").fuzz(2)
    rendered = str(fuzzy)
    self.assertEqual('"hello bruno"~2', rendered)

    # Re-fuzzing the same query with 7 is expected to raise ValueError.
    self.assertRaises(ValueError, fuzzy.fuzz, 7)
def test_for_single_field(self):
    """Check restricting a query to one field, including an empty field name."""
    text = "hello bruno"

    scoped = Query(text).for_single_field('id')
    self.assertEqual('id:"hello bruno"', str(scoped))

    # With an empty field name the rendered query carries no field prefix.
    unscoped = Query(text).for_single_field('')
    self.assertEqual('"hello bruno"', str(unscoped))
def test_sanitation(self):
    """Exercise Query construction with sanitize=True."""
    sentence = "The quick brown fox jumped over 12 lazy dogs"
    sanitized = Query(sentence, sanitize=True)
    # NOTE(review): this result is only printed for inspection; nothing is
    # asserted about it.
    print(str(sanitized))

    # If query only contains garbage, should use original string
    garbage_only = Query('and and but but', sanitize=True)
    self.assertEqual('"and and but but"', str(garbage_only))
def test_for_fields(self):
    """Check that for_fields() expands a query over boosted fields and rejects non-dicts."""
    boosts = {'id': 1, 'name': 10}
    expected = '"hello bruno" OR id:("hello bruno")^1 OR name:("hello bruno")^10'

    query = Query("hello bruno").for_fields(boosts)
    self.assertEqual(expected, str(query))

    # Passing a plain string instead of a mapping is expected to raise ValueError.
    with self.assertRaises(ValueError):
        query.for_fields("not clean")
def test_init(self):
    """
    Test initializing a Query as_phrase, not as_phrase, and with escape=True
    """
    query_str = "hello"

    # Default construction renders the text as a quoted phrase.
    query = Query(query_str)
    self.assertEqual('"hello"', str(query))

    # as_phrase=False renders the text without surrounding quotes.
    query = Query(query_str, as_phrase=False)
    self.assertEqual('hello', str(query))

    # escape=True is expected to backslash-escape ':', '(' and ')'.
    # The expected value is a raw string: backslash-colon and
    # backslash-paren are not valid Python escape sequences, so a non-raw
    # literal triggers DeprecationWarning (3.6+) / SyntaxWarning (3.12+).
    query = Query('wow:wow()', escape=True)
    self.assertEqual(r'"wow\:wow\(\)"', str(query))
def build_getdocument_query(doc_id, base_kwargs):
    """
    Build the query string and parameters used to look up one document by id.

    The 'id' field is matched against three alternatives joined with OR:
    doc_id as given, doc_id with a '/' appended, and doc_id with any
    trailing '/' characters stripped. This tolerates a missing or extra
    trailing slash on doc_id.

    Returns a tuple (query string, kwargs), where kwargs is a copy of
    base_kwargs with 'default_field' set to 'id'.
    """
    kwargs = base_kwargs.copy()

    as_given = Query(doc_id, as_phrase=False, escape=True).for_single_field('id')

    with_slash = Query(doc_id + '/', as_phrase=False, escape=True).for_single_field('id')
    alternatives = as_given.select_or(with_slash)

    without_slash = Query(doc_id.rstrip('/'), as_phrase=False, escape=True).for_single_field('id')
    alternatives = alternatives.select_or(without_slash)

    kwargs['default_field'] = 'id'
    return str(alternatives), kwargs
def test_boost_importance(self):
    """Check that boost_importance() wraps the query and appends the boost factor."""
    boosted = Query("hello bruno").boost_importance(5)
    self.assertEqual('("hello bruno")^5', str(boosted))
def test_selects(self):
    """Check the select operators: AND, OR and REQUIRE."""
    farewell = Query("bye bruno")

    # AND
    greeting = Query("hello bruno")
    both = greeting.select_and(farewell)
    self.assertEqual('"hello bruno" AND "bye bruno"', str(both))

    # OR (fresh left-hand query, same right-hand query as above)
    greeting = Query("hello bruno")
    either = greeting.select_or(farewell)
    self.assertEqual('"hello bruno" OR "bye bruno"', str(either))

    # REQUIRE: each required term is appended with a leading '+'
    required = Query("hello bruno").select_require(["hack", "wack"])
    self.assertEqual('"hello bruno"+hack+wack', str(required))

    # REQUIRE with no terms renders just the phrase
    nothing_required = Query("hello bruno").select_require([])
    self.assertEqual('"hello bruno"', str(nothing_required))
def build_search_query(core, query_str, base_kwargs):
    """
    Build a search query string and parameters tuned for the given core.

    For the known cores ('genericPage', 'courseItem', 'redditPost') the
    result is a fuzzy phrase query OR-ed with a sanitized, escaped terms
    query spread over that core's weighted fields, and kwargs gains
    'default_field' and 'highlight_fields'. For any other core the query is
    the plain phrase and kwargs is returned as an unmodified copy.

    See https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
    for more information about Apache Lucene query syntax.

    Returns a tuple (query string, kwargs), where kwargs is a copy of
    base_kwargs.
    """
    kwargs = base_kwargs.copy()

    # Each branch only picks its ingredients; the shared combine step is below.
    if core == 'genericPage':
        weights = {
            'id': 1,
            'name': 8,
            'siteName': 5,
            'description': 5,
            'content': 8
        }
        phrase = Query(query_str).fuzz(2).boost_importance(5)
        terms = Query(query_str, as_phrase=False, escape=True, sanitize=True)
        terms = terms.fuzz(1).for_fields(weights)
        default_field = 'content'
        highlight_fields = 'content,description'
    elif core == 'courseItem':
        weights = {
            'id': 1,
            'name': 9,
            'description': 8,
            'subjectData': 5,
        }
        phrase = Query(query_str).fuzz(2)
        terms = Query(query_str, as_phrase=False, escape=True, sanitize=True)
        terms = terms.for_fields(weights)
        default_field = 'name'
        highlight_fields = 'description'
    elif core == 'redditPost':
        weights = {
            'id': 1,
            'name': 7,
            'description': 10,
            'comments': 6,
        }
        phrase = Query(query_str).fuzz(1)
        terms = Query(query_str, as_phrase=False, escape=True, sanitize=True)
        terms = terms.for_fields(weights)
        default_field = 'name'
        highlight_fields = 'description,comments'
    else:
        # Unknown core: plain phrase query, no extra parameters.
        return str(Query(query_str)), kwargs

    combined = phrase.select_or(terms)
    kwargs['default_field'] = default_field
    kwargs['highlight_fields'] = highlight_fields
    return str(combined), kwargs