def query(cls, text):
    """Search saved stories for *text*.

    Tries a highlighted full-text query against the class's own index
    first; if that returns nothing, falls back to fuzzy matching on the
    'title', 'content' and 'author' fields, in that order, stopping at
    the first field that yields hits.
    """
    # user = User.objects.get(pk=user_id)
    text = text.strip()
    # Make recently-indexed documents visible to this search.
    cls.ES.refresh()
    q = StringQuery(text)
    highlighter = HighLighter(['<em>'], ['</em>'])
    s = Search(q, highlight=highlighter)
    s.add_highlight('title')
    s.add_highlight('content')
    results = cls.ES.search(s, indices=['%s-index' % cls.name])
    # logging.user(user, "~FGSearch ~FCsaved stories~FG for: ~SB%s" % text)
    # Fuzzy fallbacks, one field at a time (previously three copy-pasted
    # branches). NOTE(review): these searches intentionally keep the
    # original behavior of not passing `indices` — confirm whether they
    # should be scoped to '%s-index' like the query above.
    for field in ('title', 'content', 'author'):
        if results.total:
            break
        results = cls.ES.search(FuzzyQuery(field, text))
    return results
def test_iterator(self):
    """A result set iterates over exactly `size` hits, supports slicing,
    indexing past the slice, and reports the full `total`."""
    resultset = self.conn.search(Search(MatchAllQuery(), size=20),
                                 self.index_name, self.document_type)
    # len(list(...)) instead of a throwaway list comprehension.
    self.assertEqual(len(list(resultset)), 20)
    resultset = self.conn.search(Search(MatchAllQuery(), size=10),
                                 self.index_name, self.document_type)
    self.assertEqual(len(list(resultset[:10])), 10)
    self.assertEqual(resultset[10].uuid, "11111")
    self.assertEqual(resultset.total, 1000)
def test_QueryHighlight(self):
    """Highlighting with the default highlighter wraps matches in <em> tags."""
    search = Search(QueryStringQuery("joe"))
    for field in ("parsedtext", "name"):
        search.add_highlight(field)
    resultset = self.conn.search(search, indices=self.index_name)
    self.assertEqual(resultset.total, 2)
    first_hit = resultset[0]
    self.assertNotEqual(first_hit._meta.highlight, None)
    fragment = first_hit._meta.highlight[u"parsedtext"][0]
    self.assertEqual(fragment.strip(), u'<em>Joe</em> Testere nice guy')
def test_QueryHighlightWithHighLighter(self):
    """A custom HighLighter controls the tags wrapped around matches."""
    bold_tags = HighLighter(['<b>'], ['</b>'])
    search = Search(QueryStringQuery("joe"), highlight=bold_tags)
    for field in ("parsedtext", "name"):
        search.add_highlight(field)
    resultset = self.conn.search(search, indices=self.index_name)
    self.assertEqual(resultset.total, 2)
    first_hit = resultset[0]
    self.assertNotEqual(first_hit._meta.highlight, None)
    fragment = first_hit._meta.highlight[u"parsedtext"][0]
    self.assertEqual(fragment.strip(), u'<b>Joe</b> Testere nice guy')
def test_iterator_offset(self):
    """Paging with start/size returns the requested window of documents."""
    # Query for a block of 10, starting at position 10:
    resultset = self.conn.search(
        Search(MatchAllQuery(), start=10, size=10,
               sort={'position': {'order': 'asc'}}),
        self.index_name, self.document_type, start=10, size=10)
    # Ensure that there are 1000 results:
    self.assertEqual(len(resultset), 1000)
    # Now check that we actually have records 10-19, rather than 0-9
    # (enumerate replaces the previous manually-incremented counter):
    for offset, record in enumerate(resultset):
        self.assertEqual(record.position, offset + 10)
def get_search(self, request, **kwargs):
    """
    Execute a search query to elasticsearch

    Request parameters are:
     - `q`: string query
     - `types`: set of document types (`contact`, `organization`, `invoice`, ...)

    A minimum of 2 chars are required for the query to be processed
    (wildcards excluded).
    """
    import re
    import pyes
    from pyes.query import Search, StringQuery

    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)
    try:
        # Tenant (slug) must be present
        tenant = request.tenant.slug
        # By default, search is made among all types, can be overriden
        # passing a types argument
        doc_types = request.GET.getlist('types')
        # The 'q' parameter represents the query
        query = request.GET.get('q')
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        return http.HttpBadRequest()
    # The query must be a string composed of at least 2 chars, ES
    # wildcards excluded. Explicit check instead of `assert`, which is
    # stripped when Python runs with -O.
    if not (isinstance(query, basestring) and
            len(re.sub('[?*]', '', query)) >= 2):
        return http.HttpBadRequest()
    try:
        conn = pyes.ES(settings.ES_SERVERS, basic_auth=settings.ES_AUTH)
        q = Search(StringQuery(query))
        resultset = conn.search(
            q, indices=tenant,
            doc_types=u",".join(doc_types) if doc_types else None)
        searched_items = []
        for res in resultset:
            # Expose ES metadata alongside the document fields.
            res.update({
                'id': res._meta['id'],
                'resource_type': res._meta['type'],
                'score': res._meta['score']
            })
            searched_items.append(res)
    except Exception:
        # Any ES/connection failure is reported as a bad request,
        # matching the original (broad) behavior.
        return http.HttpBadRequest()
    self.log_throttled_access(request)
    paginator = self._meta.paginator_class(
        request.GET, searched_items,
        resource_uri=self.get_resource_uri(), limit=self._meta.limit)
    to_be_serialized = paginator.page()
    return self.create_response(request, to_be_serialized,
                                response_class=http.HttpResponse)
def find_most_similar_movie(movie_title, movie_year):
    """Return ``(movie, score)`` for the indexed movie most similar to
    *movie_title*, considering only candidates whose year equals
    *movie_year*. Similarity is the best match across title,
    original_title and any aka names; the scan short-circuits on a
    near-perfect (> 0.9999) match."""
    candidates = elasticsearch.search(
        Search(query=MultiMatchQuery(['title', 'original_title', 'aka'],
                                     movie_title),
               size=50),
        indices='rightchannel', doc_types='movie')
    best_score = 0
    best_movie = None
    for candidate in candidates:
        year = candidate.get('year')
        if not year or year != movie_year:
            continue
        # Collect every name this candidate is known by.
        names = [candidate.get('title'), candidate.get('original_title')]
        names.extend(candidate.get('aka') or [])
        score = 0
        for name in names:
            if name:
                score = max(calc_similarity(name, movie_title), score)
        if score > best_score:
            best_score = score
            best_movie = candidate
        if best_score > 0.9999:
            break
    return best_movie, best_score
def test_sorting_by_geolocation(self):
    """Results sort by distance from (lat=1, lon=1) on the location field."""
    query = Search(MatchAllQuery())
    query.sort.add(GeoSortOrder(field='location', lat=1, lon=1))
    hits = self.conn.search(query, indices=self.index_name,
                            doc_types=[self.document_type]).hits
    self.assertEqual([hit['_id'] for hit in hits], ['1', '2', '3'])
def test_sorting_by_script(self):
    """A script sort on 1.0/foo orders documents by descending foo."""
    query = Search(MatchAllQuery())
    query.sort.add(ScriptSortOrder("1.0/doc['foo'].value", type='number'))
    hits = self.conn.search(query, indices=self.index_name,
                            doc_types=[self.document_type]).hits
    self.assertEqual([hit['_id'] for hit in hits], ['3', '2', '1'])
def test_sorting_by_foo(self):
    """A plain descending sort on the foo field reverses document order."""
    query = Search(MatchAllQuery())
    query.sort.add(SortOrder('foo', order='desc'))
    hits = self.conn.search(query, indices=self.index_name,
                            doc_types=[self.document_type]).hits
    self.assertEqual([hit['_id'] for hit in hits], ['3', '2', '1'])
def query(cls, text):
    """Search saved stories for *text*, with <em>-tagged highlights.

    Runs a highlighted string query against this class's index, then —
    only while no hits have been found — retries with a fuzzy query on
    'title', then 'content', then 'author'.
    """
    # user = User.objects.get(pk=user_id)
    text = text.strip()
    # Refresh so documents indexed just before this call are searchable.
    cls.ES.refresh()
    q = StringQuery(text)
    highlighter = HighLighter(['<em>'], ['</em>'])
    s = Search(q, highlight=highlighter)
    s.add_highlight('title')
    s.add_highlight('content')
    results = cls.ES.search(s, indices=['%s-index' % cls.name])
    # logging.user(user, "~FGSearch ~FCsaved stories~FG for: ~SB%s" % text)
    # The three previously duplicated fallback branches collapsed into a
    # single loop. NOTE(review): as before, the fuzzy searches are not
    # scoped with `indices` — confirm whether that is intentional.
    for field in ('title', 'content', 'author'):
        if results.total:
            break
        results = cls.ES.search(FuzzyQuery(field, text))
    return results
def test_QueryHighlight(self):
    """Default highlighter wraps matched terms in <em>...</em>."""
    q = Search(StringQuery("joe"))
    q.add_highlight("parsedtext")
    q.add_highlight("name")
    resultset = self.conn.search(q, indices=self.index_name)
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(resultset.total, 2)
    self.assertNotEqual(resultset[0]._meta.highlight, None)
    self.assertEqual(
        resultset[0]._meta.highlight[u"parsedtext"][0].strip(),
        u'<em>Joe</em> Testere nice guy')
def test_QueryHighlightWithHighLighter(self):
    """A custom HighLighter's pre/post tags appear around matches."""
    h = HighLighter(['<b>'], ['</b>'])
    q = Search(StringQuery("joe"), highlight=h)
    q.add_highlight("parsedtext")
    q.add_highlight("name")
    resultset = self.conn.search(q, indices=self.index_name)
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(resultset.total, 2)
    self.assertNotEqual(resultset[0]._meta.highlight, None)
    self.assertEqual(
        resultset[0]._meta.highlight[u"parsedtext"][0].strip(),
        u'<b>Joe</b> Testere nice guy')
def search_add_sort(query, sort_field, sort_type):
    """Wrap *query* in a Search ordered by *sort_field*/*sort_type*."""
    sorted_search = Search(query)
    sorted_search.sort.add(SortOrder(sort_field, sort_type))
    return sorted_search
def run(self):
    """Worker loop: match queued movie items against the ES index and
    record the best match's provider URL in MongoDB.

    Runs forever, pulling one item per iteration from `movie_pool`.
    Python 2 syntax (`except X, e`).
    """
    while True:
        try:
            # Blocks until an item is available — TODO confirm
            # movie_pool.get() blocks rather than raising when empty.
            movie_item = movie_pool.get()
            query = MultiMatchQuery(['title', 'original_title', 'aka'],
                                    movie_item.title)
            results = elasticsearch.search(
                Search(query=query, size=10),
                indices=settings['elasticsearch']['index'],
                doc_types='movie')
            max_score = 0
            max_movie = None
            for r in results:
                title = r.get('title')
                original_title = r.get('original_title')
                aka = r.get('aka')
                year = r.get('year')
                countries = r.get('countries')
                directors = r.get('directors')
                casts = r.get('casts')
                # Base score: best textual similarity across all names.
                score = 0
                if title:
                    score = max(calc_similarity(title, movie_item.title), score)
                if original_title:
                    score = max(calc_similarity(original_title, movie_item.title), score)
                if aka:
                    for t in aka:
                        score = max(calc_similarity(t, movie_item.title), score)
                # +1 for matching year, and +1 per shared country,
                # director and cast member.
                if movie_item.year and year and movie_item.year == year:
                    score += 1
                if movie_item.countries and countries:
                    for country in movie_item.countries:
                        if country in countries:
                            score += 1
                if movie_item.directors and directors:
                    for director in movie_item.directors:
                        if director in directors:
                            score += 1
                if movie_item.casts and casts:
                    for cast in movie_item.casts:
                        if cast in casts:
                            score += 1
                if score > max_score:
                    max_score = score
                    max_movie = r
            if max_movie:
                # Persist the provider link and similarity on the
                # matched movie document.
                mongodb['movies'].update(
                    {'_id': ObjectId(max_movie.get('_id'))},
                    {'$set': {
                        'resources.online.%s' % movie_item.provider: {
                            'url': movie_item.url,
                            'similarity': max_score,
                            'last_updated': datetime.datetime.utcnow()
                        }
                    }})
                self.logger.info('%s(%s) %s(douban) %s(similarity)',
                                 movie_item.title, movie_item.provider,
                                 max_movie.get('title'), max_score)
            else:
                self.logger.warn('No similar movie for %s(%s)',
                                 movie_item.title, movie_item.provider)
        # Errors are logged and the loop keeps consuming the queue.
        except PyMongoError, e:
            self.logger.error('Mongodb error %s' % e)
        except Exception, e:
            self.logger.error(e)