def get(self, request, *args, **kwargs):
    """Answer a GET with completion suggestions serialized as a JSON list.

    The suggestion text is the stripped ``q`` query parameter and the
    number of suggestions requested is the ``limit`` query parameter
    (5 when absent). Suggestions are looked up on the ``name_suggest``
    field of the webapp index.
    """
    size = request.GET.get('limit', 5)
    query_text = request.GET.get('q', '').strip()
    suggest_body = {
        'apps': {
            'completion': {'field': 'name_suggest', 'size': size},
            'text': query_text,
        },
    }

    client = WebappIndexer.get_es()
    response = client.suggest(body=suggest_body,
                              index=WebappIndexer.get_index())

    # No 'apps' key in the response means there is nothing to serialize.
    options = response['apps'][0]['options'] if 'apps' in response else []
    serializer = self.get_serializer(options)

    # The body is a bare JSON list. That is normally discouraged for
    # security reasons, but nothing user-specific is included here (the
    # data is fully anonymous), so it is acceptable.
    payload = json.dumps(serializer.data)
    return HttpResponse(payload,
                        content_type='application/x-rocketbar+json')
def tearDown(self):
    """Delete both test apps and purge them from the search index."""
    # Delete first, then read the ids, exactly in that order.
    self.app1.delete()
    self.app2.delete()
    stale_ids = [self.app1.id, self.app2.id]
    unindex_webapps(stale_ids)

    # Lucene only marks a document as deleted inside its segment, so the
    # suggestions data structure still holds it until deletes are expunged.
    client = WebappIndexer.get_es()
    client.indices.optimize(index=WebappIndexer.get_index(),
                            only_expunge_deletes=True)
def tearDown(self):
    """Remove the two fixture apps from the database and the index."""
    first, second = self.app1, self.app2
    first.delete()
    second.delete()
    unindex_webapps([first.id, second.id])

    # Un-indexing alone is not enough for the suggester: Lucene leaves
    # deleted documents in their segment (flagged as deleted) until the
    # deletes are expunged.
    indices_api = WebappIndexer.get_es().indices
    indices_api.optimize(
        index=WebappIndexer.get_index(),
        only_expunge_deletes=True,
    )
def _get_indices(self):
    """Return the list of index names to search for this request.

    The ``doc_type`` query parameter narrows the search: ``webapp``
    selects only the webapp index and ``website`` only the website index.
    Any other value (including a missing parameter) selects both.
    """
    requested = self.request.GET.get('doc_type', 'all')
    webapp_index = WebappIndexer.get_index()
    website_index = WebsiteIndexer.get_index()

    single_type = {
        'webapp': [webapp_index],
        'website': [website_index],
    }
    # Fall back to every content type when no specific filter matches.
    return single_type.get(requested, [webapp_index, website_index])
def mget_apps(self, app_ids):
    """
    Takes a list of app_ids. Does an ES mget.
    Returns an app_map for serializer context.

    The returned dict maps each found document's ``id`` field to its
    ``_source``. An empty ``app_ids`` returns an empty dict without
    querying ES, and ids that are not in the index are left out of the
    map instead of raising ``KeyError``.
    """
    if not app_ids:
        # Nothing to fetch; ES refuses a multi-get with no documents.
        return {}

    es = WebappIndexer.get_es()
    response = es.mget(body={'ids': app_ids},
                       index=WebappIndexer.get_index(),
                       doc_type=WebappIndexer.get_mapping_type_name())

    app_map = {}
    for doc in response['docs']:
        source = doc.get('_source')
        if source is None:
            # Entries for ids missing from the index carry no `_source`.
            continue
        # Store the apps to attach to feed elements later.
        app_map[source['id']] = source
    return app_map
def index_webapps(ids, **kw):
    """Index the webapps with the given ids into Elasticsearch.

    TODO: use search/indexers.py:index.

    ``ids`` is a sequence of app ids. An empty sequence is a no-op,
    matching ``unindex_webapps``. The optional ``index`` keyword overrides
    the index name, which defaults to ``WebappIndexer.get_index()``.
    """
    if not ids:
        # Avoid IndexError on ids[0] / ids[-1] in the log line below.
        return

    task_log.info('Indexing apps %s-%s. [%s]' % (ids[0], ids[-1], len(ids)))

    index = kw.pop('index', WebappIndexer.get_index())
    # Note: If reindexing is currently occurring, `get_indices` will return
    # more than one index.
    indices = Reindexing.get_indices(index)

    es = WebappIndexer.get_es(urls=settings.ES_URLS)
    # Deleted apps are included in the queryset, and caching is bypassed.
    qs = Webapp.indexing_transformer(
        Webapp.with_deleted.no_cache().filter(id__in=ids))

    for obj in qs:
        doc = WebappIndexer.extract_document(obj.id, obj)
        for idx in indices:
            WebappIndexer.index(doc, id_=obj.id, es=es, index=idx)
def unindex_webapps(ids, **kw):
    """Remove the webapps with the given ids from Elasticsearch.

    Does nothing when ``ids`` is empty. The optional ``index`` keyword
    overrides the index name, which defaults to
    ``WebappIndexer.get_index()``. Ids that are not present in an index
    are logged and skipped.
    """
    if not ids:
        return

    first_id, last_id, count = ids[0], ids[-1], len(ids)
    task_log.info('Un-indexing apps %s-%s. [%s]' % (first_id, last_id, count))

    default_index = WebappIndexer.get_index()
    base_index = kw.pop('index', default_index)
    # While a reindex is in progress `get_indices` hands back more than
    # one index, and the document has to be removed from each of them.
    target_indices = Reindexing.get_indices(base_index)

    client = WebappIndexer.get_es(urls=settings.ES_URLS)

    for app_id in ids:
        for index_name in target_indices:
            try:
                WebappIndexer.unindex(id_=app_id, es=client,
                                      index=index_name)
            except ElasticHttpNotFoundError:
                # A missing document is fine; just note it and move on.
                task_log.info(
                    u'[Webapp:%s] Unindexing app but not found in index'
                    % app_id)
def get(self, request, *args, **kwargs):
    """Answer a GET with completion suggestions serialized as a JSON list.

    Sends a ``_suggest`` request against the webapp index, using the
    stripped ``q`` query parameter as the text and the ``limit`` query
    parameter (5 when absent) as the size.
    """
    size = request.GET.get('limit', 5)
    completion = {'field': 'name_suggest', 'size': size}
    suggest_body = {
        'apps': {
            'completion': completion,
            'text': request.GET.get('q', '').strip(),
        },
    }

    client = S(WebappIndexer).get_es()
    path = [WebappIndexer.get_index(), '_suggest']
    response = client.send_request('GET', path, body=suggest_body)

    options = []
    if 'apps' in response:
        options = response['apps'][0]['options']

    serializer = self.get_serializer(options)
    # A top-level JSON list is usually avoided for security reasons. It
    # is fine here because the payload is fully anonymous and contains no
    # user-specific data.
    return HttpResponse(json.dumps(serializer.data),
                        content_type='application/x-rocketbar+json')
def handle(self, *args, **kwargs):
    """Index every webapp that has no document in the webapp index.

    Each app id from ``Webapp.objects`` is looked up in Elasticsearch;
    ids whose lookup raises ``NotFoundError`` are collected and passed to
    ``WebappIndexer().run_indexing``. A short status message is written
    to stdout either way.
    """
    index_name = WebappIndexer.get_index()
    mapping_type = WebappIndexer.get_mapping_type_name()
    client = WebappIndexer.get_es()

    absent = []
    for pk in Webapp.objects.values_list('id', flat=True):
        try:
            # Only the `id` field is requested; existence is all we need.
            client.get(index_name, pk, mapping_type, fields='id')
        except elasticsearch.NotFoundError:
            # Not in our index yet, so remember it for indexing below.
            absent.append(pk)

    if not absent:
        sys.stdout.write('No docs missing from index.')
        return

    sys.stdout.write('Adding %s doc(s) to the index.' % len(absent))
    WebappIndexer().run_indexing(absent, client)
def test_index(self):
    # With ES_INDEXES mapping 'webapp' to 'apps', the indexer must report
    # 'apps' as its index name.
    overrides = {'webapp': 'apps'}
    with self.settings(ES_INDEXES=overrides):
        index_name = WebappIndexer.get_index()
        eq_(index_name, 'apps')
def test_index(self):
    # Override the ES_INDEXES setting and check that the indexer picks up
    # the index name configured under the "webapp" key.
    expected = "apps"
    with self.settings(ES_INDEXES={"webapp": expected}):
        actual = WebappIndexer.get_index()
        eq_(actual, "apps")
def get_queryset(self):
    """Build a ``Search`` spanning the webapp and website indices.

    The search uses the client from ``BaseIndexer.get_es()`` and is
    restricted to the index names and mapping type names reported by
    ``WebappIndexer`` and ``WebsiteIndexer``.
    """
    client = BaseIndexer.get_es()
    index_names = [
        WebappIndexer.get_index(),
        WebsiteIndexer.get_index(),
    ]
    mapping_types = [
        WebappIndexer.get_mapping_type_name(),
        WebsiteIndexer.get_mapping_type_name(),
    ]
    return Search(using=client, index=index_names, doc_type=mapping_types)