def setUp(self):
    '''
    Prepare the integration test fixture.

    Starts the container, registers es_cleanup as a cleanup callback,
    deploys 'res/deploy/r2dm.yml' and creates the service clients. When the
    module-level use_es flag is set it also shrinks the ElasticSearch
    shard/replica settings in CFG, creates an ElasticSearch client and
    spawns the index bootstrap process with op 'clean_bootstrap'.
    '''
    super(DiscoveryIntTest, self).setUp()

    self._start_container()
    # unittest runs cleanup callbacks after the test, and also when setUp
    # itself fails part-way through.
    self.addCleanup(DiscoveryIntTest.es_cleanup)
    self.container.start_rel_from_url('res/deploy/r2dm.yml')

    self.discovery = DiscoveryServiceClient()
    self.catalog = CatalogManagementServiceClient()
    self.ims = IndexManagementServiceClient()
    self.rr = ResourceRegistryServiceClient()

    # Everything below only applies when ElasticSearch is in use.
    if not use_es:
        return

    self.es_host = CFG.get_safe('server.elasticsearch.host', 'localhost')
    self.es_port = CFG.get_safe('server.elasticsearch.port', '9200')

    # Single shard, no replicas, for both the indexes and the rivers.
    CFG.server.elasticsearch.shards = 1
    CFG.server.elasticsearch.replicas = 0
    CFG.server.elasticsearch.river_shards = 1
    CFG.server.elasticsearch.river_replicas = 0

    self.es = ep.ElasticSearch(host=self.es_host,
                               port=self.es_port,
                               timeout=10,
                               verbose=True)

    # The bootstrap process receives a copy of CFG carrying the operation
    # it should perform.
    bootstrap_cfg = DotDict(CFG)
    bootstrap_cfg.op = 'clean_bootstrap'
    self.container.spawn_process('index_bootstrap',
                                 'ion.processes.bootstrap.index_bootstrap',
                                 'IndexBootStrap',
                                 bootstrap_cfg)
def autocomplete(request):
    '''
    Return autocomplete suggestions for thesaurus terms as a JSON list.

    Reads the search text from the 'q' GET parameter and runs a wildcard
    query ("*q*") on 'labels_orig' in the 'thesaurus'/'terms' index. If that
    yields fewer than two URIs, a second wildcard query on 'alt_labels_orig'
    is added. Each distinct URI becomes {'url': uri, 'value': label}, where
    the label comes from get_preferred_label for the active language.

    A request without 'q' (or with an empty one) returns an empty JSON list
    instead of failing on an unbound `results`.
    '''
    preferred_language = translation.get_language()

    # Bound up-front so the response below is valid on every path.
    results = []

    q = request.GET.get('q')
    if q:
        search = ep.ElasticSearch()
        search.size(10)
        pattern = "*" + q + "*"

        labels_q = ep.ElasticQuery().wildcard('labels_orig', pattern)
        labels_results = search.search_advanced('thesaurus', 'terms', labels_q)
        matching_uris = [res["_source"]["uri"]
                         for res in labels_results["hits"]["hits"]]

        # Only fall back to the alternative labels when the preferred
        # labels gave (almost) nothing.
        if len(matching_uris) < 2:
            alt_labels_q = ep.ElasticQuery().wildcard('alt_labels_orig', pattern)
            alt_labels_results = search.search_advanced('thesaurus', 'terms', alt_labels_q)
            matching_uris.extend(res["_source"]["uri"]
                                 for res in alt_labels_results["hits"]["hits"])

        # set() removes URIs matched by both queries.
        for u in set(matching_uris):
            results.append({'url': u,
                            'value': get_preferred_label(URIRef(u), preferred_language)})

    return HttpResponse(json.dumps(results), content_type='application/json')
def on_start(self):
    '''
    Read the ElasticSearch settings from the process configuration and run
    the bootstrap operation named by the 'op' config entry.

    Raises BadRequest when 'system.elasticsearch' is not enabled, or when
    'op' is neither 'index_bootstrap' nor 'clean_bootstrap'.
    '''
    cfg = self.CFG

    if not cfg.get_safe('system.elasticsearch', False):
        text = 'Can not initialize indexes without ElasticSearch enabled.  Please enable system.elasticsearch.'
        log.error(text)
        raise BadRequest(text)

    self.sysname = get_sys_name().lower()

    # Connection settings.
    self.es_host = cfg.get_safe('server.elasticsearch.host', 'localhost')
    self.es_port = cfg.get_safe('server.elasticsearch.port', '9200')

    # Shard / replica counts for the indexes and for the rivers.
    self.index_shards = cfg.get_safe('server.elasticsearch.shards', 5)
    self.index_replicas = cfg.get_safe('server.elasticsearch.replicas', 1)
    self.river_shards = cfg.get_safe('server.elasticsearch.river_shards', 5)
    self.river_replicas = cfg.get_safe('server.elasticsearch.river_replicas', 1)

    self.es = ep.ElasticSearch(host=self.es_host, port=self.es_port, timeout=10)

    requested_op = cfg.get('op', None)
    if requested_op == 'index_bootstrap':
        self.index_bootstrap()
        return
    if requested_op == 'clean_bootstrap':
        self.clean_bootstrap()
        return

    raise BadRequest('Operation Unknown')
def setUp(self):
    '''
    Start the container with the r2dm deployment, connect to the configured
    ElasticSearch server and wipe it.

    Asserts that CFG.system.force_clean is set before calling self.wipe().
    '''
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2dm.yml')

    es_settings = CFG.server.elasticsearch
    self.es = ep.ElasticSearch(host=es_settings.host, port=es_settings.port)

    # Guard placed ahead of the wipe.
    force_clean = CFG.system.force_clean
    self.assertTrue(force_clean)
    self.wipe()
def query_term(self, source_id='', field='', value='', fuzzy=False, match=False, order=None, limit=0, offset=0, id_only=False):
    '''
    Elasticsearch term query against an index.

    > discovery.query_term('indexID', 'name', '*', order={'name':'asc'}, limit=20, id_only=False)

    source_id  id of the resource to query, read from the resource registry
    field      field to search; '*' is translated to '_all'
    value      text to search for; a '*' in it selects a wildcard query
    fuzzy      build a fuzzy_like_this query instead
    match      build a match_phrase_prefix query instead (checked after fuzzy)
    order      dict passed to es.sort as keyword arguments
    limit      result size, applied when non-zero
    offset     result offset, applied when non-zero
    id_only    forwarded to _results_from_response

    Raises BadRequest when ElasticSearch is disabled; the arguments are
    checked through validate_true / validate_is_instance.
    '''
    if not self.use_es:
        raise BadRequest('Can not make queries without ElasticSearch, enable system.elasticsearch to make queries.')

    validate_true(source_id, 'Unspecified source_id')
    validate_true(field, 'Unspecified field')
    validate_true(value, 'Unspecified value')

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

    source = self.clients.resource_registry.read(source_id)

    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # If source is a view, catalog or collection go through it and
    # recursively call query_term on all the results in the indexes.
    # fuzzy and match are forwarded as well, otherwise the fan-out would
    # silently fall back to a plain term query.
    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    iterate = self._multi(self.query_term, source, field=field, value=value, fuzzy=fuzzy, match=match, order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % index)

    if order:
        validate_is_instance(order, dict, 'Order is incorrect.')
        es.sort(**order)

    if limit:
        es.size(limit)

    if offset:
        es.from_offset(offset)

    if field == '*':
        field = '_all'

    if fuzzy:
        query = ep.ElasticQuery.fuzzy_like_this(value, fields=[field])
    elif match:
        # Reuse the body of a match query under the match_phrase_prefix key.
        match_query = ep.ElasticQuery.match(field=field, query=value)
        query = {"match_phrase_prefix": match_query['match']}
    elif '*' in value:
        query = ep.ElasticQuery.wildcard(field=field, value=value)
    else:
        query = ep.ElasticQuery.field(field=field, query=value)

    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)

    IndexManagementService._check_response(response)

    return self._results_from_response(response, id_only)
def es_cleanup():
    '''
    Delete the couchdb river and the index for every standard index and for
    the system's '<sysname>_resources_index' and '<sysname>_events_index'.

    Host and port come from CFG ('server.elasticsearch.host' / '.port'),
    defaulting to 'localhost' and '9200'.
    '''
    es_host = CFG.get_safe('server.elasticsearch.host', 'localhost')
    es_port = CFG.get_safe('server.elasticsearch.port', '9200')
    es = ep.ElasticSearch(host=es_host, port=es_port, timeout=10)

    sysname = get_sys_name().lower()

    # list() gives an appendable copy on both Python 2 and Python 3;
    # on Python 3 keys() is a view without append().
    indexes = list(STD_INDEXES.keys())
    indexes.append('%s_resources_index' % sysname)
    indexes.append('%s_events_index' % sysname)

    for index in indexes:
        # River first, then the index itself.
        IndexManagementService._es_call(es.river_couchdb_delete, index)
        IndexManagementService._es_call(es.index_delete, index)
def query_time(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
    '''
    Elasticsearch range query on a time field.

    source_id   id of the resource to query, read from the resource registry
    field       field to search; '*' is translated to '_all'
    from_value  optional date string, parsed with dateutil
    to_value    optional date string, parsed with dateutil
    order       dict passed to es.sort as keyword arguments
    limit       result size, applied when non-zero
    offset      result offset, applied when non-zero
    id_only     forwarded to _results_from_response

    Both bounds are converted to epoch milliseconds before querying.
    Raises BadRequest when ElasticSearch is disabled.
    '''
    if not self.use_es:
        raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

    if from_value is not None:
        validate_is_instance(from_value, basestring, '"From" is not a valid string (%s)' % from_value)

    if to_value is not None:
        validate_is_instance(to_value, basestring, '"To" is not a valid string')

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

    source = self.clients.resource_registry.read(source_id)

    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # If source is a view, catalog or collection go through it and
    # recursively call query_time on all the results in the indexes
    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    iterate = self._multi(self.query_time, source, field=field, from_value=from_value, to_value=to_value, order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)

    if order:
        validate_is_instance(order, dict, 'Order is incorrect.')
        es.sort(**order)

    if limit:
        es.size(limit)

    # offset was accepted but never applied; honour it the same way
    # query_term does.
    if offset:
        es.from_offset(offset)

    if field == '*':
        field = '_all'

    # Date strings -> epoch milliseconds.
    # NOTE(review): timetuple() does not apply a UTC offset carried by the
    # parsed value - confirm callers pass UTC timestamps.
    if from_value is not None:
        from_value = calendar.timegm(dateutil.parser.parse(from_value).timetuple()) * 1000

    if to_value is not None:
        to_value = calendar.timegm(dateutil.parser.parse(to_value).timetuple()) * 1000

    query = ep.ElasticQuery.range(
        field=field,
        from_value=from_value,
        to_value=to_value
    )

    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)

    IndexManagementService._check_response(response)

    return self._results_from_response(response, id_only)
def query_geo_distance(self, source_id='', field='', origin=None, distance='', units='mi', order=None, limit=0, offset=0, id_only=False):
    '''
    Elasticsearch query for documents within a distance of a point.

    source_id  id of the resource to query, read from the resource registry
    field      geo field to filter and sort on; '*' is translated to '_all'
    origin     list or tuple of exactly two elements
    distance   distance value, combined with units as '<distance><units>'
    units      distance units, 'mi' by default
    order      optional dict; its first key/value is added as a sort
    limit      result size, applied when non-zero
    offset     result offset, applied when non-zero
    id_only    forwarded to _results_from_response

    Raises BadRequest when ElasticSearch is disabled; origin is checked
    through validate_true.
    '''
    validate_true(isinstance(origin, (tuple, list)), 'Origin is not a list or tuple.')
    validate_true(len(origin) == 2, 'Origin is not of the right size: (2)')

    if not self.use_es:
        raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

    source = self.clients.resource_registry.read(source_id)

    # Forward every query argument to the fan-out; previously units, order,
    # limit, offset and id_only were dropped, so the recursive calls fell
    # back to their defaults.
    iterate = self._multi(self.query_geo_distance, source=source, field=field, origin=origin, distance=distance, units=units, order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % index)

    sorts = ep.ElasticSort()
    # An empty dict carries no sort; next(iter(...)) picks the first key on
    # both Python 2 and Python 3.
    if isinstance(order, dict) and order:
        sort_field = next(iter(order))
        value = order[sort_field]
        sorts.sort(sort_field, value)
        es.sorted(sorts)

    if limit:
        es.size(limit)

    if offset:
        es.from_offset(offset)

    if field == '*':
        field = '_all'

    # Sort by distance from origin (after any caller-supplied sort).
    sorts.geo_distance(field, origin, units)
    es.sorted(sorts)

    filter = ep.ElasticFilter.geo_distance(field, origin, '%s%s' % (distance, units))
    es.filtered(filter)

    query = ep.ElasticQuery.match_all()

    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
    IndexManagementService._check_response(response)

    return self._results_from_response(response, id_only)
def query_geo_bbox(self, source_id='', field='', top_left=None, bottom_right=None, order=None, limit=0, offset=0, id_only=False):
    '''
    Elasticsearch query for documents inside a bounding box.

    source_id     id of the resource to query, read from the resource registry
    field         geo field to filter on; '*' is translated to '_all'
    top_left      list or tuple of exactly two elements
    bottom_right  list or tuple of exactly two elements
    order         optional dict; its first key/value is added as a sort
    limit         result size, applied when non-zero
    offset        result offset, applied when non-zero
    id_only       forwarded to _results_from_response

    Raises BadRequest when ElasticSearch is disabled; the corners are
    checked through validate_true.
    '''
    validate_true(isinstance(top_left, (list, tuple)), 'Top Left is not a list or a tuple')
    validate_true(len(top_left) == 2, 'Top Left is not of the right size: (2)')
    validate_true(isinstance(bottom_right, (list, tuple)), 'Bottom Right is not a list or a tuple')
    validate_true(len(bottom_right) == 2, 'Bottom Right is not of the right size: (2)')

    if not self.use_es:
        raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

    source = self.clients.resource_registry.read(source_id)

    iterate = self._multi(self.query_geo_bbox, source=source, field=field, top_left=top_left, bottom_right=bottom_right, order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % index)

    sorts = ep.ElasticSort()
    # An empty dict carries no sort (it used to raise IndexError);
    # next(iter(...)) picks the first key on both Python 2 and Python 3.
    if isinstance(order, dict) and order:
        sort_field = next(iter(order))
        value = order[sort_field]
        sorts.sort(sort_field, value)
        es.sorted(sorts)

    if limit:
        es.size(limit)

    if offset:
        es.from_offset(offset)

    if field == '*':
        field = '_all'

    filter = ep.ElasticFilter.geo_bounding_box(field, top_left, bottom_right)
    es.filtered(filter)

    query = ep.ElasticQuery.match_all()

    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
    IndexManagementService._check_response(response)

    return self._results_from_response(response, id_only)
def query_range(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
    '''
    Elasticsearch numeric range query against an index.

    source_id   id of the resource to query, read from the resource registry
    field       field to search; '*' is translated to '_all'
    from_value  lower bound, must be an int or a float
    to_value    upper bound, must be an int or a float
    order       dict passed to es.sort as keyword arguments
    limit       result size, applied when non-zero
    offset      result offset, applied when non-zero
    id_only     forwarded to _results_from_response

    Raises BadRequest when ElasticSearch is disabled; the arguments are
    checked through validate_true / validate_is_instance.
    '''
    if not self.use_es:
        raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

    validate_true(from_value is not None, 'from_value not specified')
    validate_true(isinstance(from_value, (int, float)), 'from_value is not a valid number')
    validate_true(to_value is not None, 'to_value not specified')
    validate_true(isinstance(to_value, (int, float)), 'to_value is not a valid number')
    validate_true(source_id, 'source_id not specified')

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

    source = self.clients.resource_registry.read(source_id)

    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # If source is a view, catalog or collection go through it and
    # recursively call query_range on all the results in the indexes
    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    iterate = self._multi(self.query_range, source, field=field, from_value=from_value, to_value=to_value, order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)

    if order:
        validate_is_instance(order, dict, 'Order is incorrect.')
        es.sort(**order)

    if limit:
        es.size(limit)

    # offset was accepted but never applied; honour it the same way
    # query_term does.
    if offset:
        es.from_offset(offset)

    if field == '*':
        field = '_all'

    query = ep.ElasticQuery().range(
        field=field,
        from_value=from_value,
        to_value=to_value
    )

    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)

    IndexManagementService._check_response(response)

    return self._results_from_response(response, id_only)
def search(request):
    '''
    Run a query_string search over 'student'/'info' with the posted 'text'
    and render 'search.html'.

    The template context holds 'posts' (the '_source' of every returned
    record) and 'count' when the search has hits, and is empty otherwise.
    A request without a 'text' field, or with an empty one, renders the
    page with the empty context instead of raising on the missing key.
    '''
    context = {}

    # .get() rather than ['text']: plain GET requests carry no POST data.
    text = request.POST.get('text')
    if not text:
        return render(request, 'search.html', context)

    query = ep.ElasticQuery().query_string(query=text)
    sr = ep.ElasticSearch()

    response = sr.search_advanced('student', 'info', query)
    if response['hits']['hits']:
        # NOTE(review): return_records is queried separately from the hit
        # check above, as before - confirm whether one call would suffice.
        records = sr.return_records('student', 'info', query)
        context = {
            'posts': [record['_source'] for record in records],
            'count': len(records),
        }

    return render(request, 'search.html', context)
def search(request):
    '''
    Search the thesaurus terms and render a paginated result page.

    Reads the search text from the 'q' GET parameter and runs three match
    queries on the 'thesaurus'/'terms' index, on 'labels', 'alt_labels' and
    '_all', in that order. Every distinct URI becomes
    {'pref_label': label, 'uri': uri}, where the label comes from
    get_preferred_label for the active language. Results are shown 20 per
    page, selected by the 'page' GET parameter.

    Without 'q' an empty result page is rendered; a non-integer 'page'
    falls back to the first page.
    '''
    preferred_language = translation.get_language()

    # Bound up-front so pagination below works on every path.
    results = []

    q = request.GET.get('q')
    if q:
        es = ep.ElasticSearch()
        es.size(8000)

        # The label is derived from the URI, so tracking seen URIs removes
        # the same duplicates as comparing whole result dicts, without
        # scanning the result list for every hit.
        seen_uris = set()
        for field in ('labels', 'alt_labels', '_all'):
            query = ep.ElasticQuery().match(field, q)
            response = es.search_advanced('thesaurus', 'terms', query)
            for res in response["hits"]["hits"]:
                uri = res["_source"]["uri"]
                if uri in seen_uris:
                    continue
                seen_uris.add(uri)
                results.append({
                    'pref_label': get_preferred_label(URIRef(uri), preferred_language),
                    'uri': uri,
                })

    page = request.GET.get('page', 1)
    p = Paginator(results, 20, request=request)
    # The guard belongs around page(): reading the GET parameter cannot
    # raise PageNotAnInteger.
    try:
        paginated_results = p.page(page)
    except PageNotAnInteger:
        paginated_results = p.page(1)

    return render(request, 'thesaurus/search.html', {'results': paginated_results})
def test_query(self):
    '''
    A query_string search for 'anm' on 'student'/'info' must return a
    truthy response.
    '''
    anm_query = ep.ElasticQuery().query_string(query='anm')
    client = ep.ElasticSearch()

    response = client.search_advanced('student', 'info', anm_query)

    self.assertTrue(response)
def query_vertical_bounds(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
    '''
    Elasticsearch query on the geospatial vertical bounds of documents.

    A document matches when its vertical min or max is >= from_value and
    its vertical min or max is <= to_value.

    source_id   id of the resource to query, read from the resource registry
    field       prefix of the '<field>.geospatial_vertical_min/max' fields;
                '*' selects the unprefixed field names
    from_value  optional float
    to_value    optional float
    order       dict passed to es.sort as keyword arguments
    limit       sent as 'size' when non-zero
    offset      sent as 'from' when non-zero
    id_only     forwarded to _results_from_response
    '''
    if from_value is not None:
        validate_is_instance(from_value, float, '"From" is not a valid float (%s)' % from_value)

    if to_value is not None:
        validate_is_instance(to_value, float, '"To" is not a valid float')

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

    source = self.clients.resource_registry.read(source_id)

    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # If source is a view, catalog or collection go through it and
    # recursively call query_vertical_bounds on all the results in the
    # indexes. This used to recurse into query_time, which runs a
    # different query and rejects float bounds.
    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    iterate = self._multi(self.query_vertical_bounds, source, field=field, from_value=from_value, to_value=to_value, order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)

    if order:
        validate_is_instance(order, dict, 'Order is incorrect.')
        es.sort(**order)

    if field == '*':
        field = '_all'
        vertical_min = 'geospatial_vertical_min'
        vertical_max = 'geospatial_vertical_max'
    else:
        vertical_min = '%s.geospatial_vertical_min' % field
        vertical_max = '%s.geospatial_vertical_max' % field

    query = {
        "query": {
            "match_all": {}
        },
        "filter": {
            "and": [
                {
                    "or": [
                        {"range": {vertical_min: {"gte": from_value}}},
                        {"range": {vertical_max: {"gte": from_value}}}
                    ]
                },
                {
                    "or": [
                        {"range": {vertical_min: {"lte": to_value}}},
                        {"range": {vertical_max: {"lte": to_value}}}
                    ]
                }
            ]
        }
    }

    if limit:
        query['size'] = limit
    if offset:
        query['from'] = offset

    response = IndexManagementService._es_call(es.raw_query, '%s/_search' % index.index_name, method='POST', data=query, host=self.elasticsearch_host, port=self.elasticsearch_port)

    IndexManagementService._check_response(response)

    return self._results_from_response(response, id_only)
def query_time_bounds(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
    '''
    Elasticsearch query on the start/end datetime bounds of documents.

    A document matches when its start or end datetime is >= from_value and
    its start or end datetime is <= to_value.

    source_id   id of the resource to query, read from the resource registry
    field       prefix of the '<field>.start_datetime/end_datetime' fields;
                '*' selects the unprefixed field names
    from_value  optional date string, parsed with dateutil
    to_value    optional date string, parsed with dateutil
    order       dict passed to es.sort as keyword arguments
    limit       sent as 'size' when non-zero
    offset      sent as 'from' when non-zero
    id_only     forwarded to _results_from_response

    Both bounds are converted to epoch milliseconds before querying.
    '''
    if from_value is not None:
        validate_is_instance(from_value, basestring, '"From" is not a valid string (%s)' % from_value)

    if to_value is not None:
        validate_is_instance(to_value, basestring, '"To" is not a valid string')

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

    source = self.clients.resource_registry.read(source_id)

    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # If source is a view, catalog or collection go through it and
    # recursively call query_time_bounds on all the results in the
    # indexes. This used to recurse into query_time, which ranges over the
    # field itself rather than its start/end datetime bounds.
    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    iterate = self._multi(self.query_time_bounds, source, field=field, from_value=from_value, to_value=to_value, order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)

    if order:
        validate_is_instance(order, dict, 'Order is incorrect.')
        es.sort(**order)

    if field == '*':
        field = '_all'
        start_time = 'start_datetime'
        end_time = 'end_datetime'
    else:
        start_time = '%s.start_datetime' % field
        end_time = '%s.end_datetime' % field

    # Date strings -> epoch milliseconds.
    # NOTE(review): timetuple() does not apply a UTC offset carried by the
    # parsed value - confirm callers pass UTC timestamps.
    if from_value is not None:
        from_value = calendar.timegm(dateutil.parser.parse(from_value).timetuple()) * 1000

    if to_value is not None:
        to_value = calendar.timegm(dateutil.parser.parse(to_value).timetuple()) * 1000

    query = {
        "query": {
            "match_all": {}
        },
        "filter": {
            "and": [
                {
                    "or": [
                        {"range": {start_time: {"gte": from_value}}},
                        {"range": {end_time: {"gte": from_value}}}
                    ]
                },
                {
                    "or": [
                        {"range": {start_time: {"lte": to_value}}},
                        {"range": {end_time: {"lte": to_value}}}
                    ]
                }
            ]
        }
    }

    if limit:
        query['size'] = limit
    if offset:
        query['from'] = offset

    response = IndexManagementService._es_call(es.raw_query, '%s/_search' % index.index_name, method='POST', data=query, host=self.elasticsearch_host, port=self.elasticsearch_port)

    IndexManagementService._check_response(response)

    return self._results_from_response(response, id_only)