def query(self, criteria):
    """
    Run an Elasticsearch query, transparently handling result sets larger
    than the result-window limit (10000) by falling back to ``helpers.scan``.

    :param dict criteria: Elasticsearch query body (a standard search DSL
        dict; may contain an explicit ``size`` key).
    :returns: ``[results, global_results]`` where ``results`` is the raw ES
        response with ``hits.hits`` populated and ``global_results`` is the
        project ``Results`` accumulator tracking total/success.
    """
    global_results = Results(
        self.logger,
        current=str(inspect.stack()[0][1]) + "." + str(inspect.stack()[0][3]))
    limit = self.limit

    # Probe with size=0 first: fetches only the total hit count, no documents.
    header_criteria = criteria.copy()
    header_criteria['size'] = 0
    results = self.es.search(index=self.index, body=header_criteria,
                             request_timeout=self.timeout)
    if "size" in criteria:
        query_size = criteria["size"]
    else:
        # NOTE(review): assumes hits.total is a plain int (ES < 7 response
        # shape) — on ES 7+ this would be a dict; verify cluster version.
        query_size = results['hits']['total']
    global_results.set_total(query_size)

    # Work-around: helpers.scan fails on nested queries with
    # SearchParseException ("failed to parse search source").
    # Issue opened: https://github.com/elastic/elasticsearch-py/issues/466
    # so nested "topics.score" queries always take the plain-search path.
    self.logger.debug("storage.query es.search:" + json.dumps(criteria))
    if query_size < limit or ("topics.score" in json.dumps(criteria)):
        results = self.es.search(index=self.index, body=criteria,
                                 request_timeout=self.timeout,
                                 size=query_size)
        global_results.set_total(1)
        global_results.add_success(criteria)
    else:
        self.logger.debug("storage.query helpers.scan:" + json.dumps(criteria))
        # Scroll batch size must stay <= index.max_result_window (10000):
        # "Batch size is too large, size must be less than or equal to: [10000]"
        results_gen = helpers.scan(
            self.es,
            query=criteria,
            scroll=self.config['ES_SEARCH_CACHING_DELAY'],
            preserve_order=True,
            request_timeout=self.timeout,
            size=1000,
            raise_on_error=False)
        # BUG FIX: the original appended the generator object itself to
        # hits.hits, so scanned documents were never materialized in the
        # response. Iterate the generator and collect each hit.
        for result in results_gen:
            results['hits']['hits'].append(result)
        global_results.add_success(criteria)
    return [results, global_results]