def get(self, index, id, doc_type='_all', params=None, headers=None):
    """Fetch one document by id, optionally filtered by doc type.

    Returns the stored document (with ``found`` set) on a hit; returns
    ``{'found': False}`` when params request that 404 be ignored;
    otherwise raises NotFoundError with a JSON error payload.
    """
    ignore = extract_ignore_as_iterable(params)
    # First stored document whose _id matches (and _type, unless '_all').
    candidates = self.__documents_dict.get(index, [])
    match = next(
        (doc for doc in candidates
         if doc.get('_id') == id
         and (doc_type == '_all' or doc.get('_type') == doc_type)),
        None,
    )
    if match:
        match['found'] = True
        return match
    if params and 404 in ignore:
        return {'found': False}
    raise NotFoundError(404, json.dumps({
        '_index': index,
        '_type': doc_type,
        '_id': id,
        'found': False,
    }))
def test_raises_if_called_for_concrete_index(self, client):
    """Raise if called for a concrete index."""
    # A concrete (non-aliased) index makes indices.get_alias raise
    # NotFoundError; update_aliased_index is expected to surface that
    # as a RuntimeError rather than proceed.
    client.conn.indices.get_alias.side_effect = NotFoundError(
        'test', 'test desc')
    with pytest.raises(RuntimeError):
        update_aliased_index(client, 'new-target')
def get(self, index, id, doc_type='_all', params=None):
    """Return the stored document matching ``id`` (and ``doc_type``
    unless '_all'); raise NotFoundError when nothing matches."""
    hit = None
    for document in self.__documents_dict.get(index, []):
        if document.get('_id') != id:
            continue
        if doc_type == '_all' or document.get('_type') == doc_type:
            hit = document
            break
    if not hit:
        raise NotFoundError(404, json.dumps({
            '_index': index,
            '_type': doc_type,
            '_id': id,
            'found': False,
        }))
    hit['found'] = True
    return hit
def count(self, index=None, doc_type=None, body=None, params=None):
    """Count stored documents, optionally restricted to one index
    and/or one doc type; mirrors the ES count response shape."""
    if index is not None and index not in self.__documents_dict:
        raise NotFoundError(
            404, 'IndexMissingException[[{0}] missing]'.format(index))
    targets = [index] if index is not None else self.__documents_dict.keys()
    total = sum(
        1
        for name in targets
        for doc in self.__documents_dict[name]
        if doc_type is None or doc.get('_type') == doc_type
    )
    return {
        'count': total,
        '_shards': {
            'successful': 1,
            'failed': 0,
            'total': 1,
        },
    }
def _build_response(result):
    # Convert an ES mget response into a list of objects. NOTE(review):
    # `cls`, `raise_on_error` and `missing` are free variables resolved
    # from the enclosing scope — confirm against the outer function.
    objs, error_docs, missing_docs = [], [], []
    for doc in result['docs']:
        if doc.get('found'):
            if error_docs or missing_docs:
                # We're going to raise an exception anyway, so avoid an
                # expensive call to cls.from_es().
                continue
            objs.append(cls.from_es(doc))
        elif doc.get('error'):
            if raise_on_error:
                error_docs.append(doc)
            if missing == 'none':
                objs.append(None)
        # The doc didn't cause an error, but the doc also wasn't found.
        elif missing == 'raise':
            missing_docs.append(doc)
        elif missing == 'none':
            objs.append(None)
    if error_docs:
        # Per-doc errors become a single RequestError listing all ids.
        error_ids = [doc['_id'] for doc in error_docs]
        message = 'Required routing/parent not provided for documents %s.'
        message %= ', '.join(error_ids)
        raise RequestError(400, message, error_docs)
    if missing_docs:
        # missing == 'raise': report every id that was not found.
        missing_ids = [doc['_id'] for doc in missing_docs]
        message = 'Documents %s not found.' % ', '.join(missing_ids)
        raise NotFoundError(404, message, missing_docs)
    return objs
def delete(self, index, id, doc_type=None, params=None, headers=None):
    """Delete the first document in ``index`` with a matching id.

    Returns an ES-style delete acknowledgement on success; raises
    NotFoundError (with the same payload, JSON-encoded) on a miss.
    """
    removed = None
    for doc in self.__documents_dict.get(index, []):
        if doc.get('_id') == id:
            removed = doc
            self.__documents_dict[index].remove(doc)
            break
    result_dict = {
        'found': removed is not None,
        '_index': index,
        # On a hit, report the stored document's type, not the argument.
        '_type': removed.get('_type') if removed is not None else doc_type,
        '_id': id,
        '_version': 1,
    }
    if removed is not None:
        return result_dict
    raise NotFoundError(404, json.dumps(result_dict))
def delete(self, index, doc_type, id, params=None, headers=None):
    """Delete the document matching both ``doc_type`` and ``id``.

    Honors ``ignore=404`` in ``params`` by returning ``{'found': False}``
    instead of raising NotFoundError.
    """
    ignore = extract_ignore_as_iterable(params)
    target = None
    for doc in self.__documents_dict.get(index, []):
        if doc.get('_type') == doc_type and doc.get('_id') == id:
            target = doc
            self.__documents_dict[index].remove(doc)
            break
    result_dict = {
        'found': target is not None,
        '_index': index,
        '_type': doc_type,
        '_id': id,
        '_version': 1,
    }
    if target is not None:
        return result_dict
    if params and 404 in ignore:
        return {'found': False}
    raise NotFoundError(404, json.dumps(result_dict))
def verify_es_response(response):
    """Validate a search response, failing fast on empty results.

    Raises NotFoundError when no index matched (``took == 0``) and
    aborts with HTTP 404 when the query produced zero hits.
    """
    if response.took == 0:
        # A query that took 0 means no index could be matched.
        raise NotFoundError(404, 'index_not_found_exception', {})
    hit_count = len(response.hits)
    if hit_count == 0:
        # No hits: the supplied operation_id was likely invalid.
        abort(404, "Your search did not result in any hits (wrong id?)")
def test_success(self):
    # Arm the mock so indices.get raises NotFoundError whose payload
    # echoes the resolved index name; get_datemath() should extract the
    # datemath portion ('hasthemath') from that error body.
    client = Mock()
    datemath = u'{hasthemath}'
    psuedo_random = u'not_random_at_all'
    expected = u'curator_get_datemath_function_' + psuedo_random + u'-hasthemath'
    client.indices.get.side_effect = (
        NotFoundError(404, 'simulated error', {u'error': {u'index': expected}}))
    self.assertEqual('hasthemath',
                     get_datemath(client, datemath, psuedo_random))
def get_record_by_pid(self, pid, fields=None):
    """Fetch the single record whose ``pid`` matches.

    :param pid: persistent identifier matched with a term query.
    :param fields: optional list of source fields to include.
    :returns: the ``_source`` of the unique hit.
    :raises NotFoundError: when the query does not yield exactly one hit.
    """
    search = self.filter('term', pid=pid).extra(size=1)
    if fields:
        search = search.source(includes=fields)
    result = search.execute()
    if result.hits.total.value != 1:
        raise NotFoundError(f'Record not found pid: {pid}')
    return result.hits.hits[0]._source
def test_status_missing_index(client, mock_status_externals):
    """The status JSON shows if the ElasticSearch index is not found."""
    # Simulate a missing index: the mocked search raises NotFoundError.
    mock_status_externals['search'].side_effect = NotFoundError('No Index')
    url = reverse('health.status')
    response = client.get(url)
    data = json.loads(response.content)
    # The service is reachable ('available') but reports no data.
    assert data['services']['search'] == {
        'available': True,
        'populated': False,
        'count': 0,
    }
def test_status_missing_index(client, mock_status_externals):
    """The status JSON shows if the ElasticSearch index is not found."""
    # Simulate a missing index: the mocked search raises NotFoundError.
    mock_status_externals["search"].side_effect = NotFoundError("No Index")
    url = reverse("health.status")
    response = client.get(url)
    data = json.loads(response.content)
    # The service is reachable ('available') but reports no data.
    assert data["services"]["search"] == {
        "available": True,
        "populated": False,
        "count": 0,
    }
def mget(cls, docs, using=None, index=None, raise_on_error=True,
         missing='none', **kwargs):
    """Retrieve multiple documents by id in one round trip.

    :param docs: ids (or dict document specifications) to fetch.
    :param using: connection alias; falls back to the doc type's alias.
    :param index: index to query; falls back to the doc type's index.
    :param raise_on_error: raise RequestError if any doc reports an error.
    :param missing: 'none' (append None), 'raise' (NotFoundError) or
        'skip' (omit) for ids that are not found.
    :returns: list of instances in the same order as requested.
    """
    if missing not in ('raise', 'skip', 'none'):
        raise ValueError("'missing' must be 'raise', 'skip', or 'none'.")
    es = connections.get_connection(using or cls._doc_type.using)
    # Bare ids are wrapped into {'_id': ...} doc specs for the mget body.
    body = {
        'docs': [doc if isinstance(doc, dict) else {'_id': doc}
                 for doc in docs]
    }
    results = es.mget(body, index=index or cls._doc_type.index,
                      doc_type=cls._doc_type.name, **kwargs)
    objs, error_docs, missing_docs = [], [], []
    for doc in results['docs']:
        if doc.get('found'):
            if error_docs or missing_docs:
                # We're going to raise an exception anyway, so avoid an
                # expensive call to cls.from_es().
                continue
            objs.append(cls.from_es(doc))
        elif doc.get('error'):
            if raise_on_error:
                error_docs.append(doc)
            if missing == 'none':
                objs.append(None)
        # The doc didn't cause an error, but the doc also wasn't found.
        elif missing == 'raise':
            missing_docs.append(doc)
        elif missing == 'none':
            objs.append(None)
    if error_docs:
        error_ids = [doc['_id'] for doc in error_docs]
        message = 'Required routing/parent not provided for documents %s.'
        message %= ', '.join(error_ids)
        raise RequestError(400, message, error_docs)
    if missing_docs:
        missing_ids = [doc['_id'] for doc in missing_docs]
        message = 'Documents %s not found.' % ', '.join(missing_ids)
        raise NotFoundError(404, message, missing_docs)
    return objs
def get_account_history(
        account_id=None,
        operation_type=None,
        from_=0,
        size=10,
        from_date='2015-10-10',
        to_date='now',
        sort_by='-operation_id_num',
        search_after=None,
        type='data',  # @ReservedAssignment
        agg_field='operation_type'):
    """Query account history from the ``bitshares-*`` indices.

    When ``type == 'data'`` returns a page of operation documents,
    paginated via ``search_after`` (comma-separated) or ``from_``.
    Otherwise returns a terms aggregation bucketed on ``agg_field``.
    Raises NotFoundError when the query matched no index (took == 0);
    returns [] when there are no hits.
    """
    s = Search(using=es, index="bitshares-*")
    if type == "data":
        s = s.extra(size=size)
        # search_after takes precedence over offset-based paging.
        if search_after and search_after != '':
            s = s.extra(search_after=search_after.split(','))
        else:
            s = s.extra(**{"from": from_})
    q = Q()
    if account_id and account_id != '':
        q = q & Q("match", account_history__account=account_id)
    # operation_type == 0 is a valid filter value, hence the extra check.
    if (operation_type and operation_type != -1) or operation_type == 0:
        q = q & Q("match", operation_type=operation_type)
    range_query = Q("range", block_data__block_time={
        'gte': from_date,
        'lte': to_date
    })
    s.query = q & range_query
    if type != "data":
        s.aggs.bucket('per_field', 'terms', field=agg_field, size=size)
    s = s.sort(*sort_by.split(','))
    response = s.execute()
    # took == 0 is treated as "no index could be matched".
    if response.took == 0:
        raise NotFoundError(404, 'index_not_found_exception', {})
    if len(response.hits) == 0:
        return []
    if type == "data":
        return [hit.to_dict() for hit in response]
    else:
        return [
            field.to_dict()
            for field in response.aggregations.per_field.buckets
        ]
def test_get_employee_with_non_existing_identifier(mock_get, http_client):
    """Should return status code 404

    When calling the elasticsearch client get method with a target `_id`
    on a non-existing document, a NotFoundError will be raised. This
    should be propagated to a service error response of HTTP 404.
    """
    # The patched ES get raises; the API layer must map it to 404.
    mock_get.side_effect = NotFoundError(404, "This _id does not exist")
    response = http_client.get("/api/employee/helllasdf")
    assert response.status_code == 404
def get(self, index, id, doc_type='_all', params=None):
    """Look up a document via ``find_document``; raise NotFoundError
    (with a JSON error body) when the index or document is missing."""
    document = None
    if index in self.__documents_dict:
        document = self.find_document(doc_type, id, index, None)
    if not document:
        payload = {
            '_index': index,
            '_type': doc_type,
            '_id': id,
            'found': False,
        }
        raise NotFoundError(404, json.dumps(payload))
    document['found'] = True
    return document
def _normalize_index_to_list(self, index):
    """Coerce ``index`` (None, str or list) to an iterable of index
    names, verifying that each one exists.

    :raises ValueError: for unsupported ``index`` types.
    :raises NotFoundError: for any index name not currently stored.
    """
    if index is None:
        searchable_indexes = self.__documents_dict.keys()
    elif isinstance(index, str):
        searchable_indexes = [index]
    elif isinstance(index, list):
        searchable_indexes = index
    else:
        # NOTE(review): ValueError may not be the ideal exception here.
        raise ValueError("Invalid param 'index'")
    for name in searchable_indexes:
        if name not in self.__documents_dict:
            raise NotFoundError(
                404, 'IndexMissingException[[{0}] missing]'.format(name))
    return searchable_indexes
def suggest(self, body, index=None, params=None):
    """Produce a deterministic fake suggest response for each key in
    ``body``: ints are incremented, strings get a '_suggestion' suffix."""
    if index is not None and index not in self.__documents_dict:
        raise NotFoundError(
            404, 'IndexMissingException[[{0}] missing]'.format(index))
    response = {}
    for field, spec in body.items():
        text = spec.get('text')
        if isinstance(text, int):
            suggestion = int(text) + 1
        else:
            suggestion = '{0}_suggestion'.format(text)
        response[field] = [{
            'text': text,
            'length': 1,
            'options': [{
                'text': suggestion,
                'freq': 1,
                'score': 1.0,
            }],
            'offset': 0,
        }]
    return response
def search(self, index=None, doc_type=None, body=None, params=None):
    """Mock search: every stored document in scope is a hit with score
    1.0. Note the 'hits' list is only present when there are matches,
    mirroring the original behavior."""
    if index is not None and index not in self.__documents_dict:
        raise NotFoundError(
            404, 'IndexMissingException[[{0}] missing]'.format(index))
    targets = [index] if index is not None else self.__documents_dict.keys()
    matches = [
        doc
        for name in targets
        for doc in self.__documents_dict[name]
        if doc_type is None or doc.get('_type') == doc_type
    ]
    result = {
        'hits': {
            'total': len(matches),
            'max_score': 1.0,
        },
        '_shards': {
            'successful': 1,
            'failed': 0,
            'total': 1,
        },
        'took': 1,
        'timed_out': False,
    }
    if matches:
        for doc in matches:
            doc['_score'] = 1.0
        result['hits']['hits'] = matches
    return result
def mget(cls, docs, using=None, index=None, raise_on_error=True,
         missing='none', **kwargs):
    r"""
    Retrieve multiple document by their ``id``\s. Returns a list of
    instances in the same order as requested.

    :arg docs: list of ``id``\s of the documents to be retrieved or a list
        of document specifications as per
        https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html
    :arg index: elasticsearch index to use, if the ``Document`` is
        associated with an index this can be omitted.
    :arg using: connection alias to use, defaults to ``'default'``
    :arg missing: what to do when one of the documents requested is not
        found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise
        ``NotFoundError``) or ``'skip'`` (ignore the missing document).

    Any additional keyword arguments will be passed to
    ``Elasticsearch.mget`` unchanged.
    """
    if missing not in ('raise', 'skip', 'none'):
        raise ValueError("'missing' must be 'raise', 'skip', or 'none'.")
    es = cls._get_connection(using)
    # Bare ids become {'_id': ...} specs; mappings pass through unchanged.
    body = {
        'docs': [
            doc if isinstance(doc, collections_abc.Mapping) else {'_id': doc}
            for doc in docs
        ]
    }
    results = es.mget(
        body,
        index=cls._default_index(index),
        **kwargs
    )
    objs, error_docs, missing_docs = [], [], []
    for doc in results['docs']:
        if doc.get('found'):
            if error_docs or missing_docs:
                # We're going to raise an exception anyway, so avoid an
                # expensive call to cls.from_es().
                continue
            objs.append(cls.from_es(doc))
        elif doc.get('error'):
            if raise_on_error:
                error_docs.append(doc)
            if missing == 'none':
                objs.append(None)
        # The doc didn't cause an error, but the doc also wasn't found.
        elif missing == 'raise':
            missing_docs.append(doc)
        elif missing == 'none':
            objs.append(None)
    if error_docs:
        error_ids = [doc['_id'] for doc in error_docs]
        message = 'Required routing not provided for documents %s.'
        message %= ', '.join(error_ids)
        raise RequestError(400, message, error_docs)
    if missing_docs:
        missing_ids = [doc['_id'] for doc in missing_docs]
        # Fix: the assignment and its '%' formatting operand had been
        # severed across a line break, leaving a dangling expression;
        # rejoined into one statement.
        message = 'Documents %s not found.' % ', '.join(missing_ids)
        raise NotFoundError(404, message, {'docs': missing_docs})
    return objs
def test_unindex_raises_not_found_exception(self):
    """unindex_objects must swallow a NotFoundError raised by unindex.

    Bug fix: the original wrote ``mapping_type.unindex(side_effect=exception)``,
    which merely *calls* the mock with a keyword argument and never arms
    it, so unindex() could never raise and the test exercised nothing.
    The side effect must be assigned to the mock attribute.
    """
    exception = NotFoundError(404, {'not found': 'not found '}, {'foo': 'foo'})
    mapping_type = Mock()
    mapping_type.unindex.side_effect = exception
    # Should complete without propagating the NotFoundError.
    unindex_objects(mapping_type, [1, 2, 3], 'foo')
def search_by_time(es, index, time_range=15, size=10, mode=0):
    """Search ES for documents matching the criteria.

    :param es: ES connection object
    :param index: ES index name
    :param time_range: default time window (minutes back from now)
    :param size: search page size
    :param mode: instance work mode — non-zero appears to mean the
        scroll-owning (master) instance; TODO confirm against callers
    :return: list of search hits
    """
    global scrollId, threadLock, processCount
    # If we already hold a scroll id, consume the scroll first.
    scroll_reloaded = False
    if scrollId:
        try:
            ret = es.scroll(scroll='3m', scroll_id=scrollId, body={
                "scroll_id": scrollId
            })
            # Handle a few common shard failures.
            if ret['_shards']['failed'] > 0:
                error_info = json.dumps(ret['_shards']['failures'])
                if 'search_context_missing_exception' in error_info:
                    # Scroll context expired.
                    if mode:
                        es.clear_scroll(scroll_id=scrollId)
                    raise NotFoundError('Search scroll context missing.')
                elif 'search.max_open_scroll_context' in error_info:
                    # Too many open scroll contexts; clear all and recreate.
                    if mode:
                        es.clear_scroll(scroll_id='_all')
                    raise NotFoundError('Search scroll context peaked, cleaning ...')
                elif 'null_pointer_exception' in error_info:
                    # https://github.com/elastic/elasticsearch/issues/35860
                    raise NotFoundError('Trigger a elasticsearch scroll null pointer exception.')
                else:
                    output(error_info, LogLevel.INFO)
                    return []
            else:
                if len(ret['hits']['hits']) > 0:
                    return ret['hits']['hits']
                else:
                    # No data: wait 2 seconds before giving up this scroll.
                    time.sleep(2)
                    if mode:
                        es.clear_scroll(scroll_id=scrollId)
                        scroll_reloaded = True
                    raise Exception('Scroll result is empty.')
        except NotFoundError:
            # Scroll invalid/exhausted: fall through to create a new one.
            scroll_reloaded = True
        except Exception as e:
            output(e, LogLevel.WARN)
            #output(traceback.format_exc(), LogLevel.DEBUG)
    else:
        if mode:
            scroll_reloaded = True

    # Non-master instances never create a scroll themselves; they only
    # fetch the shared scroll id from ES.
    if not mode:
        time.sleep(2)
        output('Fetch new scroll...', LogLevel.INFO)
        scrollId = get_scroll(es)
        return []

    # Unexpected no-result: return immediately.
    if not scroll_reloaded:
        return []

    # By default, query the last `time_range` minutes of data.
    # (lastTime is only used by the disabled range filter below.)
    lastTime = time.strftime("%Y-%m-%dT%H:%M:%S.000Z",
                             time.gmtime(time.time() - time_range * 60))
    query = {
        "size": size,
        "query": {
            "bool": {
                # "must": [
                #     # docs after a point in time, default 15 minutes ago
                #     {"range": {"@timestamp": {"gte": lastTime}}}
                # ],
                "must_not": [
                    # Only process docs without a processing-state field.
                    {"exists": {"field": "state"}}
                ]
            }
        },
        "sort": {
            "@timestamp": {
                "order": "desc"
            }
        }
    }
    try:
        output('Start new search context...', LogLevel.INFO)
        output(query, LogLevel.DEBUG)
        ret = es.search(index=index, body=query, scroll='3m')
        if '_scroll_id' in ret:
            output('Use new scroll id', LogLevel.INFO)
            scrollId = ret['_scroll_id']
            # Persist the scroll_id for other instances to pick up.
            set_scroll(es, scrollId)
        output('Search {} documents.'.format(len(ret['hits']['hits'])), LogLevel.INFO)
        return ret['hits']['hits']
    except ConnectionError:
        output("ES connect error.", LogLevel.ERROR)
        time.sleep(2)
    except Exception as e:
        output(e, LogLevel.ERROR)
        traceback.print_exc()
    return []
def test_returns_none_when_no_alias(self, client):
    """If ``index`` is a concrete index, return None."""
    # get_alias raising NotFoundError signals a concrete (non-aliased)
    # index; get_aliased_index should report that as None.
    client.conn.indices.get_alias.side_effect = NotFoundError(
        'test', 'test desc')
    assert get_aliased_index(client) is None