def setup(cls, settings):
    """Configure the class-level Elasticsearch client from settings.

    Reads the 'elasticsearch' settings block, parses the host list and
    optional sniffing flags, and stores the created client on ``cls.api``.

    :raises Exception: When required elasticsearch settings are missing.
    """
    cls.settings = settings.mget('elasticsearch')
    cls.settings.setdefault('chunk_size', 500)
    try:
        raw_hosts = cls.settings.hosts
        hosts = [
            dict(host=host, port=port)
            for host, port in (split_strip(item, ':')
                               for item in split_strip(raw_hosts))
        ]
        extra = {}
        if cls.settings.asbool('sniff'):
            extra['sniff_on_start'] = True
            extra['sniff_on_connection_fail'] = True
        cls.api = elasticsearch.Elasticsearch(
            hosts=hosts,
            serializer=engine.ESJSONSerializer(),
            connection_class=ESHttpConnection,
            **extra)
        log.info('Including Elasticsearch. %s' % cls.settings)
    except KeyError as e:
        raise Exception('Bad or missing settings for elasticsearch. %s' % e)
def setup(cls, settings):
    """Initialize the class-level Elasticsearch API client.

    Pulls the 'elasticsearch' settings block, builds the host list and
    optional sniffing parameters, then assigns the client to ``cls.api``.

    :raises Exception: When required elasticsearch settings are missing.
    """
    cls.settings = settings.mget('elasticsearch')
    cls.settings.setdefault('chunk_size', 500)
    try:
        hosts = []
        for pair in split_strip(cls.settings.hosts):
            host, port = split_strip(pair, ':')
            hosts.append(dict(host=host, port=port))
        sniff_params = {}
        if cls.settings.asbool('sniff'):
            sniff_params = dict(sniff_on_start=True,
                                sniff_on_connection_fail=True)
        cls.api = elasticsearch.Elasticsearch(
            hosts=hosts,
            serializer=engine.ESJSONSerializer(),
            connection_class=ESHttpConnection,
            **sniff_params)
        log.info('Including Elasticsearch. %s' % cls.settings)
    except KeyError as e:
        raise Exception(
            'Bad or missing settings for elasticsearch. %s' % e)
def run(self, quiet=False):
    """Index documents of the requested models into Elasticsearch.

    Resolves each dotted model path from ``--models``, fetches its
    collection (filtered by ``--params``) and indexes the documents.

    :param quiet: Unused; kept for interface compatibility.
    :returns: 0 on success.
    """
    from nefertari.elasticsearch import ES
    ES.setup(self.settings)
    models_paths = split_strip(self.options.models)
    for path in models_paths:
        model = resolve(path)
        model_name = path.split('.')[-1]
        raw_params = self.options.params or ''
        # parse_qs maps each key to a list of values; keep only the first.
        params = {k: v[0] for k, v in urlparse.parse_qs(raw_params).items()}
        # Was ``setdefault('_limit', params.get('_limit', 10000))`` — the
        # inner get is redundant; setdefault alone has the same effect.
        params.setdefault('_limit', 10000)
        chunk_size = self.options.chunk or params['_limit']
        es = ES(source=model_name, index_name=self.options.index)
        query_set = model.get_collection(**params)
        documents = to_dicts(query_set)
        if self.options.force:
            es.index(documents, chunk_size=chunk_size)
        else:
            es.index_missing(documents, chunk_size=chunk_size)
    return 0
def run(self, quiet=False):
    """Index documents of each resolved model into Elasticsearch.

    :param quiet: Unused; kept for interface compatibility.
    :returns: 0 on success.
    """
    from nefertari.elasticsearch import ES
    ES.setup(self.settings)
    for path in split_strip(self.options.models):
        model = resolve(path)
        model_name = path.split('.')[-1]
        query_string = self.options.params or ''
        params = dict(
            (key, values[0])
            for key, values in urlparse.parse_qs(query_string).items())
        params.setdefault('_limit', params.get('_limit', 10000))
        chunk_size = self.options.chunk or params['_limit']
        es = ES(source=model_name, index_name=self.options.index)
        documents = to_dicts(model.get_collection(**params))
        if self.options.force:
            es.index(documents, chunk_size=chunk_size)
        else:
            es.index_missing(documents, chunk_size=chunk_size)
    return 0
def run(self):
    """Replace one ACE with another on documents of the given models.

    Parses ``--from_ace`` / ``--to_ace`` JSON options, runs
    ``update_ace`` and then attempts a best-effort transaction commit.

    :raises ValueError: When either ACE option is not valid JSON.
    """
    if self.options.models:
        model_names = split_strip(self.options.models)
        models = [engine.get_document_cls(name) for name in model_names]
    else:
        models = None
    try:
        from_ace = json.loads(self.options.from_ace)
    except ValueError as ex:
        raise ValueError('--from_ace: {}'.format(ex))
    try:
        to_ace = json.loads(self.options.to_ace)
    except ValueError as ex:
        raise ValueError('--to_ace: {}'.format(ex))
    six.print_('Updating documents ACE')
    update_ace(from_ace=from_ace, to_ace=to_ace, models=models)
    # Commit is best-effort: the ``transaction`` package is optional and
    # commit failures are tolerated. Unlike the previous bare ``except:``
    # this no longer swallows SystemExit/KeyboardInterrupt.
    try:
        import transaction
    except ImportError:
        pass
    else:
        try:
            transaction.commit()
        except Exception:
            pass
    six.print_('Done')
def run(self):
    """Index documents of the given models into Elasticsearch.

    With ``--force`` the model's ES mapping is recreated and all
    documents are indexed; otherwise only missing documents are indexed.

    :returns: 0 on success.
    """
    ES.setup(self.settings)
    model_names = split_strip(self.options.models)
    for model_name in model_names:
        self.log.info('Processing model `{}`'.format(model_name))
        model = engine.get_document_cls(model_name)
        raw_params = self.options.params or ''
        # parse_qs maps each key to a list of values; keep only the first.
        params = {k: v[0]
                  for k, v in urllib.parse.parse_qs(raw_params).items()}
        # Was ``setdefault('_limit', params.get('_limit', 10000))`` — the
        # inner get is redundant; setdefault alone has the same effect.
        params.setdefault('_limit', 10000)
        chunk_size = self.options.chunk or params['_limit']
        es = ES(source=model_name, index_name=self.options.index,
                chunk_size=chunk_size)
        query_set = model.get_collection(**params)
        documents = to_dicts(query_set)
        if self.options.force:
            self.log.info('Recreating `{}` ES mapping'.format(model_name))
            es.delete_mapping()
            es.put_mapping(body=model.get_es_mapping())
            self.log.info('Indexing all `{}` documents'.format(model_name))
            es.index(documents)
        else:
            self.log.info(
                'Indexing missing `{}` documents'.format(model_name))
            es.index_missing_documents(documents)
    return 0
def run(self):
    """Index documents for each requested model into Elasticsearch.

    :returns: 0 on success.
    """
    ES.setup(self.settings)
    for model_name in split_strip(self.options.models):
        self.log.info('Processing model `{}`'.format(model_name))
        model = engine.get_document_cls(model_name)
        query_string = self.options.params or ''
        params = dict(
            (key, values[0])
            for key, values in urllib.parse.parse_qs(query_string).items())
        params.setdefault('_limit', params.get('_limit', 10000))
        chunk_size = self.options.chunk or params['_limit']
        es = ES(source=model_name, index_name=self.options.index,
                chunk_size=chunk_size)
        documents = to_dicts(model.get_collection(**params))
        if self.options.force:
            self.log.info('Recreating `{}` ES mapping'.format(model_name))
            es.delete_mapping()
            es.put_mapping(body=model.get_es_mapping())
            self.log.info('Indexing all `{}` documents'.format(model_name))
            es.index(documents)
        else:
            self.log.info('Indexing missing `{}` documents'.format(
                model_name))
            es.index_missing_documents(documents)
    return 0
def run(self):
    """Set up ES, optionally recreate the index, then index models.

    With ``--recreate`` the index is rebuilt and every document class
    with ``_index_enabled`` is indexed; otherwise only the models named
    in ``--models`` are indexed.
    """
    ES.setup(self.settings)
    if not self.options.recreate:
        self.index_models(split_strip(self.options.models))
        return
    self.recreate_index()
    model_names = [
        name
        for name, model in engine.get_document_classes().items()
        if getattr(model, '_index_enabled', False)
    ]
    self.index_models(model_names)
def run(self):
    """Set up ES and index either all index-enabled models or the named ones.

    ``--recreate`` rebuilds the index first and selects every document
    class that has indexing enabled.
    """
    ES.setup(self.settings)
    if self.options.recreate:
        self.recreate_index()
        registered = engine.get_document_classes()
        # Only models with ES indexing enabled are considered.
        model_names = [name for name, model in registered.items()
                       if getattr(model, '_index_enabled', False)]
    else:
        model_names = split_strip(self.options.models)
    self.index_models(model_names)
def setup_database(config):
    """Create the global ES connection and set up the index.

    Reads the 'elasticsearch' settings block from the Pyramid registry,
    builds connection parameters (hosts, chunk size, optional sniffing)
    and hands the connection to ``setup_index``.
    """
    settings = dictset(config.registry.settings).mget('elasticsearch')
    host_list = []
    for entry in split_strip(settings['hosts']):
        host, port = split_strip(entry, ':')
        host_list.append(dict(host=host, port=port))
    params = {
        'chunk_size': settings.get('chunk_size', 500),
        'hosts': host_list,
    }
    if settings.asbool('sniff'):
        params.update(sniff_on_start=True, sniff_on_connection_fail=True)
    # XXX if this connection has to deal with mongo and sqla objects,
    # then we'll need to use their es serializers instead. should
    # probably clean up that part of the engine interface - there's
    # lots of repeated code, plus other engines shouldn't have to know
    # about es - they should just know how to serialize their
    # documents to JSON.
    conn = connections.create_connection(
        serializer=JSONSerializer(), **params)
    setup_index(conn, settings)
def process_fields_param(fields):
    """ Process 'fields' ES param.

    * Fields list is split if needed
    * '_type' field is added, if not present, so the actual value
      is displayed instead of 'None'

    Falsy input is returned unchanged. A list argument is copied before
    '_type' is appended, so the caller's list is no longer mutated.
    """
    if not fields:
        return fields
    if isinstance(fields, six.string_types):
        fields = split_strip(fields)
    else:
        # Copy so appending '_type' does not mutate the caller's list.
        fields = list(fields)
    if "_type" not in fields:
        fields.append("_type")
    return {"_source_include": fields, "_source": True}
def process_fields_param(fields):
    """ Process 'fields' ES param.

    * Fields list is split if needed
    * '_type' field is added, if not present, so the actual value
      is displayed instead of 'None'
    """
    if not fields:
        return fields
    if isinstance(fields, six.string_types):
        fields = split_strip(fields)
    missing_type = '_type' not in fields
    if missing_type:
        fields.append('_type')
    return {
        '_source': True,
        '_source_include': fields,
    }
def run(self):
    """Print per-model counts of documents carrying the given ACE.

    Output is CSV-ish: a 'Model,Count' header followed by one line per
    model; models that are not ES-based print 'Not es-based'.

    :raises ValueError: When ``--ace`` is not valid JSON.
    """
    models = None
    if self.options.models:
        models = [engine.get_document_cls(name)
                  for name in split_strip(self.options.models)]
    try:
        ace = json.loads(self.options.ace)
    except ValueError as ex:
        raise ValueError('--ace: {}'.format(ex))
    six.print_('Model,Count')
    for model, count in count_ace(ace=ace, models=models).items():
        shown = 'Not es-based' if count is None else count
        six.print_('{},{}'.format(model.__name__, shown))
def get_collection(cls, _count=False, _strict=True, _sort=None,
                   _fields=None, _limit=None, _page=None, _start=None,
                   _query_set=None, _item_request=False, _explain=None,
                   _search_fields=None, q=None, **params):
    """ Query collection and return results.

    Notes:

    * Before validating that only model fields are present in params,
      reserved params, query params and all params starting with
      double underscore are dropped.
    * Params which have value "_all" are dropped.
    * When ``_count`` param is used, objects count is returned
      before applying offset and limit.

    :param bool _strict: If True ``params`` are validated to contain
        only fields defined on model, exception is raised if invalid
        fields are present. When False - invalid fields are dropped.
        Defaults to ``True``.
    :param list _sort: Field names to sort results by. If field name
        is prefixed with "-" it is used for "descending" sorting.
        Otherwise "ascending" sorting is performed by that field.
        Defaults to an empty list in which case sorting is not
        performed.
    :param list _fields: Names of fields which should be included or
        excluded from results. Fields to excluded should be prefixed
        with "-". Defaults to an empty list in which case all fields
        are returned.
    :param int _limit: Number of results per page. Defaults to None
        in which case all results are returned.
    :param int _page: Number of page. In conjunction with ``_limit``
        is used to calculate results offset. Defaults to None in which
        case it is ignored. Params ``_page`` and ``_start`` are
        mutually exclusive.
    :param int _start: Results offset. If provided ``_limit`` and
        ``_page`` params are ignored when calculating offset. Defaults
        to None. Params ``_page`` and ``_start`` are mutually
        exclusive. If no offset-related params are provided, offset
        equals to 0.
    :param Query _query_set: Existing queryset. If provided, all
        queries are applied to it instead of creating new queryset.
        Defaults to None.
    :param bool _item_request: Indicates whether it is a single item
        request or not. When True and DataError happens on DB request,
        JHTTPNotFound is raised. JHTTPBadRequest is raised when False.
        Defaults to ``False``.
    :param _count: When provided, only results number is returned as
        integer.
    :param _explain: When provided, query performed(SQL) is returned
        as a string instead of query results.
    :param bool _raise_on_empty: When True JHTTPNotFound is raised
        if query returned no results. Defaults to False in which case
        error is just logged and empty query results are returned.
    :param q: Query string to perform full-text search with.
    :param _search_fields: Comma-separated list of field names to use
        with full-text search(q param) to limit fields which are
        searched.

    :returns: Query results as ``elasticsearch_dsl.XXX`` instance.
        May be sorted, offset, limited.
    :returns: Dict of {'field_name': fieldval}, when ``_fields`` param
        is provided.
    :returns: Number of query results as an int when ``_count`` param
        is provided.

    :raises JHTTPNotFound: When ``_raise_on_empty=True`` and no
        results found.
    :raises JHTTPNotFound: When ``_item_request=True`` and
        ``sqlalchemy.exc.DataError`` exception is raised during DB
        query. Latter exception is raised when querying DB with an
        identifier of a wrong type. E.g. when querying Int field with
        a string.
    :raises JHTTPBadRequest: When ``_item_request=False`` and
        ``sqlalchemy.exc.DataError`` exception is raised during DB
        query.
    :raises JHTTPBadRequest: When ``sqlalchemy.exc.InvalidRequestError``
        or ``sqlalchemy.exc.IntegrityError`` errors happen during DB
        query.
    """
    # Cache fast-path: only usable when the query is a plain
    # primary-key lookup with every other option left at its default.
    # (Was a chain of ``== None`` / ``== False`` comparisons; PEP 8
    # identity checks are used now. Loop var renamed from ``id`` which
    # shadowed the builtin.)
    pk_field = cls.pk_field()
    defaults_used = (
        _count is False and _strict is True and _sort is None and
        _fields is None and _limit is None and _page is None and
        _start is None and _query_set is None and
        _item_request is False and _explain is None and
        _search_fields is None and q is None)
    if list(params.keys()) == [pk_field] and defaults_used:
        ids = params[pk_field]
        if not isinstance(ids, (list, tuple)):
            ids = [ids]
        results = []
        for doc_id in ids:
            if doc_id not in cls._cache:
                # Any miss falls through to a real ES query.
                break
            results.append(cls._cache[doc_id])
        else:
            return results
    search_obj = cls.search()
    if _limit is not None:
        _start, limit = process_limit(_start, _page, _limit)
        search_obj = search_obj.extra(from_=_start, size=limit)
    if _fields:
        include, exclude = process_fields(_fields)
        if _strict:
            _validate_fields(cls, include + exclude)
        # XXX partial fields support isn't yet released. for now
        # we just use fields, later we'll add support for excluded fields
        search_obj = search_obj.fields(include)
    if params:
        params = _cleaned_query_params(cls, params, _strict)
        params = _restructure_params(cls, params)
        if params:
            search_obj = search_obj.filter('terms', **params)
    if q is not None:
        query_kw = {'query': q}
        if _search_fields is not None:
            query_kw['fields'] = _search_fields.split(',')
        search_obj = search_obj.query('query_string', **query_kw)
    if _count:
        return search_obj.count()
    if _explain:
        return search_obj.to_dict()
    if _sort:
        sort_fields = split_strip(_sort)
        if _strict:
            _validate_fields(
                cls,
                [f[1:] if f.startswith('-') else f for f in sort_fields])
        search_obj = search_obj.sort(*sort_fields)
    hits = search_obj.execute().hits
    hits._nefertari_meta = dict(
        total=hits.total, start=_start, fields=_fields)
    return hits