Example #1
0
    def setup(cls, settings):
        cls.settings = settings.mget('elasticsearch')
        cls.settings.setdefault('chunk_size', 500)

        try:
            _hosts = cls.settings.hosts
            hosts = []
            for (host, port) in [
                    split_strip(each, ':') for each in split_strip(_hosts)
            ]:
                hosts.append(dict(host=host, port=port))

            params = {}
            if cls.settings.asbool('sniff'):
                params = dict(sniff_on_start=True,
                              sniff_on_connection_fail=True)

            cls.api = elasticsearch.Elasticsearch(
                hosts=hosts,
                serializer=engine.ESJSONSerializer(),
                connection_class=ESHttpConnection,
                **params)
            log.info('Including Elasticsearch. %s' % cls.settings)

        except KeyError as e:
            raise Exception('Bad or missing settings for elasticsearch. %s' %
                            e)
Example #2
0
    def setup(cls, settings):
        cls.settings = settings.mget('elasticsearch')
        cls.settings.setdefault('chunk_size', 500)

        try:
            _hosts = cls.settings.hosts
            hosts = []
            for (host, port) in [
                    split_strip(each, ':') for each in split_strip(_hosts)]:
                hosts.append(dict(host=host, port=port))

            params = {}
            if cls.settings.asbool('sniff'):
                params = dict(
                    sniff_on_start=True,
                    sniff_on_connection_fail=True
                )

            cls.api = elasticsearch.Elasticsearch(
                hosts=hosts, serializer=engine.ESJSONSerializer(),
                connection_class=ESHttpConnection, **params)
            log.info('Including Elasticsearch. %s' % cls.settings)

        except KeyError as e:
            raise Exception(
                'Bad or missing settings for elasticsearch. %s' % e)
Example #3
0
    def run(self, quiet=False):
        from nefertari.elasticsearch import ES
        ES.setup(self.settings)
        models_paths = split_strip(self.options.models)

        for path in models_paths:
            model = resolve(path)
            model_name = path.split('.')[-1]

            params = self.options.params or ''
            params = dict([
                [k, v[0]] for k, v in urlparse.parse_qs(params).items()
            ])
            params.setdefault('_limit', params.get('_limit', 10000))
            chunk_size = self.options.chunk or params['_limit']

            es = ES(source=model_name, index_name=self.options.index)
            query_set = model.get_collection(**params)
            documents = to_dicts(query_set)

            if self.options.force:
                es.index(documents, chunk_size=chunk_size)
            else:
                es.index_missing(documents, chunk_size=chunk_size)

        return 0
Example #4
0
    def run(self, quiet=False):
        from nefertari.elasticsearch import ES
        ES.setup(self.settings)
        models_paths = split_strip(self.options.models)

        for path in models_paths:
            model = resolve(path)
            model_name = path.split('.')[-1]

            params = self.options.params or ''
            params = dict([[k, v[0]]
                           for k, v in urlparse.parse_qs(params).items()])
            params.setdefault('_limit', params.get('_limit', 10000))
            chunk_size = self.options.chunk or params['_limit']

            es = ES(source=model_name, index_name=self.options.index)
            query_set = model.get_collection(**params)
            documents = to_dicts(query_set)

            if self.options.force:
                es.index(documents, chunk_size=chunk_size)
            else:
                es.index_missing(documents, chunk_size=chunk_size)

        return 0
Example #5
0
    def run(self):
        if self.options.models:
            model_names = split_strip(self.options.models)
            models = [engine.get_document_cls(name) for name in model_names]
        else:
            models = None

        try:
            from_ace = json.loads(self.options.from_ace)
        except ValueError as ex:
            raise ValueError('--from_ace: {}'.format(ex))

        try:
            to_ace = json.loads(self.options.to_ace)
        except ValueError as ex:
            raise ValueError('--to_ace: {}'.format(ex))

        six.print_('Updating documents ACE')

        update_ace(from_ace=from_ace, to_ace=to_ace, models=models)

        try:
            import transaction
            transaction.commit()
        except:
            pass

        six.print_('Done')
Example #6
0
    def run(self):
        ES.setup(self.settings)
        model_names = split_strip(self.options.models)

        for model_name in model_names:
            self.log.info('Processing model `{}`'.format(model_name))
            model = engine.get_document_cls(model_name)

            params = self.options.params or ''
            params = dict([[k, v[0]]
                           for k, v in urllib.parse.parse_qs(params).items()])
            params.setdefault('_limit', params.get('_limit', 10000))
            chunk_size = self.options.chunk or params['_limit']

            es = ES(source=model_name,
                    index_name=self.options.index,
                    chunk_size=chunk_size)
            query_set = model.get_collection(**params)
            documents = to_dicts(query_set)

            if self.options.force:
                self.log.info('Recreating `{}` ES mapping'.format(model_name))
                es.delete_mapping()
                es.put_mapping(body=model.get_es_mapping())
                self.log.info('Indexing all `{}` documents'.format(model_name))
                es.index(documents)
            else:
                self.log.info(
                    'Indexing missing `{}` documents'.format(model_name))
                es.index_missing_documents(documents)

        return 0
Example #7
0
    def run(self):
        ES.setup(self.settings)
        model_names = split_strip(self.options.models)

        for model_name in model_names:
            self.log.info('Processing model `{}`'.format(model_name))
            model = engine.get_document_cls(model_name)

            params = self.options.params or ''
            params = dict([
                [k, v[0]] for k, v in urllib.parse.parse_qs(params).items()
            ])
            params.setdefault('_limit', params.get('_limit', 10000))
            chunk_size = self.options.chunk or params['_limit']

            es = ES(source=model_name, index_name=self.options.index,
                    chunk_size=chunk_size)
            query_set = model.get_collection(**params)
            documents = to_dicts(query_set)

            if self.options.force:
                self.log.info('Recreating `{}` ES mapping'.format(model_name))
                es.delete_mapping()
                es.put_mapping(body=model.get_es_mapping())
                self.log.info('Indexing all `{}` documents'.format(
                    model_name))
                es.index(documents)
            else:
                self.log.info('Indexing missing `{}` documents'.format(
                    model_name))
                es.index_missing_documents(documents)

        return 0
    def run(self):
        if self.options.models:
            model_names = split_strip(self.options.models)
            models = [engine.get_document_cls(name)
                      for name in model_names]
        else:
            models = None

        try:
            from_ace = json.loads(self.options.from_ace)
        except ValueError as ex:
            raise ValueError('--from_ace: {}'.format(ex))

        try:
            to_ace = json.loads(self.options.to_ace)
        except ValueError as ex:
            raise ValueError('--to_ace: {}'.format(ex))

        six.print_('Updating documents ACE')

        update_ace(from_ace=from_ace, to_ace=to_ace, models=models)

        try:
            import transaction
            transaction.commit()
        except:
            pass

        six.print_('Done')
Example #9
0
 def run(self):
     ES.setup(self.settings)
     if self.options.recreate:
         self.recreate_index()
         models = engine.get_document_classes()
         model_names = [
             name for name, model in models.items()
             if getattr(model, '_index_enabled', False)]
     else:
         model_names = split_strip(self.options.models)
     self.index_models(model_names)
Example #10
0
 def run(self):
     ES.setup(self.settings)
     if self.options.recreate:
         self.recreate_index()
         models = engine.get_document_classes()
         model_names = [
             name for name, model in models.items()
             if getattr(model, '_index_enabled', False)
         ]
     else:
         model_names = split_strip(self.options.models)
     self.index_models(model_names)
Example #11
0
def setup_database(config):
    settings = dictset(config.registry.settings).mget('elasticsearch')
    params = {}
    params['chunk_size'] = settings.get('chunk_size', 500)
    params['hosts'] = []
    for hp in split_strip(settings['hosts']):
        h, p = split_strip(hp, ':')
        params['hosts'].append(dict(host=h, port=p))
    if settings.asbool('sniff'):
        params['sniff_on_start'] = True
        params['sniff_on_connection_fail'] = True

    # XXX if this connection has to deal with mongo and sqla objects,
    # then we'll need to use their es serializers instead. should
    # probably clean up that part of the engine interface - there's
    # lots of repeated code, plus other engines shouldn't have to know
    # about es - they should just know how to serialize their
    # documents to JSON.
    conn = connections.create_connection(
        serializer=JSONSerializer(), **params)
    setup_index(conn, settings)
Example #12
0
def process_fields_param(fields):
    """ Process 'fields' ES param.

    * Fields list is split if needed
    * '_type' field is added, if not present, so the actual value is
      displayed instead of 'None'
    """
    if not fields:
        return fields
    if isinstance(fields, six.string_types):
        fields = split_strip(fields)
    if "_type" not in fields:
        fields.append("_type")
    return {"_source_include": fields, "_source": True}
Example #13
0
def process_fields_param(fields):
    """ Process 'fields' ES param.

    * Fields list is split if needed
    * '_type' field is added, if not present, so the actual value is
      displayed instead of 'None'
    """
    if not fields:
        return fields
    if isinstance(fields, six.string_types):
        fields = split_strip(fields)
    if '_type' not in fields:
        fields.append('_type')
    return {
        '_source_include': fields,
        '_source': True,
    }
Example #14
0
    def run(self):
        if self.options.models:
            model_names = split_strip(self.options.models)
            models = [engine.get_document_cls(name) for name in model_names]
        else:
            models = None

        try:
            ace = json.loads(self.options.ace)
        except ValueError as ex:
            raise ValueError('--ace: {}'.format(ex))

        counts = count_ace(ace=ace, models=models)
        six.print_('Model,Count')
        for model, count in counts.items():
            if count is None:
                count = 'Not es-based'
            six.print_('{},{}'.format(model.__name__, count))
Example #15
0
    def get_collection(cls, _count=False, _strict=True, _sort=None,
                       _fields=None, _limit=None, _page=None, _start=None,
                       _query_set=None, _item_request=False, _explain=None,
                       _search_fields=None, q=None, **params):
        """ Query collection and return results.

        Notes:
        *   Before validating that only model fields are present in params,
            reserved params, query params and all params starting with
            double underscore are dropped.
        *   Params which have value "_all" are dropped.
        *   When ``_count`` param is used, objects count is returned
            before applying offset and limit.

        :param bool _strict: If True ``params`` are validated to contain
            only fields defined on model, exception is raised if invalid
            fields are present. When False - invalid fields are dropped.
            Defaults to ``True``.
        :param list _sort: Field names to sort results by. If field name
            is prefixed with "-" it is used for "descending" sorting.
            Otherwise "ascending" sorting is performed by that field.
            Defaults to an empty list in which case sorting is not
            performed.
        :param list _fields: Names of fields which should be included
            or excluded from results. Fields to excluded should be
            prefixed with "-". Defaults to an empty list in which
            case all fields are returned.
        :param int _limit: Number of results per page. Defaults
            to None in which case all results are returned.
        :param int _page: Number of page. In conjunction with
            ``_limit`` is used to calculate results offset. Defaults to
            None in which case it is ignored. Params ``_page`` and
            ``_start` are mutually exclusive.
        :param int _start: Results offset. If provided ``_limit`` and
            ``_page`` params are ignored when calculating offset. Defaults
            to None. Params ``_page`` and ``_start`` are mutually
            exclusive. If not offset-related params are provided, offset
            equals to 0.
        :param Query _query_set: Existing queryset. If provided, all queries
            are applied to it instead of creating new queryset. Defaults
            to None.
        :param bool _item_request: Indicates whether it is a single item
            request or not. When True and DataError happens on DB request,
            JHTTPNotFound is raised. JHTTPBadRequest is raised when False.
            Defaults to ``False``.
        :param _count: When provided, only results number is returned as
            integer.
        :param _explain: When provided, query performed(SQL) is returned
            as a string instead of query results.
        :param bool _raise_on_empty: When True JHTTPNotFound is raised
            if query returned no results. Defaults to False in which case
            error is just logged and empty query results are returned.
        :param q: Query string to perform full-text search with.
        :param _search_fields: Coma-separated list of field names to use
            with full-text search(q param) to limit fields which are
            searched.

        :returns: Query results as ``elasticsearch_dsl.XXX`` instance.
            May be sorted, offset, limited.
        :returns: Dict of {'field_name': fieldval}, when ``_fields`` param
            is provided.
        :returns: Number of query results as an int when ``_count`` param
            is provided.

        :raises JHTTPNotFound: When ``_raise_on_empty=True`` and no
            results found.
        :raises JHTTPNotFound: When ``_item_request=True`` and
            ``sqlalchemy.exc.DataError`` exception is raised during DB
            query. Latter exception is raised when querying DB with
            an identifier of a wrong type. E.g. when querying Int field
            with a string.
        :raises JHTTPBadRequest: When ``_item_request=False`` and
            ``sqlalchemy.exc.DataError`` exception is raised during DB
            query.
        :raises JHTTPBadRequest: When ``sqlalchemy.exc.InvalidRequestError``
            or ``sqlalchemy.exc.IntegrityError`` errors happen during DB
            query.
        """
        # see if the items are cached
        pk_field = cls.pk_field()
        if (list(params.keys()) == [pk_field] and _count==False
            and _strict==True and _sort==None and _fields==None
            and _limit==None and _page==None and _start==None
            and _query_set==None and _item_request==False and _explain==None
            and _search_fields==None and q==None):
            ids = params[pk_field]
            if not isinstance(ids, (list, tuple)):
                ids = [ids]
            results = []
            for id in ids:
                if not id in cls._cache:
                    break
                results.append(cls._cache[id])
            else:
                return results

        search_obj = cls.search()

        if _limit is not None:
            _start, limit = process_limit(_start, _page, _limit)
            search_obj = search_obj.extra(from_=_start, size=limit)

        if _fields:
            include, exclude = process_fields(_fields)
            if _strict:
                _validate_fields(cls, include + exclude)
            # XXX partial fields support isn't yet released. for now
            # we just use fields, later we'll add support for excluded fields
            search_obj = search_obj.fields(include)

        if params:
            params = _cleaned_query_params(cls, params, _strict)
            params = _restructure_params(cls, params)
            if params:
                search_obj = search_obj.filter('terms', **params)

        if q is not None:
            query_kw = {'query': q}
            if _search_fields is not None:
                query_kw['fields'] = _search_fields.split(',')
            search_obj = search_obj.query('query_string', **query_kw)

        if _count:
            return search_obj.count()

        if _explain:
            return search_obj.to_dict()

        if _sort:
            sort_fields = split_strip(_sort)
            if _strict:
                _validate_fields(
                    cls,
                    [f[1:] if f.startswith('-') else f for f in sort_fields]
                    )
            search_obj = search_obj.sort(*sort_fields)

        hits = search_obj.execute().hits
        hits._nefertari_meta = dict(
            total=hits.total,
            start=_start,
            fields=_fields
            )
        return hits