def index(self, obj, attributes=None):
     """Queue *obj* for asynchronous indexing.

     Objects that do not provide ``IDexterityContent`` are ignored.
     Otherwise the current time is recorded in the object's annotations
     and the ``index`` task is queued with the object's physical path,
     that same timestamp and the index name.

     ``attributes`` is accepted but not used by this method.
     """
     if IDexterityContent.providedBy(obj):
         # remember when indexing was queued; the task receives the same value
         queued_at = time.time()
         IAnnotations(obj)["ELASTIC_LAST_INDEXING_QUEUED_TIMESTAMP"] = queued_at
         physical_path = "/".join(obj.getPhysicalPath())
         index.delay(physical_path, queued_at, index_name())
 def numObjects(self):
     """Return the number of indexed objects.

     Issues a ``match_all`` count request against the index.  If the
     request (or reading its ``"count"`` entry) raises, the exception is
     logged and a message string is returned instead of a number.
     """
     target_index = index_name()
     match_everything = {"query": {"match_all": {}}}
     client = get_query_client()
     try:
         response = client.count(index=target_index, body=match_everything)
         return response["count"]
     except Exception:
         logger.exception('ElasticSearch "count" query failed')
         return "Problem getting all documents count from ElasticSearch!"
Ejemplo n.º 3
0
 def __call__(self):
     """Queue every path listed in the portal catalog for indexing.

     Each path is handed to the ``index`` task with a timestamp of ``0``.
     Returns a string reporting how many paths were queued.
     """
     queued = 0
     catalog_tool = api.portal.get_tool("portal_catalog")
     for uid_path in catalog_tool._catalog.uids:
         # the catalog tool's own path appears among the uids; skip it
         if not uid_path.endswith("/portal_catalog"):
             index.delay(uid_path, 0, index_name())
             queued += 1
     return "queued {0}".format(queued)
 def unindex(self, obj):
     """Queue removal of *obj* from the index, identified by its UUID."""
     unindex.delay(api.content.get_uuid(obj), index_name())
    def _apply_index(self, request):
        """Apply the index to query parameters given in 'request'.

        The argument should be a mapping object.

        The keys for this index are extracted from ``request`` with
        ``parseIndexRequest``, rendered into a query body through
        ``self._apply_template`` and sent to ElasticSearch as a scrolled
        search that fetches ``BATCH_SIZE`` hits per page.

        Returns ``None`` if the request does not contain keys for this
        index, or if the initial search raises ``RequestError`` or
        ``TransportError`` (the failure is logged).

        Otherwise a tuple of two objects is returned.  The first is an
        ``IIBTree`` mapping the ``rid`` of every hit to its integer score
        (``_score`` multiplied by 10000).  The second is the tuple
        ``(self.id,)``.

        Errors raised while fetching follow-up scroll pages are not
        caught here and propagate to the caller.
        """
        record = parseIndexRequest(request, self.id)
        if record.keys is None:
            return None
        keys = []
        for key in record.keys:
            # drop backslashes and double quotes from the key
            # (presumably so it cannot break the query template -- confirm)
            key = key.replace("\\", "").replace('"', "")
            if not isinstance(key, bytes):
                key = key.encode("utf8")
            keys.append(key)
        template_params = {"keys": keys}
        # not read in this method; presumably picked up by Zope's traceback
        # formatting -- confirm
        __traceback_info__ = "template parameters: {0}".format(template_params)
        query_body = self._apply_template(template_params)
        logger.info(query_body)
        es_kwargs = dict(
            index=index_name(),
            body=query_body,
            size=BATCH_SIZE,
            scroll="1m",
            _source_includes=["rid"],
        )
        es = get_query_client()
        try:
            result = es.search(**es_kwargs)
        except RequestError:
            logger.info("Query failed:\n{0}".format(query_body))
            return None
        except TransportError:
            logger.exception("ElasticSearch failed")
            return None

        def score(hit):
            return int(10000 * float(hit["_score"]))

        retval = IIBTree()
        # the search was opened with scroll="1m", so the server holds a
        # scroll context that has to be released once we are done
        scroll_id = result.get("_scroll_id")
        try:
            hits = result["hits"]["hits"]
            total = result["hits"]["total"]
            expected = total["value"]
            # with relation "gte" the reported value is only a lower bound,
            # so it cannot be used to decide when all hits have been read
            is_lower_bound = total.get("relation", "eq") != "eq"
            fetched = 0
            while hits:
                for hit in hits:
                    retval[hit["_source"]["rid"]] = score(hit)
                fetched += len(hits)
                if fetched >= expected and not is_lower_bound:
                    break
                result = es.scroll(scroll_id=scroll_id, scroll="1m")
                # always continue with the most recently returned scroll id
                scroll_id = result.get("_scroll_id", scroll_id)
                hits = result["hits"]["hits"]
        finally:
            if scroll_id is not None:
                try:
                    es.clear_scroll(scroll_id=scroll_id)
                except TransportError:
                    # best-effort cleanup: the context expires on its own
                    logger.warning(
                        "Could not clear ElasticSearch scroll context",
                        exc_info=True,
                    )
        return retval, (self.id,)