Python ScanResult Examples

Programming Language: Python

Namespace/Package Name: corehq.elastic

Class/Type: ScanResult

Examples at hotexamples.com: 6

Python ScanResult – 6 examples found. These are the top-rated real-world Python examples of corehq.elastic.ScanResult, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

ScanResult(6)

Frequently Used Methods

ScanResult (6)

Example #1

Show file

def _get_export_documents_from_file(dump_path, doc_count):
    """Mimic the results of an ES scroll query but get results from jsonlines file

    ``dump_path`` is read as a gzip file with one JSON document per line.
    The documents are decoded lazily, one per line, as the returned result
    is iterated.

    The dump file is deleted once iteration has started and then finishes
    for any reason: exhaustion, an error while decoding a line, or the
    generator being closed early. If the result is never iterated, the
    file is left in place.

    :param dump_path: path of the gzip-compressed jsonlines dump
    :param doc_count: count passed through to ``ScanResult`` unchanged
    :returns: ``ScanResult(doc_count, <lazy document generator>)``
    """
    def _doc_iter():
        try:
            with gzip.open(dump_path) as file:
                for line in file:
                    # gzip.open defaults to binary mode, so decode first.
                    yield json.loads(line.decode())
        finally:
            # Runs on exhaustion, on error and on generator close, so an
            # abandoned or failed export does not leave the dump behind.
            os.remove(dump_path)

    return ScanResult(doc_count, _doc_iter())

Example #2

Show file

 def scroll(self):
     """
     Run the query against the scroll api. Returns a ScanResult carrying
     the result count and a lazy generator that yields each matching
     document after it has been passed through
     ``ESQuerySet.normalize_result``.
     """
     result = scroll_query(self.index, self.raw_query)
     # Copy the query once rather than once per document: the copy is
     # only used as the query argument of normalize_result, so repeating
     # the deepcopy for every row is redundant work.
     # NOTE(review): assumes normalize_result does not need a fresh copy
     # per document -- confirm.
     query = deepcopy(self)
     return ScanResult(result.count,
                       (ESQuerySet.normalize_result(query, r)
                        for r in result))

Example #3

Show file

 def scroll(self):
     """
     Execute this query through the scroll api.

     The query is deep-copied first; if the copy has no size set it is
     given SCROLL_PAGE_SIZE_LIMIT. Returns a ScanResult built from the
     result count and a lazy generator of normalized documents.
     """
     scroll_spec = deepcopy(self)
     if scroll_spec._size is None:
         scroll_spec._size = SCROLL_PAGE_SIZE_LIMIT

     raw_hits = scroll_query(scroll_spec.index, scroll_spec.raw_query)
     total = raw_hits.count
     normalized = (
         ESQuerySet.normalize_result(scroll_spec, hit) for hit in raw_hits
     )
     return ScanResult(total, normalized)

Example #4

Show file

File: export.py Project: mekete/commcare-hq

def get_export_documents(export_instance, filters):
    """Return a ScanResult of the documents selected for an export.

    The matching doc ids are written to a temporary file immediately, one
    per line. The documents themselves are fetched lazily through
    ``iter_es_docs`` while the returned result is iterated.

    The temporary file is removed if writing the ids fails, and once
    iteration has started and then finishes for any reason (exhaustion,
    error, or the generator being closed early). If the result is never
    iterated, the file is left in place.

    :param export_instance: passed to ``_get_export_query``
    :param filters: passed to ``_get_export_query``
    :returns: ``ScanResult(scroll_result.count, <lazy document generator>)``
    """
    # Pull doc ids from elasticsearch and stream to disk
    query = _get_export_query(export_instance, filters)
    fd, temp_path = tempfile.mkstemp()
    try:
        # mkstemp returns an already-open OS-level descriptor. Wrapping it
        # with fdopen lets the ``with`` block close it; reopening by path
        # instead would leave that descriptor open.
        with os.fdopen(fd, 'w') as f:
            scroll_result = query.scroll_ids()
            for doc_id in scroll_result:
                f.write(doc_id + '\n')
    except BaseException:
        # Nothing will ever read the file if the id dump failed.
        os.remove(temp_path)
        raise

    def iter_export_docs():
        # Stream doc ids from disk and fetch documents from ES in chunks
        try:
            with open(temp_path) as f:
                doc_ids = (doc_id.strip() for doc_id in f)
                for doc in iter_es_docs(query.index, doc_ids):
                    yield doc
        finally:
            # Also runs when the consumer stops early or a fetch fails.
            os.remove(temp_path)

    return ScanResult(scroll_result.count, iter_export_docs())

Example #5

Show file

File: export.py Project: esmaeilinia/commcare-hq

def get_export_documents(export_instance, filters):
    """Return a ScanResult of the documents selected for an export.

    The id scroll is started right away. Everything else happens lazily
    while the returned result is iterated: the ids are written to a
    transient temp file, read back line by line, and handed to
    ``iter_es_docs`` to fetch the documents.
    """
    export_query = _get_export_query(export_instance, filters)
    id_scroll = export_query.scroll_ids()

    def iter_export_docs():
        with TransientTempfile() as id_file_path:
            # Pull doc ids from elasticsearch and stream to disk
            with open(id_file_path, 'w', encoding='utf-8') as id_sink:
                id_sink.writelines(doc_id + '\n' for doc_id in id_scroll)

            # Stream doc ids from disk and fetch documents from ES in chunks
            with open(id_file_path, 'r', encoding='utf-8') as id_source:
                stripped_ids = (raw_id.strip() for raw_id in id_source)
                for es_doc in iter_es_docs(export_query.index, stripped_ids):
                    yield es_doc

    return ScanResult(id_scroll.count, iter_export_docs())

Example #6

Show file

    def scroll(self):
        """
        Build a ScanResult from the in-memory ``_result_docs``.

        The reported count is the number of docs before paging. The docs
        are sorted by ``_sort_field`` when one is set (descending when
        ``_sort_desc`` is true), then sliced from ``_start`` for ``_size``
        items, or to the end when ``_size`` is None. Each doc is reduced to
        ``_source_fields`` (when set), wrapped as ``{'_source': ...}`` and
        passed through ``ESQuerySet.normalize_result`` lazily.
        """
        docs = list(self._result_docs)
        match_count = len(docs)

        if self._sort_field:
            docs = sorted(docs,
                          key=lambda doc: doc[self._sort_field],
                          reverse=self._sort_desc)

        # An open-ended slice covers the "no size limit" case.
        stop = None if self._size is None else self._start + self._size
        page = docs[self._start:stop]

        def _project(doc):
            # Without a field restriction the doc is passed through as is.
            if not self._source_fields:
                return doc
            return {key: doc[key] for key in self._source_fields if key in doc}

        normalized = (
            ESQuerySet.normalize_result(self, {'_source': _project(doc)})
            for doc in page
        )
        return ScanResult(match_count, normalized)