Example #1
def run(self):
    # Encode each query result as BSON, then group the encoded docs
    # into size-bounded chunks.
    it = (bson.BSON.encode(obj) for obj in self._query)
    chunks = size_chunked_iter(it, self._chunk_size)
    pagenum = None
    for pagenum, page_chunks in enumerate(
            util.item_chunked_iter(chunks, self._chunks_per_page)):
        self._job.status['query'] = 'Processed %d records' % (
            pagenum * self._chunks_per_page)
        # Write this page of chunks to its own file, recording the
        # (start, end) byte offsets of each chunk as we go.
        fn = self._fn_tpl % pagenum
        log.debug('fn = %s', fn)
        index = []
        with open(fn, 'wb') as fp:
            pos = 0
            for chunk in page_chunks:
                old = pos
                fp.write(chunk)
                pos = fp.tell()
                index.append((old, pos))
        # Memory-map the finished page and emit one work item per chunk.
        mm = self._job.map_read(fn)
        for b, e in index:
            log.debug('query put(%s,%s)', b, e)
            self.output.put((mm, b, e))
    if pagenum is None:
        log.info('Nothing to process from this query. Job %s', self._job.id)
    self.output.put(StopIteration)
    del self._job.status['query']
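All three examples on this page call util.item_chunked_iter to batch an iterator into fixed-size groups, but the helper itself is not shown. A minimal sketch of how it plausibly behaves, inferred from the call sites (the body is an assumption, not the project's actual implementation):

import itertools

def item_chunked_iter(it, chunk_size):
    # Yield lists of up to chunk_size consecutive items from an iterator.
    it = iter(it)
    while True:
        chunk = list(itertools.islice(it, chunk_size))
        if not chunk:
            return
        yield chunk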
Example #2
def _iter(self):
    # Seems to be required due to some weirdness in gevent queues
    def anno_job():
        # Tag every incoming object with the owning job's id.
        for o in self.input:
            yield dict(o, job_id=self._job.id)
    # Batch the annotated stream into chunks of 100 items.
    return util.item_chunked_iter(anno_job(), 100)
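To see the batching behavior outside the job pipeline, the same pattern can be fed a plain list (a hypothetical stand-in for the gevent queue above), using the item_chunked_iter sketch from Example #1:

def iter_demo(input_objs, job_id):
    # Same shape as _iter above, minus the gevent queue.
    def anno_job():
        for o in input_objs:
            yield dict(o, job_id=job_id)
    return item_chunked_iter(anno_job(), 100)

for batch in iter_demo([{'n': i} for i in range(250)], job_id='abc123'):
    print(len(batch))  # prints 100, 100, 50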
Example #3
def _handle_finalize(self, header, parts):
    # Compile the finalize function shipped in the header into a
    # fresh namespace and look it up by name.
    ns = {}
    exec(header['finalize_text'], ns)
    func = ns[header['finalize_name']]
    def obj_iter():
        # Flatten the message parts into one stream of decoded BSON docs.
        for part in parts:
            for obj in util.bson_iter(part):
                yield obj
    util.send_bson(self._sink, header, zmq.SNDMORE)
    # Stream the finalized results downstream in batches of 100,
    # each batch re-encoded as concatenated BSON documents.
    for result in util.item_chunked_iter(func(obj_iter()), 100):
        sresult = ''.join(map(bson.BSON.encode, result))
        self._sink.send(sresult, zmq.SNDMORE)
    # An empty final frame terminates the multipart message.
    self._sink.send('')
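util.bson_iter is likewise not shown here. Given how it is called, it walks a buffer of back-to-back BSON documents; a plausible sketch (the helper name comes from the call site, the body is an assumption) relies on the fact that every BSON document begins with its total length as a little-endian int32:

import struct
import bson

def bson_iter(buf):
    # Yield decoded documents from a buffer of concatenated BSON docs.
    pos = 0
    while pos < len(buf):
        # The 4-byte length prefix counts the whole document,
        # including the prefix itself.
        (doclen,) = struct.unpack_from('<i', buf, pos)
        yield bson.BSON(buf[pos:pos + doclen]).decode()
        pos += doclen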