def start_mr(self, socket, full, collection, filter=None, map=None,
             reduce=None, finalize=None, map_name=None, reduce_name=None,
             finalize_name=None, inputs=None, output_type=None):
    '''Build a ``mapreduce`` request, send it via ``req_bson`` and track it.

    The request targets the ``zarkov`` database, reads from ``inputs``
    (``self._inputs`` when not given) and writes to ``collection.name``.

    socket -- passed straight through to ``req_bson``.
    full -- when true the output type is ``'replace'``, otherwise
        ``'reduce'``; ignored if ``output_type`` is given.
    collection -- object whose ``name`` attribute names the output.
    filter -- optional mapping merged over a copy of ``self._query``.
    map, reduce, finalize -- optional functions; for each one supplied,
        its text (as produced by ``func_text``) and its name are added to
        the request.
    map_name, reduce_name, finalize_name -- optional overrides for the
        names sent; each defaults to the function's ``__name__``.
    inputs -- value sent as ``input_names``.
    output_type -- explicit output type, overriding ``full``.

    Returns the reply from ``req_bson``; its ``'job_id'`` is appended to
    ``self._job_ids``.
    '''
    if inputs is None:
        inputs = self._inputs
    req = {
        '$command': 'mapreduce',
        'database': 'zarkov',
        'input_names': inputs,
        'output_name': collection.name}

    # An explicit output_type always wins over the ``full`` flag.
    if output_type is not None:
        req['output_type'] = output_type
    elif full:
        req['output_type'] = 'replace'
    else:
        req['output_type'] = 'reduce'

    # Copy so the caller's filter never leaks into self._query.
    query = dict(self._query)
    if filter:
        query.update(filter)
    log.debug('query=%s', query)
    req['query'] = bson.Binary(bson.BSON.encode(query))

    stages = (
        ('map', map, map_name),
        ('reduce', reduce, reduce_name),
        ('finalize', finalize, finalize_name))
    for stage, func, name in stages:
        if func is None:
            continue
        if name is None:
            # __name__ is available on both Python 2 and 3, unlike func_name.
            name = func.__name__
        req[stage + '_text'] = func_text(func)
        req[stage + '_name'] = name

    r = req_bson(socket, req)
    self._job_ids.append(r['job_id'])
    return r
def start_basic_job(self, socket, func, *args, **kwargs):
    '''Start a (single-threaded) job to be run on a worker.

    Builds a ``basic`` request for the ``zarkov`` database carrying the
    name and text of ``func`` (text as produced by ``func_text``) along
    with the positional and keyword arguments to call it with, then sends
    it via ``req_bson`` on ``socket``.

    Returns the reply from ``req_bson``; its ``'job_id'`` is appended to
    ``self._job_ids``.
    '''
    req = {
        '$command': 'basic',
        'database': 'zarkov',
        'args': list(args),
        'kwargs': kwargs}
    # __name__ is available on both Python 2 and 3, unlike func_name.
    req['func_name'] = func.__name__
    req['func_text'] = func_text(func)
    r = req_bson(socket, req)
    self._job_ids.append(r['job_id'])
    return r
def _worker_proc(cls, router_addr):
    '''Entry point for a worker process: fetch config, then serve requests.

    Sets the process title to ``zmr-worker``, asks the router at
    ``router_addr`` for its configuration with a ``config`` command, and
    builds a ``cls`` instance from the returned ``src_port``,
    ``sink_port`` and optional ``suicide_level`` (default ``3 * 2**20``).
    The instance then serves ``requests_per_worker_process`` requests.

    router_addr -- address of the router; everything before its last
        ``':'`` is passed to ``cls`` as the host URI.
    '''
    setproctitle('zmr-worker')
    log.info('zmr-worker startup')
    context = zmq.Context()

    # Get config from router
    sock = context.socket(zmq.REQ)
    try:
        sock.connect(router_addr)
        config = util.req_bson(sock, {'$command': 'config'})
    finally:
        # This socket is only needed for the single config exchange; close
        # it rather than holding it open while the worker serves.
        sock.close()

    # Drop the trailing ":port" component of the router address.
    host_uri = router_addr.rsplit(':', 1)[0]
    w = cls(
        context, host_uri,
        config['src_port'],
        config['sink_port'],
        suicide_level=config.get('suicide_level', 3 * 2**20))
    w.serve_for(config['requests_per_worker_process'])
def join(self, socket):
    '''Wait for every job in ``self._job_ids`` and return the results.

    Repeatedly sends a ``status`` command for the most recently added job
    id via ``req_bson``.  A ``complete`` reply is appended to
    ``self._results`` and the job id is dropped; any state other than
    ``complete``, ``error`` or ``UNKNOWN`` causes a one second pause
    before polling again.

    Raises ``zexc.WorkerError`` when a job reports state ``error``
    (carrying its traceback) or ``UNKNOWN``.

    Returns ``self._results``.
    '''
    while self._job_ids:
        status_request = {
            '$command': 'status',
            'job_id': self._job_ids[-1]}
        reply = req_bson(socket, status_request)
        log.info('Wait: %r', reply)
        state = reply['status']['state']

        if state == 'complete':
            self._results.append(reply)
            self._job_ids.pop()
            continue

        if state == 'error':
            traceback_text = reply['status']['traceback']
            log.error('Traceback:\n%s', traceback_text)
            raise zexc.WorkerError(traceback_text)

        if state == 'UNKNOWN':
            log.error('Unknown status, terminating (maybe you restarted the zmr-router?)')
            raise zexc.WorkerError('Unknown')

        # Still in progress: back off before asking again.
        time.sleep(1)
    return self._results