def _handle_basic(self, header, parts):
    """Compile and run a 'basic' job's function, then ack via the sink.

    The function source and call arguments arrive in ``header``.  On
    success the header is forwarded to the sink as the acknowledgement;
    on failure the numbered source is logged and the exception re-raised
    for the caller to report.
    """
    namespace = {}
    exec(header['func_text'], namespace)
    job_func = namespace[header['func_name']]
    try:
        job_func(*header['args'], **header['kwargs'])
    except:
        # Dump the function source with line numbers to aid debugging,
        # then propagate the original exception unchanged.
        for lineno, text in enumerate(header['func_text'].splitlines(), 1):
            log.info('%.4d: %s', lineno, text)
        raise
    util.send_bson(self._sink, header)
def _handle_xreduce(self, header, parts):
    """Run an 'xreduce' job: group input objects on the composite
    (c, k) key and apply the supplied reduce function to each group.
    """
    namespace = {}
    exec(header['reduce_text'], namespace)
    reducer = namespace[header['reduce_name']]
    group_key = lambda o: (o['c'], o['k'])
    results = []
    for (c, k), objs in self._reduce_iter(group_key, parts):
        results.append(reducer(c, k, [obj['v'] for obj in objs]))
    # Two-frame reply: header first, then the result payload.
    util.send_bson(self._sink, header, zmq.SNDMORE)
    util.send_bson(self._sink, dict(result=results))
def _handle_reduce(self, header, parts):
    """Run a classic mapreduce 'reduce': group input objects by their
    '_id' and apply the supplied reduce function to each value group.
    """
    namespace = {}
    exec(header['reduce_text'], namespace)
    reducer = namespace[header['reduce_name']]
    reduced = []
    for group_id, objs in self._reduce_iter(lambda o: o['_id'], parts):
        vals = [obj['value'] for obj in objs]
        reduced.append(dict(_id=group_id, value=reducer(group_id, vals)))
    # Two-frame reply: header first, then the result payload.
    util.send_bson(self._sink, header, zmq.SNDMORE)
    util.send_bson(self._sink, dict(result=reduced))
def _handle_finalize(self, header, parts):
    """Run a 'finalize' job: feed every input object through the supplied
    finalize function and stream the results back in chunks of 100.

    Fix: the original initialized ``result = []`` and then immediately
    shadowed it as the chunk-loop variable — the initializer was dead
    code and is removed; the loop variable is renamed for clarity.
    """
    namespace = {}
    exec(header['finalize_text'], namespace)
    finalize = namespace[header['finalize_name']]

    def obj_iter():
        # Flatten all BSON objects out of all incoming message parts.
        for part in parts:
            for obj in util.bson_iter(part):
                yield obj

    util.send_bson(self._sink, header, zmq.SNDMORE)
    # Each 100-object chunk becomes one frame of the multipart reply.
    for chunk in util.item_chunked_iter(finalize(obj_iter()), 100):
        payload = ''.join(map(bson.BSON.encode, chunk))
        self._sink.send(payload, zmq.SNDMORE)
    # Empty frame terminates the multipart message.
    # NOTE(review): terminator assumed to follow the loop, matching the
    # framing pattern in _handle_maplike — confirm against the sink reader.
    self._sink.send('')
def handle_message(self, header, parts):
    """Dispatch an incoming job message to the handler for its jobtype.

    On any handler failure, the traceback is attached to the header
    under 'error' and forwarded to the sink so the coordinator sees it.

    Fix: the two ``log.info`` calls eagerly formatted with ``%``,
    inconsistent with the lazy-args style of the ``log.debug`` call in
    this same method; switched to lazy logger arguments so formatting
    is skipped when the level is disabled.
    """
    jobtype = header['jobtype']
    log.debug('Got message %s', header)
    log.info('Listener %s starting "%s" for job_id %s (size %s)',
             header['listener_id'], jobtype, header['job_id'], len(parts))
    try:
        handler = self._handlers[jobtype]
        handler(header, parts)
    except:
        # Deliberately broad: any failure is reported back via the sink
        # rather than killing the worker loop.
        log.exception('Error handling message')
        header['error'] = traceback.format_exc()
        util.send_bson(self._sink, header)
    else:
        log.info('Listener %s finished "%s" for job_id %s',
                 header['listener_id'], jobtype, header['job_id'])
def _do_chunk(self, command, job, header, payload_gen=None):
    """Send one chunk of work for `job` to a listener and wait for its
    result, retrying on a fresh listener if the current one times out.

    Throttles itself against self._outstanding_chunk_limit, skips work
    for jobs already complete/errored, and returns the listener's
    result (or None when the chunk is skipped).  `payload_gen`, when
    given, yields the payload frames to send after the header.
    """
    # Throttle: cooperatively wait until we are under the chunk limit.
    while self._outstanding_chunks >= self._outstanding_chunk_limit:
        gevent.sleep(0.5)
    self._outstanding_chunks += 1
    try:
        log.debug('Doing chunk %s', job)
        j = self.jobs[job.id]
        # A finished/failed job gets no more chunks.
        if job.status['state'] in ('complete', 'error'):
            log.debug('Skipping chunk on %s job', job.status['state'])
            return None
        # Retry loop: on timeout, release the listener (via the context
        # manager) and allocate a new one.
        while True:
            with j.alloc_listener(job.id) as (l_queue, lid):
                try:
                    header['job_id'] = job.id
                    header['listener_id'] = lid
                    begin = time.time()
                    if payload_gen is None:
                        # Header-only message.
                        util.send_bson(self._src_sock, header)
                        size = 1
                    else:
                        # Multipart: header, payload frames, then an
                        # empty terminator frame.
                        util.send_bson(self._src_sock, header, zmq.SNDMORE)
                        size = 1 # never let it be 0 (used as a divisor below)
                        for payload in payload_gen():
                            self._src_sock.send(payload, zmq.SNDMORE)
                            size += len(payload)
                        self._src_sock.send('')
                    # Timeout scales with payload size via the job's model.
                    tof = j.timeout_for(command, size)
                    log.info('listener %s: total size of %d kB, time limit %ss', lid, size >> 10, tof)
                    result = l_queue.get(timeout=tof)
                    t_completed = float(time.time() - begin)
                    # Feed per-byte timing back into the job's estimator.
                    j.completed(command, t_completed/size)
                    log.info('listener %s: completed in %ss', lid, t_completed)
                    log.info('Completed listener %s', lid)
                    return result
                except Empty:
                    # Listener missed the deadline: record it and retry
                    # with a newly allocated listener.
                    log.info('Timed out listener %s', lid)
                    j.timed_out(command, float(time.time() - begin)/size)
    finally:
        self._outstanding_chunks -= 1
def _handle_maplike(self, key, header, parts):
    """Run a map-like job, partitioning mapped output into
    ``reduce_count`` buckets by ``hash(key(obj)) % reduce_count``.

    `key` is a callable extracting the partition key from each mapped
    object.  Mapped objects are (key, value) pairs.

    Fix: the emit genexp bound loop variables named ``key``/``value``,
    shadowing the ``key`` parameter (a function) — harmless at runtime
    since genexps have their own scope, but confusing; renamed to
    ``k``/``v``.  Behavior unchanged.
    """
    namespace = {}
    exec(header['map_text'], namespace)
    mapper = namespace[header['map_name']]
    reduce_count = header['reduce_count']
    buckets = [[] for _ in range(reduce_count)]
    # Iterate, grouping mapped objects by their reduce-bucket index.
    sz_input = 0
    for part in parts:
        sz_input += len(part)
        for obj in mapper(util.bson_iter(part)):
            bucket = hash(key(obj)) % reduce_count
            buckets[bucket].append(obj)
    # NOTE(review): assert is stripped under -O; consider raising instead.
    assert sz_input, 'There was no input!'
    # Emit reduce buckets one frame at a time.
    util.send_bson(self._sink, header, zmq.SNDMORE)
    for bucket_objs in buckets:
        self._sink.send(
            ''.join(
                bson.BSON.encode(dict(_id=k, value=v))
                for k, v in bucket_objs),
            zmq.SNDMORE)
    # Empty frame terminates the multipart message.
    # NOTE(review): terminator assumed to follow the loop (standard
    # multipart framing: every frame but the last carries SNDMORE).
    self._sink.send('')
def request_handler(self, i, context, uri):
    """Greenlet body: serve client commands on a REP socket bound to
    `uri`.  `i` is only used to tag log lines for this handler.

    Recognized '$command' values: echo, terminate, mapreduce,
    xmapreduce, basic, status, config.
    """
    sock = context.socket(zmq.REP)
    sock.connect(uri)
    while True:
        try:
            obj = util.recv_bson(sock)
            log.debug('Request %s in %s', obj, i)
            command = obj.get('$command')
            log.debug('Req %s in %s', command, i)
            if command == 'echo':
                util.send_bson(sock, dict(message=obj))
            elif command == 'terminate':
                # NOTE(review): this branch sends no reply on a REP
                # socket, which leaves the REQ/REP state machine mid-
                # exchange — presumably acceptable since we are shutting
                # down; confirm.
                self._terminate = True
            elif command in ('mapreduce', 'xmapreduce', 'basic'):
                job = Job.from_request(self, obj)
                self.job_manager.alloc_job(job)
                log.debug('Start job %s', job.id)
                if obj.get('wait'):
                    # Synchronous: run to completion before replying.
                    job.run()
                    self.job_manager.dealloc_job(job.id)
                else:
                    # Asynchronous: run in a new greenlet; client polls
                    # via the 'status' command.
                    gevent.spawn(util.ending_request(self.mongodb), job.run)
                util.send_bson(sock, dict(job_id=job.id, status=job.status))
                continue
            elif command == 'status':
                try:
                    jid = obj['job_id']
                    status = self.job_manager.job_status(jid)
                    util.send_bson(sock, dict(job_id=jid, status=status))
                    # Finished jobs are released once their final status
                    # has been reported.
                    if status['state'] in ('complete', 'error'):
                        self.job_manager.dealloc_job(jid)
                except:
                    # Reply with UNKNOWN rather than leaving the REP
                    # socket without a response.
                    log.exception('Error getting status for job')
                    util.send_bson(sock, dict(
                        job_id=obj['job_id'],
                        status=dict(state='UNKNOWN')))
                continue
            elif command == 'config':
                util.send_bson(sock, dict(
                    self.options.zmr,
                    src_port=self._src_port,
                    sink_port=self._sink_port))
        except:
            # Any failure in the dispatch above is reported to the
            # client as an error reply; the loop keeps serving.
            log.exception('Error in request handler')
            util.send_bson(sock, dict(error=traceback.format_exc()))
            continue
        finally:
            # Release the per-request MongoDB connection state.
            self.mongodb.end_request()