def getSBids(self, SB=''):
    """Return a list of all msg_ids of the not-yet-completed jobs that
    were submitted for a given SB.

    Keyword arguments:
    SB -- the SB identifier to filter on; with the default empty string
          no query is performed and an empty list is returned.
    """
    if SB == '':
        return []
    SBids = []
    # ask the hub DB for every job that has not completed yet
    query = self.get_client().db_query({'completed': None}, ['buffers', 'msg_id'])
    for q in query:
        # unpack the buffer of the job to obtain the SB number;
        # only the keyword arguments (third element) are needed
        _, _, args = unpack_apply_message(q['buffers'])
        if args['SB'] == SB:
            SBids.append(q['msg_id'])
    return SBids
def getSBids(self, SB=''):
    """Collect the msg_ids of every pending (not completed) job that
    belongs to the given SB; an empty SB yields an empty list.
    """
    if SB == '':
        return []
    matching = []
    pending = self.get_client().db_query({'completed': None},
                                         ['buffers', 'msg_id'])
    for job in pending:
        # unpack the job buffer to recover the submission arguments
        null, com, arguments = unpack_apply_message(job['buffers'])
        if arguments['SB'] == SB:
            matching.append(job['msg_id'])
    return matching
def run(self): if self.session.get_client() == None: self.mylog.error("Not connected to a cluster.") return False # workaround for Ipython bug which makes everything slow, # create a new client, use it and delete it c = Client(profile='ssh') jcmd = self.session.opts.get_opt('jcmd') if jcmd == 'purge': num = 0 query = c.db_query({'completed':{'$ne' : None }},['msg_id']) for q in query: result = c.get_result(q['msg_id']).get() # filter on SB, node, task if self._check_result(result): num += 1 c.purge_results(q['msg_id']) mylogger.userinfo(self.mylog, str(num)+" cluster's hub results deleted.") elif jcmd == 'list': num = 0 # query the hub DB for all the finished tasks and get IDs query = c.db_query({'completed':{'$ne' : None }},['msg_id','completed','started']) # search for interesting results and print them for q in query: result = c.get_result(q['msg_id']).get() # filter on SB, node, task if self._check_result(result): # skip results without error if wanted if self.session.opts.get_opt('onlyerr') and result['err'] == '': continue num += 1 header = {'Task' : result['task'], 'Node' : result['node'],\ 'SB' : result['SB'], \ 'Completed' : q['completed'].replace(microsecond=0), \ 'Started' : q['started'].replace(microsecond=0), \ 'Exec time': q['completed'].replace(microsecond=0)-q['started'].replace(microsecond=0)} data = {'Std Output': result['out'], 'Std Error': result['err'], \ 'Command':result['command']} print_jobs(header, data, self.session.opts.get_opt('lines')) mylogger.userinfo(self.mylog, str(num)+" processes listed.") elif jcmd == 'running': num_r = 0 num_q = 0 # TODO: it should be "Started" not "submitted", unfortunately ipython does not set it query = c.db_query({'completed': None},['buffers','engine_uuid','submitted']) for q in query: # unpack the buffer of the sent jobs to obtain the arguments null, com, args = unpack_apply_message(q['buffers']) # filter on SB, node, task if self._check_result({'node':args['node'],'SB':args['SB'],'task':args['task']}): if 
q['engine_uuid'] == None: if self.session.opts.get_opt('queue') == False: continue q['msg_id'] = q['msg_id']+" (queue)" num_q += 1 else: num_r += 1 header = {'Msg_id' : q['msg_id'], 'Task' : args['task'], 'Node' : args['node'], 'SB' : args['SB'], \ 'Started' : q['submitted'].replace(microsecond=0), \ 'Extime': datetime.datetime.now().replace(microsecond=0) - q['submitted'].replace(microsecond=0)} data = {'Command': com[0]} print_jobs(header, data, self.session.opts.get_opt('lines')) mylogger.userinfo(self.mylog, "Processes running: "+str(num_r)+". In queue: "+str(num_q)+".") elif jcmd == 'kill': print "TBI" #TODO: add a resubmit option to resubmit all tasks that failed http://ipython.org/ipython-doc/stable/parallel/parallel_task.html del c
def apply_request(self, stream, ident, parent):
    """Execute a serialized apply request in the user namespace and send
    back an 'apply_reply' message with the serialized result.

    The request's function, args and kwargs arrive in the message
    buffers; they are bound under msg_id-mangled names in the user
    namespace, executed with exec, then cleaned up again.

    NOTE(review): comments only; the code itself is untouched.
    """
    # flush previous reply, so this request won't block it
    stream.flush(zmq.POLLOUT)
    try:
        # a malformed message is logged and dropped instead of
        # crashing the engine (content is extracted but unused below)
        content = parent['content']
        bufs = parent['buffers']
        msg_id = parent['header']['msg_id']
        # bound = parent['header'].get('bound', False)
    except:
        self.log.error("Got bad msg: %s"%parent, exc_info=True)
        return
    # pyin_msg = self.session.msg(u'pyin',{u'code':code}, parent=parent)
    # self.iopub_stream.send(pyin_msg)
    # self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent)
    # subheader sent back to the scheduler alongside the reply
    sub = {'dependencies_met' : True, 'engine' : self.ident, 'started': datetime.now()}
    try:
        # allow for not overriding displayhook
        if hasattr(sys.displayhook, 'set_parent'):
            sys.displayhook.set_parent(parent)
            sys.stdout.set_parent(parent)
            sys.stderr.set_parent(parent)
        # exec "f(*args,**kwargs)" in self.user_ns, self.user_ns
        working = self.user_ns
        # suffix =
        # names injected below are mangled with the msg_id so they are
        # unlikely to collide with the user's own variables
        prefix = "_"+str(msg_id).replace("-","")+"_"
        f,args,kwargs = unpack_apply_message(bufs, working, copy=False)
        # if bound:
        #     bound_ns = Namespace(working)
        #     args = [bound_ns]+list(args)
        # NOTE(review): this first fname value is immediately overwritten
        # by the next line, so it is effectively dead
        fname = getattr(f, '__name__', 'f')
        fname = prefix+"f"
        argname = prefix+"args"
        kwargname = prefix+"kwargs"
        resultname = prefix+"result"
        ns = { fname : f, argname : args, kwargname : kwargs , resultname : None }
        # print ns
        working.update(ns)
        code = "%s=%s(*%s,**%s)"%(resultname, fname, argname, kwargname)
        try:
            exec(code, working,working)
            result = working.get(resultname)
        finally:
            # always remove the temporary names from the user namespace,
            # even when the call itself raised
            for key in ns.keys():
                working.pop(key)
        # if bound:
        #     working.update(bound_ns)
        packed_result,buf = serialize_object(result)
        result_buf = [packed_result]+buf
    except:
        # any failure (unpacking, execution, serialization) is wrapped
        # into an error reply and also published on the iopub stream
        exc_content = self._wrap_exception('apply')
        # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
        self.session.send(self.iopub_stream, 'pyerr', exc_content, parent=parent,
                          ident=asbytes('%s.pyerr'%self.prefix))
        reply_content = exc_content
        result_buf = []
        if exc_content['ename'] == 'UnmetDependency':
            sub['dependencies_met'] = False
    else:
        reply_content = {'status' : 'ok'}
    # put 'ok'/'error' status in header, for scheduler introspection:
    sub['status'] = reply_content['status']
    reply_msg = self.session.send(stream, 'apply_reply', reply_content,
                                  parent=parent, ident=ident,
                                  buffers=result_buf, subheader=sub)
    # flush i/o
    # should this be before reply_msg is sent, like in the single-kernel code,
    # or should nothing get in the way of real results?
    sys.stdout.flush()
    sys.stderr.flush()
def apply_request(self, stream, ident, parent):
    """Execute a serialized apply request in the user namespace and send
    back an 'apply_reply' message with the serialized result.

    Python 2 variant (exec statement, iterkeys, u'' literals) of the
    same engine handler.

    NOTE(review): comments only; the code itself is untouched.
    """
    # flush previous reply, so this request won't block it
    stream.flush(zmq.POLLOUT)
    try:
        # a malformed message is logged and dropped instead of
        # crashing the engine (content is extracted but unused below)
        content = parent[u'content']
        bufs = parent[u'buffers']
        msg_id = parent['header']['msg_id']
        # bound = parent['header'].get('bound', False)
    except:
        self.log.error("Got bad msg: %s" % parent, exc_info=True)
        return
    # pyin_msg = self.session.msg(u'pyin',{u'code':code}, parent=parent)
    # self.iopub_stream.send(pyin_msg)
    # self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent)
    # subheader sent back to the scheduler alongside the reply
    sub = {
        'dependencies_met': True,
        'engine': self.ident,
        'started': datetime.now()
    }
    try:
        # allow for not overriding displayhook
        if hasattr(sys.displayhook, 'set_parent'):
            sys.displayhook.set_parent(parent)
            sys.stdout.set_parent(parent)
            sys.stderr.set_parent(parent)
        # exec "f(*args,**kwargs)" in self.user_ns, self.user_ns
        working = self.user_ns
        # suffix =
        # names injected below are mangled with the msg_id so they are
        # unlikely to collide with the user's own variables
        prefix = "_" + str(msg_id).replace("-", "") + "_"
        f, args, kwargs = unpack_apply_message(bufs, working, copy=False)
        # if bound:
        #     bound_ns = Namespace(working)
        #     args = [bound_ns]+list(args)
        # NOTE(review): this first fname value is immediately overwritten
        # by the next line, so it is effectively dead
        fname = getattr(f, '__name__', 'f')
        fname = prefix + "f"
        argname = prefix + "args"
        kwargname = prefix + "kwargs"
        resultname = prefix + "result"
        ns = {fname: f, argname: args, kwargname: kwargs, resultname: None}
        # print ns
        working.update(ns)
        code = "%s=%s(*%s,**%s)" % (resultname, fname, argname, kwargname)
        try:
            exec code in working, working
            result = working.get(resultname)
        finally:
            # always remove the temporary names from the user namespace,
            # even when the call itself raised
            for key in ns.iterkeys():
                working.pop(key)
        # if bound:
        #     working.update(bound_ns)
        packed_result, buf = serialize_object(result)
        result_buf = [packed_result] + buf
    except:
        # any failure (unpacking, execution, serialization) is wrapped
        # into an error reply and also published on the iopub stream
        exc_content = self._wrap_exception('apply')
        # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
        self.session.send(self.iopub_stream, u'pyerr', exc_content,
                          parent=parent,
                          ident=asbytes('%s.pyerr' % self.prefix))
        reply_content = exc_content
        result_buf = []
        if exc_content['ename'] == 'UnmetDependency':
            sub['dependencies_met'] = False
    else:
        reply_content = {'status': 'ok'}
    # put 'ok'/'error' status in header, for scheduler introspection:
    sub['status'] = reply_content['status']
    reply_msg = self.session.send(stream, u'apply_reply', reply_content,
                                  parent=parent, ident=ident,
                                  buffers=result_buf, subheader=sub)
    # flush i/o
    # should this be before reply_msg is sent, like in the single-kernel code,
    # or should nothing get in the way of real results?
    sys.stdout.flush()
    sys.stderr.flush()
def run(self): if self.session.get_client() == None: self.mylog.error("Not connected to a cluster.") return False # workaround for Ipython bug which makes everything slow, # create a new client, use it and delete it c = Client(profile='ssh') jcmd = self.session.opts.get_opt('jcmd') if jcmd == 'purge': num = 0 query = c.db_query({'completed': {'$ne': None}}, ['msg_id']) for q in query: result = c.get_result(q['msg_id']).get() # filter on SB, node, task if self._check_result(result): num += 1 c.purge_results(q['msg_id']) mylogger.userinfo(self.mylog, str(num) + " cluster's hub results deleted.") elif jcmd == 'list': num = 0 # query the hub DB for all the finished tasks and get IDs query = c.db_query({'completed': { '$ne': None }}, ['msg_id', 'completed', 'started']) # search for interesting results and print them for q in query: result = c.get_result(q['msg_id']).get() # filter on SB, node, task if self._check_result(result): # skip results without error if wanted if self.session.opts.get_opt( 'onlyerr') and result['err'] == '': continue num += 1 header = {'Task' : result['task'], 'Node' : result['node'],\ 'SB' : result['SB'], \ 'Completed' : q['completed'].replace(microsecond=0), \ 'Started' : q['started'].replace(microsecond=0), \ 'Exec time': q['completed'].replace(microsecond=0)-q['started'].replace(microsecond=0)} data = {'Std Output': result['out'], 'Std Error': result['err'], \ 'Command':result['command']} print_jobs(header, data, self.session.opts.get_opt('lines')) mylogger.userinfo(self.mylog, str(num) + " processes listed.") elif jcmd == 'running': num_r = 0 num_q = 0 # TODO: it should be "Started" not "submitted", unfortunately ipython does not set it query = c.db_query({'completed': None}, ['buffers', 'engine_uuid', 'submitted']) for q in query: # unpack the buffer of the sent jobs to obtain the arguments null, com, args = unpack_apply_message(q['buffers']) # filter on SB, node, task if self._check_result({ 'node': args['node'], 'SB': args['SB'], 'task': 
args['task'] }): if q['engine_uuid'] == None: if self.session.opts.get_opt('queue') == False: continue q['msg_id'] = q['msg_id'] + " (queue)" num_q += 1 else: num_r += 1 header = {'Msg_id' : q['msg_id'], 'Task' : args['task'], 'Node' : args['node'], 'SB' : args['SB'], \ 'Started' : q['submitted'].replace(microsecond=0), \ 'Extime': datetime.datetime.now().replace(microsecond=0) - q['submitted'].replace(microsecond=0)} data = {'Command': com[0]} print_jobs(header, data, self.session.opts.get_opt('lines')) mylogger.userinfo( self.mylog, "Processes running: " + str(num_r) + ". In queue: " + str(num_q) + ".") elif jcmd == 'kill': print "TBI" #TODO: add a resubmit option to resubmit all tasks that failed http://ipython.org/ipython-doc/stable/parallel/parallel_task.html del c