def run(self):
    """Initialise the ORTE submit layer, submit three jobs and wait for them.

    Each job is submitted with ``--np 1`` and runs the ``false`` command.
    After every submission the task index written by ``orte_submit_job`` is
    registered in ``task_instance_map`` and both ``self.mywait`` and
    ``self.myspawn`` are incremented.  The method then polls every 0.1 s
    until both counters are no longer positive (presumably they are
    decremented by ``launch_cb`` / ``finish_cb`` -- confirm against the
    callback definitions).
    """

    def build_argv(*words):
        # Returns (buffers, argv).  `buffers` holds the char[] objects that
        # the NULL-terminated `argv` array points into; the caller has to
        # keep both referenced while the C side uses argv.
        buffers = [ffi.new("char[]", word) for word in words]
        buffers.append(ffi.NULL)  # Required
        return buffers, ffi.new("char *[]", buffers)

    # The first word ("submit") will be stripped off by the library
    init_buffers, init_argv = build_argv("submit", "--hnp", DVM_URI)
    lib.orte_submit_init(3, init_argv, ffi.NULL)

    # Output slot that orte_submit_job fills in for every submission
    index = ffi.new("int *")

    for _ in range(3):
        job_buffers, job_argv = build_argv("RADICAL-Pilot", "--np", "1", "false")
        lib.orte_submit_job(job_argv, index,
                            lib.launch_cb, ffi.NULL,
                            lib.finish_cb, ffi.NULL)

        task = index[0]
        task_instance_map[task] = self
        self.mywait += 1
        self.myspawn += 1

        print("Task %d submitted!" % task)

    # Poll until neither counter is positive any more
    while self.myspawn > 0 or self.mywait > 0:
        time.sleep(0.1)

    print("Done!")
def init_orte(self, cu):
    """Initialise the ORTE submit library for the DVM referenced by `cu`.

    The DVM URI is read from ``cu['slots']['lm_info']['dvm_uri']``.

    Args:
        cu: mapping with a 'slots' entry which in turn carries 'lm_info'.

    Returns:
        The value returned by ``orte_lib.orte_submit_init`` (presumably an
        ORTE status code -- TODO confirm).

    Raises:
        RuntimeError: if 'lm_info' is absent from the slots, is empty, or
            does not contain 'dvm_uri'.
    """
    # FIXME: it feels as a hack to get the DVM URI from the CU
    slots = cu['slots']

    if 'lm_info' not in slots:
        raise RuntimeError('No lm_info to init via %s: %s'
                           % (self.name, slots))

    lm_info = slots['lm_info']
    if not lm_info:
        raise RuntimeError('lm_info missing for %s: %s'
                           % (self.name, slots))

    if 'dvm_uri' not in lm_info:
        raise RuntimeError('dvm_uri not in lm_info for %s: %s'
                           % (self.name, slots))

    dvm_uri = lm_info['dvm_uri']

    # Notify orte that we are using threads and that we require mutexes
    orte_lib.opal_set_using_threads(True)

    cli_words = (
        "RADICAL-Pilot",  # will be stripped off by lib
        "--hnp",
        str(dvm_uri),
    )

    # Keep the char[] objects referenced while argv points at them
    argv_keepalive = [ffi.new("char[]", word) for word in cli_words]
    argv_keepalive.append(ffi.NULL)  # required
    argv = ffi.new("char *[]", argv_keepalive)

    ret = orte_lib.orte_submit_init(len(cli_words), argv, ffi.NULL)

    # NOTE(review): the flag below is set regardless of the value of `ret`
    self._myhandle = ffi.new_handle(self)
    self._orte_initialized = True

    return ret
def init_orte(self, cu):
    """Point the ORTE submit library at the DVM whose URI is stored in `cu`.

    Args:
        cu: mapping; ``cu['slots']['lm_info']['dvm_uri']`` must be present.

    Returns:
        Whatever ``orte_lib.orte_submit_init`` returns (presumably an ORTE
        status code -- TODO confirm).

    Raises:
        RuntimeError: when the slots carry no 'lm_info', an empty 'lm_info',
            or an 'lm_info' without 'dvm_uri'.
    """
    # FIXME: it feels as a hack to get the DVM URI from the CU
    slots = cu['slots']

    # Pick the message template for the first check that fails, if any
    problem = None
    if 'lm_info' not in slots:
        problem = 'No lm_info to init via %s: %s'
    elif not slots['lm_info']:
        problem = 'lm_info missing for %s: %s'
    elif 'dvm_uri' not in slots['lm_info']:
        problem = 'dvm_uri not in lm_info for %s: %s'

    if problem is not None:
        raise RuntimeError(problem % (self.name, slots))

    uri = slots['lm_info']['dvm_uri']

    # Notify orte that we are using threads and that we require mutexes
    orte_lib.opal_set_using_threads(True)

    program = ffi.new("char[]", "RADICAL-Pilot")  # will be stripped off by lib
    hnp_flag = ffi.new("char[]", "--hnp")
    hnp_value = ffi.new("char[]", str(uri))

    # The list keeps the char[] objects referenced while argv points at them
    argv_keepalive = [program, hnp_flag, hnp_value, ffi.NULL]  # NULL required
    argv = ffi.new("char *[]", argv_keepalive)

    status = orte_lib.orte_submit_init(3, argv, ffi.NULL)

    # NOTE(review): both attributes are set without looking at `status`
    self._myhandle = ffi.new_handle(self)
    self._orte_initialized = True

    return status
def run(self):
    """Submit three ``sleep 10`` jobs via ORTE, cancel one, wait, then halt.

    After ``orte_submit_init`` three jobs (``bash -c "sleep 10"``, one
    process each) are submitted.  For each, the task index written by
    ``orte_submit_job`` is registered in ``task_instance_map`` and the
    ``self.mywait`` / ``self.myspawn`` counters are incremented.

    While either counter is positive the method polls every 0.1 s.  When
    the module-level flag ``fourislaunched`` is found set, it calls
    ``lib.orte_submit_cancel(1)`` and clears the flag (the flag is
    presumably raised by the launch callback -- confirm).  Finally
    ``lib.orte_submit_halt()`` is called.
    """
    # Declared once at function scope instead of inside the polling loop;
    # a `global` statement applies to the whole function either way.
    global fourislaunched

    # The list keeps the char[] objects referenced while argv points at them
    argv_keepalive = [
        ffi.new("char[]", "RADICAL-Pilot"),  # Will be stripped off by the library
        ffi.new("char[]", "--hnp"),
        ffi.new("char[]", DVM_URI),
        ffi.NULL,  # Required
    ]
    argv = ffi.new("char *[]", argv_keepalive)
    lib.orte_submit_init(3, argv, ffi.NULL)

    # Output slot that orte_submit_job fills in for every submission
    index = ffi.new("int[1]")

    for _ in range(3):
        argv_keepalive = [
            ffi.new("char[]", "RADICAL-Pilot"),
            ffi.new("char[]", "--np"),
            ffi.new("char[]", "1"),
            ffi.new("char[]", "bash"),
            ffi.new("char[]", "-c"),
            ffi.new("char[]", "sleep %d" % 10),
            ffi.NULL,  # Required
        ]
        argv = ffi.new("char *[]", argv_keepalive)

        # NOTE(review): the return value of orte_submit_job is not inspected
        lib.orte_submit_job(argv, index,
                            lib.launch_cb, ffi.NULL,
                            lib.finish_cb, ffi.NULL)

        task = index[0]
        task_instance_map[task] = self
        self.mywait += 1
        self.myspawn += 1

        # Single-argument call form prints the same on Python 2 and 3
        print("Task %d submitted!" % task)

    while self.myspawn > 0 or self.mywait > 0:
        if fourislaunched:
            print("Cancelling task ...")
            lib.orte_submit_cancel(1)
            fourislaunched = False
        time.sleep(0.1)

    print("Done!")
    lib.orte_submit_halt()
def run(self):
    """Submit TASKS sleep jobs through ORTE and then collect their profiles.

    Submission phase: while tasks remain or ``self.active`` is positive,
    a new task is submitted whenever fewer than CORES are active; otherwise
    the loop sleeps for 1 ms.  For every task (numbered 1..TASKS, uid
    ``unit.NNNNNN``) the method

      * emits the profiler events of the staging-input, scheduling and
        executing components,
      * creates a directory named after the uid,
      * submits ``sh -c`` with a command that appends timestamp lines to
        ``<uid>/PROF`` around ``sleep SLEEP``,
      * stores the callback handle in ``self.task_instance_map`` under the
        index written by ``orte_submit_job`` and increments ``self.active``
        (presumably decremented again by ``finish_cb`` -- confirm).

    Collection phase: for every submitted task an 'advance' event to
    AGENT_STAGING_OUTPUT is recorded and, if ``<uid>/PROF`` exists, each of
    its ``<event> <msg> <timestamp>`` lines is replayed into the profiler.
    A failure while reading a PROF file is reported and does not stop the
    collection.
    """
    asic = 'AgentStagingInputComponent'
    asc = 'AgentSchedulingComponent'
    aec = 'AgentExecutingComponent'
    asoc = 'AgentStagingOutputComponent'
    prof = self.session.prof.prof

    # The list keeps the char[] objects referenced while argv points at them
    argv_keepalive = [
        ffi.new("char[]", "RADICAL-Pilot"),  # Will be stripped off by the library
        ffi.new("char[]", "--hnp"),
        ffi.new("char[]", DVM_URI),
        ffi.NULL,  # Required
    ]
    argv = ffi.new("char *[]", argv_keepalive)
    lib.orte_submit_init(3, argv, ffi.NULL)

    # Used for storing the task id that is returned by orte_submit_job
    index_ptr = ffi.new("int[1]")

    task_no = 1
    while task_no <= TASKS or self.active > 0:

        if task_no <= TASKS and self.active < CORES:

            task_id = 'unit.%.6d' % task_no
            cu_tmpdir = '%s' % task_id

            #
            # ASIC
            #
            prof(event='get', state=AGENT_STAGING_INPUT_PENDING,
                 uid=task_id, name=asic)
            prof(event='work start', state=AGENT_STAGING_INPUT_PENDING,
                 uid=task_id, name=asic)
            prof('advance', uid=task_id, state=AGENT_STAGING_INPUT,
                 name=asic)
            os.mkdir('%s' % cu_tmpdir)
            prof('advance', uid=task_id, state=ALLOCATING_PENDING,
                 name=asic)
            prof(event='work done', state=AGENT_STAGING_INPUT_PENDING,
                 uid=task_id, name=asic)
            prof(event='put', state=ALLOCATING_PENDING,
                 uid=task_id, name=asic)

            #
            # ASC
            #
            prof(event='get', state=ALLOCATING_PENDING,
                 uid=task_id, name=asc)
            prof(event='work start', state=ALLOCATING_PENDING,
                 uid=task_id, name=asc)
            prof('advance', uid=task_id, state=ALLOCATING, name=asc)
            prof('schedule', msg='try', uid=task_id, name=asc)
            prof('schedule', msg='allocated', uid=task_id, name=asc)
            prof('advance', uid=task_id, state=EXECUTING_PENDING, name=asc)
            prof(event='put', state=EXECUTING_PENDING,
                 uid=task_id, name=asc)
            prof(event='work done', state=ALLOCATING_PENDING,
                 uid=task_id, name=asc)

            #
            # AEC
            #
            prof(event='get', state=EXECUTING_PENDING,
                 uid=task_id, name=aec)
            prof(event='work start', state=EXECUTING_PENDING,
                 uid=task_id, name=aec)
            prof('exec', msg='unit launch', uid=task_id, name=aec)
            prof('spawn', msg='unit spawn', uid=task_id, name=aec)

            argv_keepalive = [
                ffi.new("char[]", "RADICAL-Pilot"),
                ffi.new("char[]", "--np"),
                ffi.new("char[]", "1"),
            ]

            # Let the orted write stdout and stderr to rank-based output files
            argv_keepalive.append(ffi.new("char[]", "--output-filename"))
            argv_keepalive.append(
                ffi.new("char[]", "%s:nojobid,nocopy" % str(cu_tmpdir)))

            argv_keepalive.append(ffi.new("char[]", "sh"))
            argv_keepalive.append(ffi.new("char[]", "-c"))

            task_command = 'sleep %d' % SLEEP

            # Wrap in (sub)shell for output redirection
            task_command = (
                "echo script start_script `%s` >> %s/PROF; " % (GTOD, cu_tmpdir)
                + task_command
                + "; echo script after_exec `%s` >> %s/PROF" % (GTOD, cu_tmpdir)
            )
            # NOTE(review): RETVAL is not assigned anywhere in this command,
            # so unless it is inherited from the environment `exit $RETVAL`
            # expands to a bare `exit`.
            argv_keepalive.append(
                ffi.new("char[]", str("%s; exit $RETVAL" % str(task_command))))

            argv_keepalive.append(ffi.NULL)  # NULL Termination Required
            argv = ffi.new("char *[]", argv_keepalive)

            prof('command', msg='launch command constructed',
                 uid=task_id, name=aec)

            # Both callbacks receive the same handle; it is stored in
            # task_instance_map below so that it stays referenced.
            cb_payload = {'instance': self, 'task': task_id}
            cbdata = ffi.new_handle(cb_payload)

            lib.orte_submit_job(argv, index_ptr,
                                lib.launch_cb, cbdata,
                                lib.finish_cb, cbdata)

            index = index_ptr[0]  # pointer notation
            self.task_instance_map[index] = cbdata

            prof('spawn', msg='spawning passed to orte',
                 uid=task_id, name=aec)
            prof(event='work done', state=EXECUTING_PENDING,
                 uid=task_id, name=aec)

            # Single-argument call form prints the same on Python 2 and 3
            print("Task %s submitted!" % task_id)

            self.active += 1
            task_no += 1

        else:
            time.sleep(0.001)

    print("Execution done.")
    # An explicit empty string gives a blank line under the Python 2 print
    # statement as well, where a bare `print()` would output "()".
    print("")
    print("Collecting profiles ...")

    # Tasks were numbered 1..TASKS above, so collect exactly that range
    # (range(TASKS) would visit a non-existent unit 0 and skip the last one).
    for task_no in range(1, TASKS + 1):

        task_id = 'unit.%.6d' % task_no
        prof('advance', uid=task_id, state=AGENT_STAGING_OUTPUT, name=asoc)

        cu_tmpdir = '%s' % task_id
        prof_path = "%s/PROF" % cu_tmpdir

        if os.path.isfile(prof_path):
            # Best effort: a broken PROF file is reported, not fatal
            try:
                with open(prof_path, 'r') as prof_f:
                    txt = prof_f.read()
                    for line in txt.split("\n"):
                        if line:
                            x1, x2, x3 = line.split()
                            prof(x1, msg=x2, timestamp=float(x3),
                                 uid=task_id, name=asoc)
            except Exception as e:
                print("Pre/Post profiling file read failed: `%s`" % e)