Example #1
0
    def run(self):
        """Attach to the DVM, submit three single-process jobs, and wait.

        Every submission increments ``self.mywait`` and ``self.myspawn`` and
        registers this instance in ``task_instance_map`` under the task index
        that ``orte_submit_job`` writes back.  The method then polls until
        both counters are back at zero (presumably they are decremented by
        ``launch_cb`` / ``finish_cb``, which are defined elsewhere -- TODO
        confirm).
        """

        # The individual char[] buffers are referenced from a Python list so
        # that they stay allocated while the char*[] built from them is used.
        # NOTE(review): the plain str initialisers assume Python 2; cffi on
        # Python 3 expects bytes for char[].
        argv_keepalive = [
            ffi.new("char[]", "submit"),  # Will be stripped off by the library
            ffi.new("char[]", "--hnp"),
            ffi.new("char[]", DVM_URI),
            ffi.NULL,  # Required
        ]
        argv = ffi.new("char *[]", argv_keepalive)
        lib.orte_submit_init(3, argv, ffi.NULL)

        # Out-parameter: orte_submit_job stores the task index here
        index = ffi.new("int *")

        for _ in range(3):

            argv_keepalive = [
                ffi.new("char[]", "RADICAL-Pilot"),
                ffi.new("char[]", "--np"),
                ffi.new("char[]", "1"),
                ffi.new("char[]", "false"),
                ffi.NULL,  # Required
            ]
            argv = ffi.new("char *[]", argv_keepalive)
            lib.orte_submit_job(argv, index, lib.launch_cb, ffi.NULL,
                                lib.finish_cb, ffi.NULL)
            task = index[0]
            task_instance_map[task] = self
            self.mywait += 1
            self.myspawn += 1
            # Parenthesised single-argument form: same output on Python 2,
            # valid syntax on Python 3, consistent with print("Done!") below.
            print("Task %d submitted!" % task)

        # Poll until every submitted task has been launched and has finished
        while self.myspawn > 0 or self.mywait > 0:
            time.sleep(0.1)

        print("Done!")
Example #2
0
    def run(self):
        """Initialise the ORTE submit library, launch three jobs, then wait.

        For each job the task index reported by ``orte_submit_job`` is mapped
        to this instance in ``task_instance_map`` and both ``self.mywait`` and
        ``self.myspawn`` are incremented.  The method returns once both
        counters have dropped to zero; the code that decrements them is not
        part of this method (presumably the launch/finish callbacks -- TODO
        confirm).
        """

        # Keep the char[] buffers referenced from Python while the char*[]
        # array that points at them is handed to the library.
        argv_keepalive = [
            ffi.new("char[]", "submit"),  # Will be stripped off by the library
            ffi.new("char[]", "--hnp"),
            ffi.new("char[]", DVM_URI),
            ffi.NULL,  # Required
        ]
        argv = ffi.new("char *[]", argv_keepalive)
        lib.orte_submit_init(3, argv, ffi.NULL)

        # Receives the task index from orte_submit_job
        index = ffi.new("int *")

        for _ in range(3):

            argv_keepalive = [
                ffi.new("char[]", "RADICAL-Pilot"),
                ffi.new("char[]", "--np"),
                ffi.new("char[]", "1"),
                ffi.new("char[]", "false"),
                ffi.NULL,  # Required
            ]
            argv = ffi.new("char *[]", argv_keepalive)
            lib.orte_submit_job(argv, index, lib.launch_cb, ffi.NULL,
                                lib.finish_cb, ffi.NULL)
            task = index[0]
            task_instance_map[task] = self
            self.mywait += 1
            self.myspawn += 1
            # Function-call form works on both Python 2 and 3 and matches
            # the print("Done!") call at the end of this method.
            print("Task %d submitted!" % task)

        # Busy-wait (100 ms granularity) for all outstanding tasks
        while self.myspawn > 0 or self.mywait > 0:
            time.sleep(0.1)

        print("Done!")
Example #3
0
    def init_orte(self, cu):
        """Initialise the ORTE submit library for the DVM referenced by *cu*.

        The DVM URI is taken from ``cu['slots']['lm_info']['dvm_uri']``.
        On success path the method stores a cffi handle to this instance in
        ``self._myhandle``, sets ``self._orte_initialized`` and returns the
        value returned by ``orte_lib.orte_submit_init``.

        Raises RuntimeError if ``lm_info`` is absent from the slots, is
        empty, or does not contain ``dvm_uri``.
        """

        # FIXME: it feels as a hack to get the DVM URI from the CU
        slots = cu['slots']

        def _reject(template):
            # self.name is only evaluated when a check actually fails
            raise RuntimeError(template % (self.name, slots))

        if 'lm_info' not in slots:
            _reject('No lm_info to init via %s: %s')

        lm_info = slots['lm_info']

        if not lm_info:
            _reject('lm_info missing for %s: %s')

        if 'dvm_uri' not in lm_info:
            _reject('dvm_uri not in lm_info for %s: %s')

        dvm_uri = lm_info['dvm_uri']

        # Notify orte that we are using threads and that we require mutexes
        orte_lib.opal_set_using_threads(True)

        # First token will be stripped off by lib; the trailing NULL is
        # required.  The char[] buffers are kept in a list so they remain
        # referenced while the char*[] is in use.
        tokens = ("RADICAL-Pilot", "--hnp", str(dvm_uri))
        buffers = [ffi.new("char[]", token) for token in tokens]
        buffers.append(ffi.NULL)

        argv = ffi.new("char *[]", buffers)
        status = orte_lib.orte_submit_init(3, argv, ffi.NULL)

        self._myhandle = ffi.new_handle(self)
        self._orte_initialized = True

        return status
Example #4
0
    def init_orte(self, cu):
        """Connect the ORTE submit library to the DVM described by *cu*.

        Reads the DVM URI from ``cu['slots']['lm_info']['dvm_uri']``, tells
        ORTE that threads are in use, and calls ``orte_submit_init``.  The
        return value of that call is passed back to the caller; a cffi handle
        to this instance is kept in ``self._myhandle`` and
        ``self._orte_initialized`` is set.

        Raises RuntimeError when ``lm_info`` is missing or empty, or when it
        carries no ``dvm_uri``.
        """

        # FIXME: it feels as a hack to get the DVM URI from the CU
        slots = cu['slots']

        # Select the failure message (if any); the checks run in the same
        # order as before so the first failing one wins.
        problem = None
        if 'lm_info' not in slots:
            problem = 'No lm_info to init via %s: %s'
        elif not slots['lm_info']:
            problem = 'lm_info missing for %s: %s'
        elif 'dvm_uri' not in slots['lm_info']:
            problem = 'dvm_uri not in lm_info for %s: %s'

        if problem is not None:
            raise RuntimeError(problem % (self.name, slots))

        dvm_uri = slots['lm_info']['dvm_uri']

        # Notify orte that we are using threads and that we require mutexes
        orte_lib.opal_set_using_threads(True)

        argv_keepalive = []
        argv_keepalive.append(ffi.new("char[]", "RADICAL-Pilot"))  # will be stripped off by lib
        argv_keepalive.append(ffi.new("char[]", "--hnp"))
        argv_keepalive.append(ffi.new("char[]", str(dvm_uri)))
        argv_keepalive.append(ffi.NULL)  # required

        argv = ffi.new("char *[]", argv_keepalive)
        result = orte_lib.orte_submit_init(3, argv, ffi.NULL)

        self._myhandle = ffi.new_handle(self)
        self._orte_initialized = True

        return result
Example #5
0
    def run(self):
        """Submit three ``sleep 10`` jobs, cancel one on request, then halt.

        After initialising the ORTE submit library against ``DVM_URI`` the
        method submits three jobs, registering this instance in
        ``task_instance_map`` under each returned task index and incrementing
        ``self.mywait`` / ``self.myspawn`` per job.  While waiting for both
        counters to reach zero it watches the module-level flag
        ``fourislaunched`` (set outside this method); when the flag is set,
        task index 1 is cancelled and the flag is cleared.  Finally
        ``orte_submit_halt`` is called.

        Raises RuntimeError if ``orte_submit_job`` reports a non-zero return
        code.
        """

        # Declared once at function scope; the flag is reset in the wait loop.
        global fourislaunched

        # The char[] buffers stay referenced from this list while the
        # char*[] that points at them is used by the library.
        argv_keepalive = [
            ffi.new("char[]", "RADICAL-Pilot"),  # Will be stripped off by the library
            ffi.new("char[]", "--hnp"),
            ffi.new("char[]", DVM_URI),
            ffi.NULL,  # Required
        ]
        argv = ffi.new("char *[]", argv_keepalive)
        lib.orte_submit_init(3, argv, ffi.NULL)

        # Out-parameter that receives the task index from orte_submit_job
        index = ffi.new("int[1]")

        for _ in range(3):

            argv_keepalive = [
                ffi.new("char[]", "RADICAL-Pilot"),
                ffi.new("char[]", "--np"),
                ffi.new("char[]", "1"),
                ffi.new("char[]", "bash"),
                ffi.new("char[]", "-c"),
                ffi.new("char[]", "sleep %d" % 10),
                ffi.NULL,  # Required
            ]
            argv = ffi.new("char *[]", argv_keepalive)
            rc = lib.orte_submit_job(argv, index, lib.launch_cb, ffi.NULL,
                                     lib.finish_cb, ffi.NULL)
            if rc != 0:
                # A rejected job never triggers the callbacks, so counting it
                # would make the wait loop below spin forever.
                raise RuntimeError("orte_submit_job failed with error code %d" % rc)

            task = index[0]
            task_instance_map[task] = self
            self.mywait += 1
            self.myspawn += 1
            print("Task %d submitted!" % task)

        while self.myspawn > 0 or self.mywait > 0:

            if fourislaunched:
                print("Cancelling task ...")
                # NOTE(review): the cancelled task index is hard-coded to 1
                lib.orte_submit_cancel(1)
                fourislaunched = False

            time.sleep(0.1)

        print("Done!")

        lib.orte_submit_halt()
Example #6
0
    def run(self):
        """Push TASKS emulated units through ORTE and collect their profiles.

        Phase 1 (submission): units are numbered ``1 .. TASKS``.  A new unit
        is submitted whenever fewer than ``CORES`` units are active;
        otherwise the loop sleeps for 1 ms.  For every unit the method emits
        the profiling events of the staging-input, scheduling and executing
        components, creates a per-unit directory named after the unit id, and
        submits an ``sh -c`` job that sleeps ``SLEEP`` seconds and appends
        timestamps to ``<unit dir>/PROF``.  ``self.active`` is incremented
        per submission; it is not decremented here (presumably by the finish
        callback -- TODO confirm).  The loop ends once all units have been
        submitted and ``self.active`` is back to zero.

        Phase 2 (collection): for each unit ``1 .. TASKS`` the ``PROF`` file,
        if present, is parsed line by line (three whitespace-separated
        fields: event, message, timestamp) and replayed into the session
        profiler.  Read/parse failures are reported and skipped.
        """

        # The char[] buffers stay referenced from this list while the
        # char*[] built from them is used by the library.
        argv_keepalive = [
            ffi.new("char[]", "RADICAL-Pilot"),  # Will be stripped off by the library
            ffi.new("char[]", "--hnp"),
            ffi.new("char[]", DVM_URI),
            ffi.NULL,  # Required
        ]
        argv = ffi.new("char *[]", argv_keepalive)
        lib.orte_submit_init(3, argv, ffi.NULL)

        # Used for storing the task id that is returned by orte_submit_job
        index_ptr = ffi.new("int[1]")

        task_no = 1
        while task_no <= TASKS or self.active > 0:

            if task_no <= TASKS and self.active < CORES:

                task_id = 'unit.%.6d' % task_no
                cu_tmpdir = task_id

                #
                # ASIC
                #
                self.session.prof.prof(event='get', state=AGENT_STAGING_INPUT_PENDING, uid=task_id, name='AgentStagingInputComponent')
                self.session.prof.prof(event='work start', state=AGENT_STAGING_INPUT_PENDING, uid=task_id, name='AgentStagingInputComponent')
                self.session.prof.prof('advance', uid=task_id, state=AGENT_STAGING_INPUT, name='AgentStagingInputComponent')
                os.mkdir(cu_tmpdir)
                self.session.prof.prof('advance', uid=task_id, state=ALLOCATING_PENDING, name='AgentStagingInputComponent')
                self.session.prof.prof(event='work done', state=AGENT_STAGING_INPUT_PENDING, uid=task_id, name='AgentStagingInputComponent')
                self.session.prof.prof(event='put', state=ALLOCATING_PENDING, uid=task_id, name='AgentStagingInputComponent')

                #
                # ASC
                #
                self.session.prof.prof(event='get', state=ALLOCATING_PENDING, uid=task_id, name='AgentSchedulingComponent')
                self.session.prof.prof(event='work start', state=ALLOCATING_PENDING, uid=task_id, name='AgentSchedulingComponent')
                self.session.prof.prof('advance', uid=task_id, state=ALLOCATING, name='AgentSchedulingComponent')
                self.session.prof.prof('schedule', msg='try', uid=task_id, name='AgentSchedulingComponent')
                self.session.prof.prof('schedule', msg='allocated', uid=task_id, name='AgentSchedulingComponent')
                self.session.prof.prof('advance', uid=task_id, state=EXECUTING_PENDING, name='AgentSchedulingComponent')
                self.session.prof.prof(event='put', state=EXECUTING_PENDING, uid=task_id, name='AgentSchedulingComponent')
                self.session.prof.prof(event='work done', state=ALLOCATING_PENDING, uid=task_id, name='AgentSchedulingComponent')

                #
                # AEC
                #
                self.session.prof.prof(event='get', state=EXECUTING_PENDING, uid=task_id, name='AgentExecutingComponent')
                self.session.prof.prof(event='work start', state=EXECUTING_PENDING, uid=task_id, name='AgentExecutingComponent')
                self.session.prof.prof('exec', msg='unit launch', uid=task_id, name='AgentExecutingComponent')
                self.session.prof.prof('spawn', msg='unit spawn', uid=task_id, name='AgentExecutingComponent')

                argv_keepalive = [
                    ffi.new("char[]", "RADICAL-Pilot"),
                    ffi.new("char[]", "--np"),
                    ffi.new("char[]", "1"),
                ]

                # Let the orted write stdout and stderr to rank-based output files
                argv_keepalive.append(ffi.new("char[]", "--output-filename"))
                argv_keepalive.append(ffi.new("char[]", "%s:nojobid,nocopy" % cu_tmpdir))

                argv_keepalive.append(ffi.new("char[]", "sh"))
                argv_keepalive.append(ffi.new("char[]", "-c"))

                task_command = 'sleep %d' % SLEEP

                # Bracket the payload with timestamp records appended to the
                # unit's PROF file (read back in the collection phase below)
                task_command = (
                    "echo script start_script `%s` >> %s/PROF; " % (GTOD, cu_tmpdir)
                    + task_command
                    + "; echo script after_exec `%s` >> %s/PROF" % (GTOD, cu_tmpdir)
                )
                # NOTE(review): RETVAL is never assigned in the command string
                # built here, so the shell sees a bare `exit` -- confirm intent.
                argv_keepalive.append(ffi.new("char[]", str("%s; exit $RETVAL" % task_command)))

                argv_keepalive.append(ffi.NULL)  # NULL Termination Required
                argv = ffi.new("char *[]", argv_keepalive)

                self.session.prof.prof('command', msg='launch command constructed', uid=task_id, name='AgentExecutingComponent')

                # The same handle is passed as callback data for both the
                # launch and the finish callback.
                struct = {'instance': self, 'task': task_id}
                cbdata = ffi.new_handle(struct)

                lib.orte_submit_job(argv, index_ptr, lib.launch_cb, cbdata, lib.finish_cb, cbdata)

                index = index_ptr[0]  # pointer notation
                # Storing the handle keeps it referenced after this iteration
                self.task_instance_map[index] = cbdata

                self.session.prof.prof('spawn', msg='spawning passed to orte', uid=task_id, name='AgentExecutingComponent')
                self.session.prof.prof(event='work done', state=EXECUTING_PENDING, uid=task_id, name='AgentExecutingComponent')

                print("Task %s submitted!" % task_id)

                self.active += 1
                task_no += 1

            else:
                time.sleep(0.001)

        print("Execution done.")
        # print("") yields an empty line on both Python 2 and Python 3
        # (a bare print() prints "()" under the Python 2 print statement).
        print("")
        print("Collecting profiles ...")

        # Units were numbered 1..TASKS above, so collect exactly that range
        # (range(TASKS) would visit a nonexistent unit 0 and skip the last).
        for task_no in range(1, TASKS + 1):
            task_id = 'unit.%.6d' % task_no
            self.session.prof.prof('advance', uid=task_id, state=AGENT_STAGING_OUTPUT, name='AgentStagingOutputComponent')
            cu_tmpdir = task_id
            prof_path = "%s/PROF" % cu_tmpdir
            if os.path.isfile(prof_path):
                try:
                    with open(prof_path, 'r') as prof_f:
                        txt = prof_f.read()
                        for line in txt.split("\n"):
                            if line:
                                # event name, message, timestamp
                                x1, x2, x3 = line.split()
                                self.session.prof.prof(x1, msg=x2, timestamp=float(x3), uid=task_id, name='AgentStagingOutputComponent')
                except Exception as e:
                    # Best effort: a broken profile must not abort collection
                    print("Pre/Post profiling file read failed: `%s`" % e)