Example #1
def test_create_task_from_cu():
    """
    **Purpose**: Test if the 'create_task_from_cu' function generates a Task with the correct uid, parent_stage and
    parent_pipeline from a RP ComputeUnit
    """

    session = rp.Session(dburl=MLAB)
    umgr = rp.UnitManager(session=session)
    cud = rp.ComputeUnitDescription()
    cud.name = 'uid, name, parent_stage_uid, parent_stage_name, parent_pipeline_uid, parent_pipeline_name'
    cud.executable = '/bin/echo'

    cu = rp.ComputeUnit(umgr, cud)

    t = create_task_from_cu(cu)

    assert t.uid == 'uid'
    assert t.name == 'name'
    assert t.parent_stage['uid'] == 'parent_stage_uid'
    assert t.parent_stage['name'] == 'parent_stage_name'
    assert t.parent_pipeline['uid'] == 'parent_pipeline_uid'
    assert t.parent_pipeline['name'] == 'parent_pipeline_name'
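
The assertions above pin down the convention create_task_from_cu relies on: EnTK packs the task identifiers into cu.name as one comma-separated string. A minimal sketch of that unpacking, reconstructed only from the assertions in this test (the real EnTK helper may differ in detail):

def create_task_from_cu_sketch(cu):
    from radical.entk import Task   # assumed import

    # cu.name carries: uid, name, parent_stage uid/name, parent_pipeline uid/name
    uid, name, s_uid, s_name, p_uid, p_name = \
        [f.strip() for f in cu.name.split(',')]

    t = Task()
    t.uid = uid
    t.name = name
    t.parent_stage = {'uid': s_uid, 'name': s_name}
    t.parent_pipeline = {'uid': p_uid, 'name': p_name}
    return t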
Example #2
    def enter(self, project=None):
        if project is not None:
            self.project = project

        project = self.project

        # just in case the user did not open a session yet, we do it now
        if project.session is None:
            project.open_rp()

        self.unit_manager = rp.UnitManager(session=project.session)

        # register this scheduler with the project for later cleanup
        self.project.schedulers.add(self)

        self.pilot = self.project.pilot_manager.submit_pilots(self.desc)

        self.unit_manager.add_pilots(self.pilot)
        self.unit_manager.register_callback(self.unit_callback)

        self._folder_name = '%s-%s' % (
            project.session._uid, self.pilot._uid)

        self.stage_generators()
Example #3
        pdesc2.project = ''
        pilot_list.append(pdesc2)

        # Continue adding pilots by creating a new description and appending
        # it to the list.

        # Submit the pilot list to the Pilot Manager. Actually all the pilots are
        # submitted to the Pilot Manager at once.
        print "Submitting Compute Pilots to Pilot Manager ..."
        pilots = pmgr.submit_pilots(pilot_list)

        # Combine the ComputePilot, the ComputeUnits and a scheduler via
        # a UnitManager object. The scheduler that supports multi-pilot sessions
        # is Round Robin. Direct Submission does not.
        print "Initializing Unit Manager ..."
        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHEDULER_ROUND_ROBIN)

        # Register our callback with the UnitManager. This callback will get
        # called every time any of the units managed by the UnitManager
        # change their state.
        umgr.register_callback(unit_state_cb)

        # Add the created ComputePilot to the UnitManager.
        print "Registering Compute Pilots with Unit Manager ..."
        umgr.add_pilots(pilots)

        NUMBER_JOBS = 64  # the total number of cus to run

        # submit CUs to pilot job
        cudesc_list = []
        for i in range(NUMBER_JOBS):
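            # (the snippet ends here; a plausible loop body, modeled on the
            #  other examples in this collection -- the workload and core
            #  count are placeholders)
            cudesc = rp.ComputeUnitDescription()
            cudesc.executable = '/bin/date'
            cudesc.cores = 1
            cudesc_list.append(cudesc)

        # submit the bulk to the UnitManager and wait for completion
        units = umgr.submit_units(cudesc_list)
        umgr.wait_units()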
Example #4
def test_bw_tagging():

    # we use a reporter class for nicer output
    report = ru.Reporter(name='radical.pilot')
    report.title('Getting Started (RP version %s)' % rp.version)

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()

    # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
    pmgr = rp.PilotManager(session=session)

    # Define a 128-core pilot on ncsa.bw_aprun that runs for 10 minutes.
    # Here we use a dict to initialize the description object
    pd_init = {
        'resource': 'ncsa.bw_aprun',
        'runtime': 10,  # pilot runtime (min)
        'exit_on_error': True,
        'project': 'gk4',
        'queue': 'high',
        'access_schema': 'gsissh',
        'cores': 128
    }
    pdesc = rp.ComputePilotDescription(pd_init)

    # Launch the pilot.
    pilot = pmgr.submit_pilots(pdesc)

    report.header('submit units')

    # Register the ComputePilot in a UnitManager object.
    umgr = rp.UnitManager(session=session)
    umgr.add_pilots(pilot)

    # Create a workload of ComputeUnits.
    # Each compute unit runs '/bin/hostname'.

    n = 5  # number of units to run
    report.info('create %d unit description(s)\n\t' % n)

    cuds = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's1_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 16
        # cud.cpu_process_type = rp.MPI
        # cud.cpu_thread_type  = rp.OpenMP
        cud.output_staging = {
            'source': 'unit:///s1_t%s_hostname.txt' % i,
            'target': 'client:///s1_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }
        cuds.append(cud)
        report.progress()
    report.ok('>>ok\n')

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus = umgr.submit_units(cuds)

    # Wait for all compute units to reach a final state
    # (DONE, CANCELED or FAILED).
    report.header('gather results')
    umgr.wait_units()

    n = 5  # number of units to run
    report.info('create %d unit description(s)\n\t' % n)

    cuds = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's2_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 16
        cud.tag = cus[i].uid
        # cud.cpu_process_type = rp.MPI
        # cud.cpu_thread_type  = rp.OpenMP
        cud.output_staging = {
            'source': 'unit:///s2_t%s_hostname.txt' % i,
            'target': 'client:///s2_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }
        cuds.append(cud)
        report.progress()
    report.ok('>>ok\n')

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus = umgr.submit_units(cuds)

    # Wait for all compute units to reach a final state (DONE, CANCELED or FAILED).
    report.header('gather results')
    umgr.wait_units()

    for i in range(0, n):
        assert open('s1_t%s_hostname.txt' % i,'r').readline().strip() == \
               open('s2_t%s_hostname.txt' % i,'r').readline().strip()

    report.header('finalize')
    session.close(download=True)

    report.header()

    for f in glob.glob('%s/*.txt' % os.getcwd()):
        os.remove(f)
Example #5
        pdesc = rp.ComputePilotDescription()
        pdesc.resource = "xsede.stampede"
        pdesc.runtime = 40  # minutes
        pdesc.cores = 32
        pdesc.project = "TG-MCB090174"

        pilot_2 = pmgr.submit_pilots(pdesc)

        # reuse the pilot description for the third pilot
        pdesc.cores = 128

        pilot_3 = pmgr.submit_pilots(pdesc)

        # Combine the ComputePilot, the ComputeUnits and a scheduler via
        # a UnitManager object.
        umgr = rp.UnitManager(session=session, scheduler=rp.SCHED_BACKFILLING)

        # Register our callback with the UnitManager. This callback will get
        # called every time any of the units managed by the UnitManager
        # change their state.
        umgr.register_callback(unit_state_cb)

        # Add the previously created ComputePilots to the UnitManager.
        umgr.add_pilots([pilot_1, pilot_2, pilot_3])

        # wait until the first pilot becomes active
        pilot_1.wait(state=rp.ACTIVE)

        # Create a workload of 8 ComputeUnits.
        cus = list()
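        # (the snippet ends here; a plausible continuation, following the
        #  other examples in this collection -- the workload is a placeholder)
        for i in range(8):
            cud = rp.ComputeUnitDescription()
            cud.executable = '/bin/date'
            cud.cores = 1
            cus.append(cud)

        # the backfilling scheduler spreads the bulk across all three pilots
        units = umgr.submit_units(cus)
        umgr.wait_units()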
Example #6
def test_ordered_scheduler():

    report = ru.Reporter(name='radical.pilot')
    report.title('Getting Started (RP version %s)' % rp.version)

    session = rp.Session()

    try:
        # read the config used for resource details
        report.info('read config')
        report.ok('>>ok\n')

        report.header('submit pilots')

        pd_init = {
            'resource': 'local.localhost',
            'runtime': 5,
            'exit_on_error': True,
            'cores': 10
        }
        pdesc = rp.ComputePilotDescription(pd_init)
        pmgr = rp.PilotManager(session=session)
        pilot = pmgr.submit_pilots(pdesc)

        report.header('submit pipelines')

        umgr = rp.UnitManager(session=session)
        umgr.add_pilots(pilot)

        n_pipes = 2
        n_stages = 5
        n_tasks = 4

        cuds = list()
        for p in range(n_pipes):
            for s in range(n_stages):
                for t in range(n_tasks):
                    cud = rp.ComputeUnitDescription()
                    cud.executable = '%s/pipeline_task.sh' % pwd
                    cud.arguments = [p, s, t, 10]
                    cud.cpu_processes = 1
                    cud.tags = {
                        'order': {
                            'ns': p,
                            'order': s,
                            'size': n_tasks
                        }
                    }
                    cud.name = 'p%03d-s%03d-t%03d' % (p, s, t)
                    cuds.append(cud)
                    report.progress()

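        # shuffle the submission order on purpose: the scheduler is expected
        # to restore per-pipeline stage order from the 'order' tags above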
        import random
        random.shuffle(cuds)

        # Submit the previously created ComputeUnit descriptions to the
        # UnitManager. This will trigger the selected scheduler to start
        # assigning ComputeUnits to the ComputePilots.
        umgr.submit_units(cuds)

        # Wait for all compute units to reach a final state
        report.header('gather results')
        umgr.wait_units()

    except Exception as e:
        # Something unexpected happened in the pilot code above
        report.error('caught Exception: %s\n' % e)
        ru.print_exception_trace()
        raise

    except (KeyboardInterrupt, SystemExit) as e:
        # the callback called sys.exit(), and we can here catch the
        # corresponding KeyboardInterrupt exception for shutdown.  We also catch
        # SystemExit (which gets raised if the main threads exits for some other
        # reason).
        ru.print_exception_trace()
        report.warn('exit requested\n')

    finally:
        # always clean up the session, no matter if we caught an exception or
        # not.  This will kill all remaining pilots.
        report.header('finalize')
        session.close(download=False)

    report.header()
Example #7
    def __enter__(self):
        # create the managers
        self.session = rp.Session()

        self.pmgr = rp.PilotManager(session=self.session)
        self.umgr = rp.UnitManager(session=self.session)
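
A context manager that creates the managers in __enter__ would normally pair it with an __exit__ that tears the stack down again; a minimal sketch, not part of the original snippet:

    def __exit__(self, exc_type, exc_value, traceback):
        # closing the session also terminates any remaining pilots
        self.session.close()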
Example #8
    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()
    print "session id: %s" % session.uid

    # all other pilot code is now tried/excepted.  If an exception is caught, we
    # can rely on the session object to exist and be valid, and we can thus tear
    # the whole RP stack down via a 'session.close()' call in the 'finally'
    # clause...
    try:

        pmgr = rp.PilotManager(session=session)
        pmgr.register_callback(pilot_state_cb)

        umgr = rp.UnitManager(session=session, scheduler=SCHED)
        umgr.register_callback(unit_state_cb,      rp.UNIT_STATE)
        umgr.register_callback(wait_queue_size_cb, rp.WAIT_QUEUE_SIZE)
        
        
        cuds = list()
        for unit_count in range(0, UNITS):
            cud = rp.ComputeUnitDescription()
            cud.executable     = "/bin/sh"
            cud.arguments      = ["-c", "echo $HOSTNAME:$OSG_HOSTNAME && sleep %d" % SLEEP]
            cud.cores          = 1
            cuds.append(cud)

        units = umgr.submit_units(cuds)
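
The callbacks registered above (pilot_state_cb, unit_state_cb and wait_queue_size_cb) are not part of the snippet; minimal sketches with their usual signatures (the bodies are illustrative only):

def pilot_state_cb(pilot, state):
    # fires on every pilot state transition
    print("pilot %s: %s" % (pilot.uid, state))

def unit_state_cb(unit, state):
    # fires on every unit state transition
    print("unit  %s: %s" % (unit.uid, state))

def wait_queue_size_cb(umgr, wait_queue_size):
    # fires when the number of units waiting for pilot assignment changes
    print("wait queue size: %s" % wait_queue_size)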
        
Example #9
    def _process_tasks(self, task_queue, rmgr, mq_hostname, port):
        '''
        **Purpose**: The new thread that gets spawned by the main tmgr process
                     invokes this function. This function receives tasks from
                     'task_queue' and submits them to the RADICAL Pilot RTS.
        '''

        placeholders = dict()

        # ----------------------------------------------------------------------
        def load_placeholder(task, rts_uid):

            parent_pipeline = str(task.parent_pipeline['name'])
            parent_stage = str(task.parent_stage['name'])

            if parent_pipeline not in placeholders:
                placeholders[parent_pipeline] = dict()

            if parent_stage not in placeholders[parent_pipeline]:
                placeholders[parent_pipeline][parent_stage] = dict()

            if None not in [parent_pipeline, parent_stage, task.name]:
                placeholders[parent_pipeline][parent_stage][task.name] = \
                                                          {'path'   : task.path,
                                                           'rts_uid': rts_uid}

        # ----------------------------------------------------------------------
        def unit_state_cb(unit, state):

            try:

                self._log.debug('Unit %s in state %s' % (unit.uid, unit.state))

                if unit.state in rp.FINAL:

                    # Acquire a connection+channel to the rmq server
                    mq_connection = pika.BlockingConnection(
                        pika.ConnectionParameters(host=mq_hostname, port=port))
                    mq_channel = mq_connection.channel()

                    task = None
                    task = create_task_from_cu(unit, self._prof)

                    self._advance(task, 'Task', states.COMPLETED, mq_channel,
                                  '%s-cb-to-sync' % self._sid)

                    load_placeholder(task, unit.uid)

                    task_as_dict = json.dumps(task.to_dict())

                    mq_channel.basic_publish(exchange='',
                                             routing_key='%s-completedq-1' %
                                             self._sid,
                                             body=task_as_dict)

                    self._log.info(
                        'Pushed task %s with state %s to completed '
                        'queue %s-completedq-1', task.uid, task.state,
                        self._sid)

                    mq_connection.close()

            except KeyboardInterrupt:
                self._log.exception(
                    'Execution interrupted (probably by Ctrl+C), '
                    'exiting callback thread gracefully...')
                raise

            except Exception as e:
                self._log.exception('Error in RP callback thread: %s', e)

        # ----------------------------------------------------------------------

        umgr = rp.UnitManager(session=rmgr._session)
        umgr.add_pilots(rmgr.pilot)
        umgr.register_callback(unit_state_cb)

        try:

            while not self._tmgr_terminate.is_set():

                body = None

                try:
                    body = task_queue.get(block=True, timeout=10)

                except Queue.Empty:
                    # Ignore, we don't always have new tasks to run
                    pass

                if not body:
                    continue

                task_queue.task_done()

                bulk_tasks = list()
                bulk_cuds = list()

                for msg in body:

                    task = Task()
                    task.from_dict(msg)
                    bulk_tasks.append(task)
                    bulk_cuds.append(
                        create_cud_from_task(task, placeholders, self._prof))

                    mq_connection = pika.BlockingConnection(
                        pika.ConnectionParameters(host=mq_hostname, port=port))
                    mq_channel = mq_connection.channel()

                    self._advance(task, 'Task', states.SUBMITTING, mq_channel,
                                  '%s-tmgr-to-sync' % self._sid)
                    mq_connection.close()

                umgr.submit_units(bulk_cuds)

        except KeyboardInterrupt:
            self._log.exception('Execution interrupted (probably by Ctrl+C), '
                                'cancelling task processor gracefully...')

        except Exception as e:
            self._log.exception('%s failed with %s', self._uid, e)
            raise EnTKError(e)
Example #10
def test_local_integration():

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()

    # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
    pmgr = rp.PilotManager(session=session)

    # Update localhost lfs path and size
    cfg = session.get_resource_config('local.localhost')
    new_cfg = rp.ResourceConfig('local.localhost', cfg)
    new_cfg.lfs_path_per_node = '/tmp'
    new_cfg.lfs_size_per_node = 1024  # MB
    session.add_resource_config(new_cfg)
    cfg = session.get_resource_config('local.localhost')

    # Check that the updated config is read by the session
    assert 'lfs_path_per_node' in cfg.keys()
    assert 'lfs_size_per_node' in cfg.keys()
    assert cfg['lfs_path_per_node'] == '/tmp'
    assert cfg['lfs_size_per_node'] == 1024

    # Define a 4-core local pilot that runs for 15 minutes.
    # Here we use a dict to initialize the description object
    pd_init = {
        'resource': 'local.localhost',
        'runtime': 15,  # pilot runtime (min)
        'cores': 4
    }
    pdesc = rp.ComputePilotDescription(pd_init)

    # Launch the pilot.
    pilot = pmgr.submit_pilots(pdesc)

    # Register the ComputePilot in a UnitManager object.
    umgr = rp.UnitManager(session=session)
    umgr.add_pilots(pilot)

    # Run 16 tasks that each require 1 core and 10MB of LFS
    n = 16
    cuds = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's1_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 1
        # cud.cpu_process_type = rp.MPI
        cud.lfs_per_process = 10  # MB
        cud.output_staging = {
            'source': 'unit:///s1_t%s_hostname.txt' % i,
            'target': 'client:///s1_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }
        cuds.append(cud)

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus = umgr.submit_units(cuds)

    # Wait for all units to finish
    umgr.wait_units()

    n = 16
    cuds2 = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
        cud.tag = cus[i].uid
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's2_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 1
        # cud.cpu_process_type = rp.MPI
        cud.lfs_per_process = 10  # MB
        cud.output_staging = {
            'source': 'unit:///s2_t%s_hostname.txt' % i,
            'target': 'client:///s2_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }

        cuds2.append(cud)

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus2 = umgr.submit_units(cuds2)

    # Wait for all units to finish
    umgr.wait_units()

    for i in range(0, n):
        assert open('s1_t%s_hostname.txt' % i, 'r').readline().strip() == \
               open('s2_t%s_hostname.txt' % i, 'r').readline().strip()

    session.close()

    txts = glob('%s/*.txt' % os.getcwd())
    for f in txts:
        os.remove(f)
Example #11
def test_pass_issue_359():

    session = rp.Session()

    try:
        c = rp.Context('ssh')
        c.user_id = CONFIG["xsede.stampede"]["user_id"]
        session.add_context(c)

        pmgr = rp.PilotManager(session=session)
        pmgr.register_callback(pilot_state_cb)

        core_configs = [8, 16, 17, 32, 33]

        umgr_list = []
        for cores in core_configs:

            umgr = rp.UnitManager(session=session,
                                  scheduler=rp.SCHED_DIRECT_SUBMISSION)

            umgr.register_callback(unit_state_cb)

            pdesc = rp.ComputePilotDescription()
            pdesc.resource = "xsede.stampede"
            pdesc.project = CONFIG["xsede.stampede"]["project"]
            pdesc.runtime = 10
            pdesc.cores = cores

            pilot = pmgr.submit_pilots(pdesc)

            umgr.add_pilots(pilot)

            umgr_list.append(umgr)

        unit_list = []

        for umgr in umgr_list:

            test_task = rp.ComputeUnitDescription()

            test_task.pre_exec = CONFIG["xsede.stampede"]["pre_exec"]
            test_task.input_staging = ["../helloworld_mpi.py"]
            test_task.executable = "python"
            test_task.arguments = ["helloworld_mpi.py"]
            test_task.mpi = True
            test_task.cores = 8

            unit = umgr.submit_units(test_task)

            unit_list.append(unit)

        for umgr in umgr_list:
            umgr.wait_units()

        for unit in unit_list:
            print "* Task %s - state: %s, exit code: %s, started: %s, finished: %s, stdout: %s" \
                % (unit.uid, unit.state, unit.exit_code, unit.start_time, unit.stop_time, unit.stdout)

            assert (unit.state == rp.DONE)

    except Exception as e:
        print('test failed: %s' % e)
        raise

    finally:
        pmgr.cancel_pilots()
        pmgr.wait_pilots()

        session.close()
Example #12
def run_test(cfg):

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()
    print "session id: %s" % session.uid

    # all other pilot code is now tried/excepted.  If an exception is caught, we
    # can rely on the session object to exist and be valid, and we can thus tear
    # the whole RP stack down via a 'session.close()' call in the 'finally'
    # clause...
    try:

        # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
        print "Initializing Pilot Manager ..."
        pmgr = rp.PilotManager(session=session)

        # Register our callback with the PilotManager. This callback will get
        # called every time any of the pilots managed by the PilotManager
        # change their state.
        pmgr.register_callback(pilot_state_cb)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = cfg['cp_resource']
        if cfg['cp_schema']:
            pdesc.access_schema = cfg['cp_schema']
        pdesc.project = cfg['cp_project']
        pdesc.queue = cfg['cp_queue']
        pdesc.runtime = cfg['cp_runtime']
        pdesc.cores = cfg['cp_cores']
        pdesc.cleanup = True

        # submit the pilot.
        print "Submitting Compute Pilot to Pilot Manager ..."
        pilot = pmgr.submit_pilots(pdesc)

        # Combine the ComputePilot, the ComputeUnits and a scheduler via
        # a UnitManager object.
        print "Initializing Unit Manager ..."
        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHEDULER_DIRECT_SUBMISSION)

        # Register our callback with the UnitManager. This callback will get
        # called every time any of the units managed by the UnitManager
        # change their state.
        umgr.register_callback(unit_state_cb)

        # Add the created ComputePilot to the UnitManager.
        print "Registering Compute Pilot with Unit Manager ..."
        umgr.add_pilots(pilot)

        NUMBER_JOBS = 10  # the total number of cus to run

        # submit CUs to pilot job
        cudesc_list = []
        for i in range(NUMBER_JOBS):

            cudesc = rp.ComputeUnitDescription()
            if cfg['cu_pre_exec']:
                cudesc.pre_exec = cfg['cu_pre_exec']
            cudesc.executable = cfg['executable']
            cudesc.arguments = ["helloworld_mpi.py"]
            cudesc.input_staging = [
                "%s/../examples/helloworld_mpi.py" % cfg['pwd']
            ]
            cudesc.cores = cfg['cu_cores']
            cudesc.mpi = True

            cudesc_list.append(cudesc)

        # Submit the previously created ComputeUnit descriptions to the
        # UnitManager. This will trigger the selected scheduler to start
        # assigning ComputeUnits to the ComputePilots.
        print("Submit Compute Units to Unit Manager ...")
        cu_set = umgr.submit_units(cudesc_list)

        print "Waiting for CUs to complete ..."
        umgr.wait_units()
        print "All CUs completed successfully!"

        for unit in cu_set:
            print "* Task %s - state: %s, exit code: %s, started: %s, finished: %s, stdout: %s" \
                  % (unit.uid, unit.state, unit.exit_code, unit.start_time, unit.stop_time, unit.stdout)

            assert (unit.state == rp.DONE)
            for i in range(cfg['cu_cores']):
                assert ('mpi rank %d/%d' % (i + 1, cfg['cu_cores'])
                        in unit.stdout)

    except Exception as e:
        # Something unexpected happened in the pilot code above
        print "caught Exception: %s" % e
        raise

    except (KeyboardInterrupt, SystemExit) as e:
        # the callback called sys.exit(), and we can here catch the
        # corresponding KeyboardInterrupt exception for shutdown.  We also catch
        # SystemExit (which gets raised if the main threads exits for some other
        # reason).
        print "need to exit now: %s" % e
        raise

    finally:
        # always clean up the session, no matter if we caught an exception or
        # not.
        print "closing session"
        print "SESSION ID: %s" % session.uid
        session.close(cleanup=False)
Example #13
            'target': staged_file,
            'action': rp.TRANSFER
        }
        # Synchronously stage the data to the pilot
        pilot.stage_in(sd_pilot)

        # Configure the staging directive for shared input file.
        sd_shared = {
            'source': staged_file,
            'target': SHARED_INPUT_FILE,
            'action': rp.LINK
        }

        # Combine the ComputePilot, the ComputeUnits and a scheduler via
        # a UnitManager object.
        umgr = rp.UnitManager(session, rp.SCHEDULER_BACKFILLING)

        # Add the previously created ComputePilot to the UnitManager.
        umgr.add_pilots(pilot)

        compute_unit_descs = []

        for unit_idx in range(len(radical_cockpit_occupants)):

            # Configure the per unit input file.
            input_file = 'input_file-%d.txt' % (unit_idx + 1)

            # Configure the per unit output file.
            output_file = 'output_file-%d.txt' % (unit_idx + 1)

            # Actual task description.
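            # (the snippet ends here; a plausible description, wiring up the
            #  shared and per-unit files defined above -- the executable and
            #  arguments are placeholders)
            cud = rp.ComputeUnitDescription()
            cud.executable = '/bin/cp'
            cud.arguments = [SHARED_INPUT_FILE, output_file]
            cud.input_staging = [sd_shared, input_file]
            cud.output_staging = [output_file]
            compute_unit_descs.append(cud)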
Example #14
    pmgr.register_callback(pilot_state_cb)

    # Define a single-core local pilot that runs for 10 minutes.
    pdesc = rp.ComputePilotDescription()
    pdesc.resource = "local.localhost"
    pdesc.runtime  = 10
    pdesc.cores    = 1

    # Launch the pilot.
    pilot = pmgr.submit_pilots(pdesc)

    # Combine the ComputePilot, the ComputeUnits and a scheduler via
    # a UnitManager object.
    umgr = rp.UnitManager(
        session=session,
        scheduler=rp.SCHED_DIRECT_SUBMISSION,
        output_transfer_workers=4,
        input_transfer_workers=4)

    # Register our callback with the UnitManager. This callback will get
    # called every time any of the units managed by the UnitManager
    # change their state.
    umgr.register_callback(unit_state_change_cb)

    # Add the previously created ComputePilot to the UnitManager.
    umgr.add_pilots(pilot)

    # Create a workload of 8 ComputeUnits (tasks).
    compute_units = []
    for unit_count in range(0, 8):
        cu = rp.ComputeUnitDescription()
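        # (the snippet ends here; a plausible loop body, following the other
        #  examples in this collection -- the workload is a placeholder)
        cu.executable = '/bin/date'
        cu.cores = 1
        compute_units.append(cu)

    # submit the bulk and wait for completion
    units = umgr.submit_units(compute_units)
    umgr.wait_units()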
Example #15
            'action': rp.TRANSFER
        }
        # Synchronously stage the data to the pilot
        pilot.stage_in(sd_pilot)

        # Configure the staging directive for shared input file.
        sd_shared = {
            'source': staged_file,
            'target': 'input.txt',
            'action': rp.LINK
        }

        # Combine the ComputePilot, the ComputeUnits and a scheduler via
        # a UnitManager object.
        print "Initializing Unit Manager ..."
        umgr = rp.UnitManager(session=session, scheduler=rp.SCHED_DIRECT)

        umgr.register_callback(unit_state_cb)
        # Add the created ComputePilot to the UnitManager.
        print "Registering Compute Pilot with Unit Manager ..."
        umgr.add_pilots(pilot)

        NUMBER_OF_TRAJECTORIES = total_file_lines
        # ideally the window size evenly divides NUMBER_OF_TRAJECTORIES

        # submit CUs to pilot job
        cudesc_list = []
        sd_inter_in_list = list()
        for i in range(1, NUMBER_OF_TRAJECTORIES + 1, WINDOW_SIZE):
Example #16
def run_test(cfg):

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()
    print "session id: %s" % session.uid

    # all other pilot code is now tried/excepted.  If an exception is caught, we
    # can rely on the session object to exist and be valid, and we can thus tear
    # the whole RP stack down via a 'session.close()' call in the 'finally'
    # clause...
    try:

        pmgr = rp.PilotManager(session=session)
        pmgr.register_callback(pilot_state_cb)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = cfg['cp_resource']
        pdesc.cores = cfg['cp_cores']
        pdesc.project = cfg['cp_project']
        pdesc.queue = cfg['cp_queue']
        pdesc.runtime = cfg['cp_runtime']
        pdesc.cleanup = False
        pdesc.access_schema = cfg['cp_schema']

        pilot = pmgr.submit_pilots(pdesc)

        input_sd_pilot = {
            'source': 'file:///etc/passwd',
            'target': 'staging:///f1',
            'action': rp.TRANSFER
        }
        pilot.stage_in(input_sd_pilot)

        umgr = rp.UnitManager(session=session, scheduler=SCHED)
        umgr.register_callback(unit_state_cb, rp.UNIT_STATE)
        umgr.register_callback(wait_queue_size_cb, rp.WAIT_QUEUE_SIZE)
        umgr.add_pilots(pilot)

        input_sd_umgr = {
            'source': 'file:///etc/group',
            'target': 'f2',
            'action': rp.COPY
        }
        input_sd_agent = {
            'source': 'staging:///f1',
            'target': 'f1',
            'action': rp.COPY
        }
        output_sd_agent = {
            'source': 'f1',
            'target': 'staging:///f1.bak',
            'action': rp.COPY
        }
        output_sd_umgr = {
            'source': 'f2',
            'target': 'f2.bak',
            'action': rp.TRANSFER
        }

        cuds = list()
        for unit_count in range(0, UNITS):
            cud = rp.ComputeUnitDescription()
            cud.executable = "wc"
            cud.arguments = ["f1", "f2"]
            cud.cores = 1
            cud.input_staging = [input_sd_umgr, input_sd_agent]
            cud.output_staging = [output_sd_umgr, output_sd_agent]
            cuds.append(cud)

        units = umgr.submit_units(cuds)

        umgr.wait_units()

        for cu in units:
            print "* Task %s state %s, exit code: %s, started: %s, finished: %s" \
                % (cu.uid, cu.state, cu.exit_code, cu.start_time, cu.stop_time)

    # os.system ("radicalpilot-stats -m stat,plot -s %s > %s.stat" % (session.uid, session_name))

    except Exception as e:
        # Something unexpected happened in the pilot code above
        print "caught Exception: %s" % e
        raise

    except (KeyboardInterrupt, SystemExit) as e:
        # the callback called sys.exit(), and we can here catch the
        # corresponding KeyboardInterrupt exception for shutdown.  We also catch
        # SystemExit (which gets raised if the main threads exits for some other
        # reason).
        print "need to exit now: %s" % e
        raise

    finally:
        # always clean up the session, no matter if we caught an exception or
        # not.
        print "closing session"
        print "SESSION ID: %s" % session.uid
        session.close(cleanup=False)
Example #17
            'runtime': 15,  # pilot runtime (min)
            'exit_on_error': True,
            'project': config[resource]['project'],
            'queue': config[resource]['queue'],
            'access_schema': config[resource]['schema'],
            'cores': config[resource]['cores']
        }
        pdesc = rp.ComputePilotDescription(pd_init)

        # Launch the pilot.
        pilot = pmgr.submit_pilots(pdesc)

        report.header('submit units')

        # Register the ComputePilot in a UnitManager object.
        umgr = rp.UnitManager(session=session)
        umgr.add_pilots(pilot)

        # Create a workload of ComputeUnits.
        # Each compute unit runs '/bin/date'.
        n = 128  # number of units to run
        report.info('create %d unit description(s)\n\t' % n)

        cuds = list()
        for i in range(0, n):

            # create a new CU description, and fill it.
            # Here we don't use dict initialization.
            cud = rp.ComputeUnitDescription()
            cud.executable = '/bin/date'
            cuds.append(cud)
 
Example #18
     # Create a Compute Unit that sorts the local password file and writes the
     # output to result.dat.
     #
     #  The exact command that is executed by the agent is:
     #    "/usr/bin/sort -o result.dat passwd"
     #
     cud = rp.ComputeUnitDescription()
     cud.executable     = "/usr/bin/sort"
     cud.arguments      = ["-o", "result.dat", "passwd"]
     cud.input_staging  = "/etc/passwd"
     cud.output_staging = "result.dat"
 
     # Combine the ComputePilot, the ComputeUnits and a scheduler via
     # a UnitManager object.
     umgr = rp.UnitManager(session, rp.SCHED_DIRECT_SUBMISSION)
 
     # Register our callback with the UnitManager. This callback will get
     # called every time any of the units managed by the UnitManager
     # change their state.
     umgr.register_callback(unit_state_cb)
 
     # Add the previously created ComputePilot to the UnitManager.
     umgr.add_pilots(pilot)
 
     # Submit the previously created ComputeUnit description to the
     # UnitManager. This will trigger the selected scheduler to start
     # assigning the ComputeUnit to the ComputePilot.
     unit = umgr.submit_units(cud)
 
     # Wait for the compute unit to reach a terminal state (DONE or FAILED).
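     # (the snippet ends at the comment above; the matching call, as used in
     #  the other examples in this collection)
     umgr.wait_units()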
Example #19
         cu.arguments     = ["helloworld_mpi.py"]
         cu.input_staging = ["helloworld_mpi.py"]
 
         # These two parameters are relevant to MPI execution:
         #   'cores' sets the number of cores required by the task
         #   'mpi' identifies the task as an MPI task
         cu.cores         = 8
         cu.mpi           = True
 
 
         cud_list.append(cu)
 
     # Combine the ComputePilot, the ComputeUnits and a scheduler via
     # a UnitManager object.
     umgr = rp.UnitManager(
         session=session,
         scheduler=rp.SCHED_DIRECT_SUBMISSION)
 
     # Register our callback with the UnitManager. This callback will get
     # called every time any of the units managed by the UnitManager
     # change their state.
     umgr.register_callback(unit_state_cb)
 
     # Add the previously created ComputePilot to the UnitManager.
     umgr.add_pilots(pilot)
 
     # Submit the previously created ComputeUnit descriptions to the
     # UnitManager. This will trigger the selected scheduler to start
     # assigning ComputeUnits to the ComputePilots.
     units = umgr.submit_units(cud_list)
 
Example #20
def test_bw_integration():

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()

    # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
    pmgr = rp.PilotManager(session=session)

    # Define a 128-core pilot on ncsa.bw_aprun that runs for 10 minutes.
    # Here we use a dict to initialize the description object
    pd_init = {
        'resource': 'ncsa.bw_aprun',
        'runtime': 10,  # pilot runtime (min)
        'cores': 128,
        'project': 'gk4',
        'queue': 'high'
    }
    pdesc = rp.ComputePilotDescription(pd_init)

    # Launch the pilot.
    pilot = pmgr.submit_pilots(pdesc)

    # Register the ComputePilot in a UnitManager object.
    umgr = rp.UnitManager(session=session)
    umgr.add_pilots(pilot)

    # Run 4 tasks that each require 1 core and 10MB of LFS
    n = 4
    cuds = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's1_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 16
        # cud.cpu_process_type = None
        # cud.cpu_process_type = rp.MPI
        cud.lfs_per_process = 10  # MB
        cud.output_staging = {
            'source': 'unit:///s1_t%s_hostname.txt' % i,
            'target': 'client:///s1_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }
        cuds.append(cud)

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus = umgr.submit_units(cuds)

    # Wait for all units to finish
    umgr.wait_units()

    n = 4
    cuds2 = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
        cud.tag = cus[i].uid
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's2_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 16
        cud.cpu_process_type = None
        # cud.cpu_process_type = rp.MPI
        cud.lfs_per_process = 10  # MB
        cud.output_staging = {
            'source': 'unit:///s2_t%s_hostname.txt' % i,
            'target': 'client:///s2_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }
        cuds2.append(cud)

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus2 = umgr.submit_units(cuds2)

    # Wait for all units to finish
    umgr.wait_units()

    # Check that each tagged unit landed on the same node as the unit whose
    # uid it carries as a tag
    for i in range(0, n):
        assert open('s1_t%s_hostname.txt' % i, 'r').readline().strip() == \
               open('s2_t%s_hostname.txt' % i, 'r').readline().strip()

    session.close()

    txts = glob('%s/*.txt' % os.getcwd())
    for f in txts:
        os.remove(f)