Example #1
def generate_trajectorygenerationtask_cud(task_desc, db, shared_path, project):
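    """Build an rp.ComputeUnitDescription for a TrajectoryGenerationTask.

    NOTE: assumes the helpers used below (get_environment_from_task,
    get_paths_from_task, get_input_staging, get_commands,
    get_executable_arguments, get_output_staging, describe_compute_setup)
    are defined elsewhere in the same module.
    """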
    # Compute Unit Description
    cud = rp.ComputeUnitDescription()
    cud.name = task_desc['_id']

    # Get each component of the task
    pre_task_details = task_desc['_dict'].get('pre', list())
    main_task_details = task_desc['_dict']['_main']
    post_task_details = task_desc['_dict'].get('post', list())
    #resource_requirements = task_desc['_dict']['resource_requirements']

    # First, extract environment variables
    cud.environment = get_environment_from_task(task_desc)

    # Next, extract things we need to add to the PATH
    # TODO: finish adding path directive
    paths = get_paths_from_task(task_desc)

    # Next, get input staging
    # We get "ALL" COPY/LINK directives from the pre_exec
    staging_directives = get_input_staging(pre_task_details,
                                           db,
                                           shared_path,
                                           project,
                                           break_after_non_dict=False)
    # We get "ALL" COPY/LINK directives from the main *before* the first non-dictionary entry
    staging_directives.extend(
        get_input_staging(main_task_details, db, shared_path, project))
    cud.input_staging = staging_directives

    # Next, get pre execution steps
    pre_exec = [
        'mkdir -p traj',
        'mkdir -p extension',
    ]
    pre_exec.extend(get_commands(pre_task_details, shared_path, project))
    cud.pre_exec = pre_exec

    # Now, do main executable
    exe, args = get_executable_arguments(main_task_details, shared_path,
                                         project)
    cud.executable = str(exe)
    cud.arguments = args

    # Now, get output staging steps
    # We get "ALL" COPY/LINK directives from the post_exec
    staging_directives = get_output_staging(task_desc,
                                            post_task_details,
                                            db,
                                            shared_path,
                                            project,
                                            continue_before_non_dict=False)
    # We get "ALL" COPY/LINK directives from the main *after* the first non-dictionary entry
    staging_directives.extend(
        get_output_staging(task_desc, main_task_details, db, shared_path,
                           project))
    cud.output_staging = staging_directives

    # Get all post-execution steps
    post_exec = list()
    post_exec.extend(get_commands(post_task_details, shared_path, project))
    cud.post_exec = post_exec

    describe_compute_setup(cud, task_desc)

    return cud
Example #2
    pdesc.cores = 1

    # Launch the pilot.
    pilot = pmgr.submit_pilots(pdesc)
    pilot.register_callback(pilot_state_cb)

    # Combine the ComputePilot, the ComputeUnits and a scheduler via
    # a UnitManager object.
    umgr = rp.UnitManager(session=session,
                          scheduler=rp.SCHEDULER_DIRECT_SUBMISSION)

    # Add the previously created ComputePilot to the UnitManager.
    umgr.add_pilots(pilot)

    # Create a ComputeUnit (task) description.
    unit_descr = rp.ComputeUnitDescription()
    unit_descr.executable = "/bin/sleep"
    unit_descr.arguments = ['10']
    unit_descr.cores = 1

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    units = umgr.submit_units(unit_descr)

    # Register a state callback for each compute unit, then wait for all to finish.
    for unit in umgr.get_units():
        unit.register_callback(unit_state_change_cb)

    umgr.wait_units()
Example #3
    def test_generate_pythontask_cud(self):
        """Test proper Compute Unit Description generation for PythonTask"""
        task_descriptions = self.db.get_task_descriptions()

        # PythonTask
        task_desc = dict()
        for task in task_descriptions:
            if task['_id'] == "04f01b52-8c69-11e7-9eb2-0000000000fe":
                task_desc = task
                break

        # Get the input.json example
        with open('{}/{}'.format(directory, ptask_in_example)) as json_data:
            input_json_data = json.load(json_data)

        cud = utils.generate_pythontask_cud(task_desc, self.db,
                                            '/home/example', self.db.project)
        actual_cud = rp.ComputeUnitDescription()
        actual_cud.name = "04f01b52-8c69-11e7-9eb2-0000000000fe"
        actual_cud.environment = {"TEST3": "3", "TEST4": "4"}
        actual_cud.input_staging = [{
            "action": "Link",
            "source": "pilot:///_run_.py",
            "target": "unit:///_run_.py"
        }, {
            "action": "Link",
            "source": "pilot:///alanine.pdb",
            "target": "unit:///input.pdb"
        }]
        actual_cud.pre_exec = [
            'mkdir -p traj',
            'mkdir -p extension',
            'echo \'{}\' > \'{}\''.format(
                json.dumps(input_json_data['contents']),
                input_json_data['target']),  # stage input.json
            "source /home/test/venv/bin/activate"
        ]
        actual_cud.executable = 'python'
        actual_cud.arguments = ['_run_.py']
        actual_cud.output_staging = [{
            "action": "Copy",
            "source": "output.json",
            "target": "/home/example/projects/{}//models/model.0x4f01b528c6911e79eb20000000000feL.json".format(self.db.project)
        }]
        actual_cud.post_exec = ["deactivate"]

        actual_cud.cpu_process_type = 'POSIX'
        actual_cud.gpu_process_type = 'POSIX'
        actual_cud.cpu_thread_type = 'POSIX'
        actual_cud.gpu_thread_type = 'CUDA'
        actual_cud.cpu_processes = 10
        actual_cud.gpu_processes = 1
        actual_cud.cpu_threads = 1
        actual_cud.gpu_threads = 1

        # compare all parts of the cuds
        self.maxDiff = None
        self.assertEqual(cud.name, actual_cud.name)
        self.assertDictEqual(cud.environment, actual_cud.environment)
        self.assertListEqual(cud.input_staging, actual_cud.input_staging)
        self.assertListEqual(cud.pre_exec, actual_cud.pre_exec)
        self.assertEqual(cud.executable, actual_cud.executable)
        self.assertListEqual(cud.arguments, actual_cud.arguments)
        self.assertListEqual(cud.output_staging, actual_cud.output_staging)
        self.assertListEqual(cud.post_exec, actual_cud.post_exec)

        self.assertEqual(cud.cpu_process_type, actual_cud.cpu_process_type)
        self.assertEqual(cud.cpu_thread_type, actual_cud.cpu_thread_type)
        self.assertEqual(cud.cpu_processes, actual_cud.cpu_processes)
        self.assertEqual(cud.cpu_threads, actual_cud.cpu_threads)
Example #4
def test_local_integration():
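    # NOTE: this snippet assumes module-level imports of os,
    # radical.pilot as rp, and `from glob import glob`.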

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()

    # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
    pmgr = rp.PilotManager(session=session)

    # Update the localhost LFS (node-local scratch filesystem) path and size
    cfg = session.get_resource_config('local.localhost')
    new_cfg = rp.ResourceConfig('local.localhost', cfg)
    new_cfg.lfs_path_per_node = '/tmp'
    new_cfg.lfs_size_per_node = 1024  # MB
    session.add_resource_config(new_cfg)
    cfg = session.get_resource_config('local.localhost')

    # Check that the updated config is read by the session
    assert 'lfs_path_per_node' in cfg.keys()
    assert 'lfs_size_per_node' in cfg.keys()
    assert cfg['lfs_path_per_node'] == '/tmp'
    assert cfg['lfs_size_per_node'] == 1024

    # Define an [n]-core local pilot that runs for [x] minutes
    # Here we use a dict to initialize the description object
    pd_init = {
        'resource': 'local.localhost',
        'runtime': 15,  # pilot runtime (min)
        'cores': 4
    }
    pdesc = rp.ComputePilotDescription(pd_init)

    # Launch the pilot.
    pilot = pmgr.submit_pilots(pdesc)

    # Register the ComputePilot in a UnitManager object.
    umgr = rp.UnitManager(session=session)
    umgr.add_pilots(pilot)

    # Run 16 tasks that each require 1 core and 10MB of LFS
    n = 16
    cuds = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's1_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 1
        # cud.cpu_process_type = rp.MPI
        cud.lfs_per_process = 10  # MB
        cud.output_staging = {
            'source': 'unit:///s1_t%s_hostname.txt' % i,
            'target': 'client:///s1_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }
        cuds.append(cud)

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus = umgr.submit_units(cuds)

    # Wait for all units to finish
    umgr.wait_units()

    n = 16
    cuds2 = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
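        # tag the unit with the uid of the corresponding first-run unit so
        # that the scheduler places it on the same node (checked below by
        # comparing the recorded hostnames)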
        cud.tag = cus[i].uid
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's2_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 1
        # cud.cpu_process_type = rp.MPI
        cud.lfs_per_process = 10  # MB
        cud.output_staging = {
            'source': 'unit:///s2_t%s_hostname.txt' % i,
            'target': 'client:///s2_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }

        cuds2.append(cud)

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus2 = umgr.submit_units(cuds2)

    # Wait for all units to finish
    umgr.wait_units()

    for i in range(0, n):
        with open('s1_t%s_hostname.txt' % i) as f1, \
             open('s2_t%s_hostname.txt' % i) as f2:
            assert f1.readline().strip() == f2.readline().strip()

    session.close()

    txts = glob('%s/*.txt' % os.getcwd())
    for f in txts:
        os.remove(f)
Example #5
def test_local_tagging():
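    # NOTE: assumes module-level imports of os, glob,
    # radical.pilot as rp, and radical.utils as ru.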

    # we use a reporter class for nicer output
    report = ru.Reporter(name='radical.pilot')
    report.title('Getting Started (RP version %s)' % rp.version)

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()

    # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
    pmgr = rp.PilotManager(session=session)

    # Define an [n]-core local pilot that runs for [x] minutes
    # Here we use a dict to initialize the description object
    pd_init = {'resource': 'local.localhost',
               'runtime': 10,  # pilot runtime (min)
               'exit_on_error': True,
               'cores': 4
              }
    pdesc = rp.ComputePilotDescription(pd_init)

    # Launch the pilot.
    pilot = pmgr.submit_pilots(pdesc)

    report.header('submit units')

    # Register the ComputePilot in a UnitManager object.
    umgr = rp.UnitManager(session=session)
    umgr.add_pilots(pilot)

    # Create a workload of ComputeUnits.

    n = 5  # number of units to run
    report.info('create %d unit description(s)\n\t' % n)

    cuds = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud                  = rp.ComputeUnitDescription()
        cud.executable       = '/bin/hostname'
        cud.arguments        = ['>', 's1_t%s_hostname.txt' % i]
        cud.cpu_processes    = 1
        cud.cpu_threads      = 1
      # cud.cpu_process_type = rp.MPI
      # cud.cpu_thread_type  = rp.OpenMP
        cud.output_staging   = {'source': 'unit:///s1_t%s_hostname.txt' % i,
                                'target': 'client:///s1_t%s_hostname.txt' % i,
                                'action': rp.TRANSFER}
        cuds.append(cud)
        report.progress()
    report.ok('>>ok\n')

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus = umgr.submit_units(cuds)

    # Wait for all compute units to reach a final state
    # (DONE, CANCELED or FAILED).
    report.header('gather results')
    umgr.wait_units()

    n = 5  # number of units to run
    report.info('create %d unit description(s)\n\t' % n)

    cuds = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud                  = rp.ComputeUnitDescription()
        cud.executable       = '/bin/hostname'
        cud.arguments        = ['>', 's2_t%s_hostname.txt' % i]
        cud.cpu_processes    = 1
        cud.cpu_threads      = 1
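        # tag with the uid of the matching first-stage unit so it is
        # scheduled onto the same node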
        cud.tag              = cus[i].uid
      # cud.cpu_process_type = rp.MPI
      # cud.cpu_thread_type  = rp.OpenMP
        cud.output_staging   = {'source': 'unit:///s2_t%s_hostname.txt' % i,
                                'target': 'client:///s2_t%s_hostname.txt' % i,
                                'action': rp.TRANSFER}
        cuds.append(cud)
        report.progress()
    report.ok('>>ok\n')

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus = umgr.submit_units(cuds)

    # Wait for all compute units to reach a final state (DONE, CANCELED or FAILED).
    report.header('gather results')
    umgr.wait_units()

    for i in range(0, n):
        with open('s1_t%s_hostname.txt' % i) as f1, \
             open('s2_t%s_hostname.txt' % i) as f2:
            assert f1.readline().strip() == f2.readline().strip()

    report.header('finalize')
    session.close(download=True)

    report.header()

    for f in glob.glob('%s/*.txt' % os.getcwd()):
        os.remove(f)
Example #6
def test_bw_integration():
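    # NOTE: targets NCSA Blue Waters (aprun launch method); assumes
    # module-level imports of os, radical.pilot as rp, and
    # `from glob import glob`.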

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()

    # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
    pmgr = rp.PilotManager(session=session)

    # Define an [n]-core local pilot that runs for [x] minutes
    # Here we use a dict to initialize the description object
    pd_init = {
        'resource': 'ncsa.bw_aprun',
        'runtime': 10,  # pilot runtime (min)
        'cores': 128,
        'project': 'gk4',
        'queue': 'high'
    }
    pdesc = rp.ComputePilotDescription(pd_init)

    # Launch the pilot.
    pilot = pmgr.submit_pilots(pdesc)

    # Register the ComputePilot in a UnitManager object.
    umgr = rp.UnitManager(session=session)
    umgr.add_pilots(pilot)

    # Run 4 tasks that each require 1 core, 16 threads and 10MB of LFS
    n = 4
    cuds = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's1_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 16
        # cud.cpu_process_type = None
        # cud.cpu_process_type = rp.MPI
        cud.lfs_per_process = 10  # MB
        cud.output_staging = {
            'source': 'unit:///s1_t%s_hostname.txt' % i,
            'target': 'client:///s1_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }
        cuds.append(cud)

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus = umgr.submit_units(cuds)

    # Wait for all units to finish
    umgr.wait_units()

    n = 4
    cuds2 = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
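        # reuse the uid of the matching first-run unit as a placement tag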
        cud.tag = cus[i].uid
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's2_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 16
        cud.cpu_process_type = None
        # cud.cpu_process_type = rp.MPI
        cud.lfs_per_process = 10  # MB
        cud.output_staging = {
            'source': 'unit:///s2_t%s_hostname.txt' % i,
            'target': 'client:///s2_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }
        cuds2.append(cud)

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus2 = umgr.submit_units(cuds2)

    # Wait for all units to finish
    umgr.wait_units()

    # Check that both runs of each task report the same hostname,
    # i.e. that the tagged units landed on the same node
    for i in range(0, n):
        with open('s1_t%s_hostname.txt' % i) as f1, \
             open('s2_t%s_hostname.txt' % i) as f2:
            assert f1.readline().strip() == f2.readline().strip()

    session.close()

    txts = glob('%s/*.txt' % os.getcwd())
    for f in txts:
        os.remove(f)
Example #7
def test_pass_issue_359():

    session = rp.Session()

    try:
        c = rp.Context('ssh')
        c.user_id = CONFIG["xsede.stampede"]["user_id"]
        session.add_context(c)

        pmgr = rp.PilotManager(session=session)
        pmgr.register_callback(pilot_state_cb)

        core_configs = [8, 16, 17, 32, 33]

        umgr_list = []
        for cores in core_configs:

            umgr = rp.UnitManager(session=session,
                                  scheduler=rp.SCHED_DIRECT_SUBMISSION)

            umgr.register_callback(unit_state_cb)

            pdesc = rp.ComputePilotDescription()
            pdesc.resource = "xsede.stampede"
            pdesc.project = CONFIG["xsede.stampede"]["project"]
            pdesc.runtime = 10
            pdesc.cores = cores

            pilot = pmgr.submit_pilots(pdesc)

            umgr.add_pilots(pilot)

            umgr_list.append(umgr)

        unit_list = []

        for umgr in umgr_list:

            test_task = rp.ComputeUnitDescription()

            test_task.pre_exec = CONFIG["xsede.stampede"]["pre_exec"]
            test_task.input_staging = ["../helloworld_mpi.py"]
            test_task.executable = "python"
            test_task.arguments = ["helloworld_mpi.py"]
            test_task.mpi = True
            test_task.cores = 8

            unit = umgr.submit_units(test_task)

            unit_list.append(unit)

        for umgr in umgr_list:
            umgr.wait_units()

        for unit in unit_list:
            print "* Task %s - state: %s, exit code: %s, started: %s, finished: %s, stdout: %s" \
                % (unit.uid, unit.state, unit.exit_code, unit.start_time, unit.stop_time, unit.stdout)

            assert (unit.state == rp.DONE)

    except Exception:
        print('test failed')
        raise

    finally:
        pmgr.cancel_pilots()
        pmgr.wait_pilots()

        session.close()
Example #8
import os
import radical.pilot as rp

# ------------------------------------------------------------------------------
#
if __name__ == '__main__':

    here = os.path.abspath(os.path.dirname(__file__))
    master = '%s/task_overlay_master.py' % here
    worker = '%s/task_overlay_worker.py' % here

    session = rp.Session()
    try:
        pd = {'resource': 'local.debug', 'cores': 128, 'runtime': 60}

        td = {'executable': master, 'arguments': [worker]}
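        # only the master is submitted as a unit below; it is expected to
        # launch and manage its workers itself (task-overlay pattern)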

        pmgr = rp.PilotManager(session=session)
        umgr = rp.UnitManager(session=session)
        pilot = pmgr.submit_pilots(rp.ComputePilotDescription(pd))
        task = umgr.submit_units(rp.ComputeUnitDescription(td))

        umgr.add_pilots(pilot)
        umgr.wait_units()

    finally:
        session.close(download=True)

# ------------------------------------------------------------------------------
Example #9
def run_test(cfg):

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()
    print "session id: %s" % session.uid

    # all other pilot code is now tried/excepted.  If an exception is caught, we
    # can rely on the session object to exist and be valid, and we can thus tear
    # the whole RP stack down via a 'session.close()' call in the 'finally'
    # clause...
    try:

        # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
        print "Initializing Pilot Manager ..."
        pmgr = rp.PilotManager(session=session)

        # Register our callback with the PilotManager. This callback will get
        # called every time any of the pilots managed by the PilotManager
        # change their state.
        pmgr.register_callback(pilot_state_cb)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = cfg['cp_resource']
        if cfg['cp_schema']:
            pdesc.access_schema = cfg['cp_schema']
        pdesc.project = cfg['cp_project']
        pdesc.queue = cfg['cp_queue']
        pdesc.runtime = cfg['cp_runtime']
        pdesc.cores = cfg['cp_cores']
        pdesc.cleanup = True

        # submit the pilot.
        print "Submitting Compute Pilot to Pilot Manager ..."
        pilot = pmgr.submit_pilots(pdesc)

        # Combine the ComputePilot, the ComputeUnits and a scheduler via
        # a UnitManager object.
        print "Initializing Unit Manager ..."
        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHEDULER_DIRECT_SUBMISSION)

        # Register our callback with the UnitManager. This callback will get
        # called every time any of the units managed by the UnitManager
        # change their state.
        umgr.register_callback(unit_state_cb)

        # Add the created ComputePilot to the UnitManager.
        print "Registering Compute Pilot with Unit Manager ..."
        umgr.add_pilots(pilot)

        NUMBER_JOBS = 10  # the total number of cus to run

        # submit CUs to pilot job
        cudesc_list = []
        for i in range(NUMBER_JOBS):

            cudesc = rp.ComputeUnitDescription()
            if cfg['cu_pre_exec']:
                cudesc.pre_exec = cfg['cu_pre_exec']
            cudesc.executable = cfg['executable']
            cudesc.arguments = ["helloworld_mpi.py"]
            cudesc.input_staging = [
                "%s/../examples/helloworld_mpi.py" % cfg['pwd']
            ]
            cudesc.cores = cfg['cu_cores']
            cudesc.mpi = True

            cudesc_list.append(cudesc)

        # Submit the previously created ComputeUnit descriptions to the
        # UnitManager. This will trigger the selected scheduler to start
        # assigning ComputeUnits to the ComputePilots.
        print "Submit Compute Units to Unit Manager ..."
        cu_set = umgr.submit_units(cudesc_list)

        print "Waiting for CUs to complete ..."
        umgr.wait_units()
        print "All CUs completed successfully!"

        for unit in cu_set:
            print "* Task %s - state: %s, exit code: %s, started: %s, finished: %s, stdout: %s" \
                  % (unit.uid, unit.state, unit.exit_code, unit.start_time, unit.stop_time, unit.stdout)

            assert (unit.state == rp.DONE)
            for i in range(cfg['cu_cores']):
                assert ('mpi rank %d/%d' % (i + 1, cfg['cu_cores'])
                        in unit.stdout)

    except Exception as e:
        # Something unexpected happened in the pilot code above
        print "caught Exception: %s" % e
        raise

    except (KeyboardInterrupt, SystemExit) as e:
        # the callback called sys.exit(), and we can here catch the
        # corresponding KeyboardInterrupt exception for shutdown.  We also catch
        # SystemExit (which gets raised if the main threads exits for some other
        # reason).
        print "need to exit now: %s" % e
        raise

    finally:
        # always clean up the session, no matter if we caught an exception or
        # not.
        print "closing session"
        print "SESSION ID: %s" % session.uid
        session.close(cleanup=False)
Example #10
def test_fail_issue_172(setup_stampede):

    session, pilot, pmgr, umgr, resource = setup_stampede

    umgr.register_callback(unit_state_cb)

    # generate some units which use env vars in different ways, w/ and w/o MPI
    env_variants = [
        'UNDEFINED',  # special case: env will not be set
        None,
        {},           # empty dict
        {'foo': 'bar'},  # single entry dict
        {'foo': 'bar', 'sports': 'bar', 'banana': 'bar'}  # multi entry dict
    ]

    compute_units = []
    idx = 1
    for env in env_variants:

        # Serial
        cud = rp.ComputeUnitDescription()
        cud.name = "serial_" + str(idx)
        cud.executable = "/bin/echo"
        cud.arguments = [
            'Taverns:', '$foo', '$sports', '$banana', r'dollars\$\$',
            '"$dollar"', "sitting 'all' by myself", 'drinking "cheap" beer'
        ]
        if env != 'UNDEFINED':
            cud.environment = env

        compute_units.append(cud)

        # MPI
        cud = rp.ComputeUnitDescription()
        cud.name = "mpi_" + str(idx)
        cud.pre_exec = CONFIG[resource]["pre_exec"]
        cud.executable = "python"
        cud.input_staging = ["mpi4py_env.py"]
        cud.arguments = ['mpi4py_env.py']
        cud.cores = 2
        cud.mpi = True
        if env != 'UNDEFINED':
            cud.environment = env

        compute_units.append(cud)
        idx += 1

    units = umgr.submit_units(compute_units)

    umgr.wait_units()

    if not isinstance(units, list):
        units = [units]

    for unit in units:
        print(unit.stdout)
        print("\n\n")
        print("* Task %s - env: %s state: %s, exit code: %s, started: %s, finished: %s, stdout: %s"
              % (unit.uid, unit.description.environment, unit.state,
                 unit.exit_code, unit.start_time, unit.stop_time, repr(unit.stdout)))

        assert (unit.state == rp.DONE)
        if unit.name == "serial_1" or unit.name == "serial_2" or unit.name == "serial_3":
            assert ("Taverns:    dollars$$ \"\"" in unit.stdout)

        if unit.name == "mpi_1" or unit.name == "mpi_2" or unit.name == "mpi_3":
            assert ("Taverns: None, None, None" in unit.stdout)

        if unit.name == "serial_4":
            assert ("Taverns: bar   dollars$$ \"\"" in unit.stdout)

        if unit.name == "mpi_4":
            assert ("Taverns: bar, None, None" in unit.stdout)

        if unit.name == "serial_5":
            assert ("Taverns: bar bar bar dollars$$ \"\"" in unit.stdout)

        if unit.name == "mpi_5":
            assert ("Taverns: bar, bar, bar" in unit.stdout)
Example #11
    def to_cu(self):
        # Write the python file, stage it, run it and return the model
        cu = rp.ComputeUnitDescription({})
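        # NOTE: stub; the file writing and staging described above are not
        # implemented here, so an empty description is returned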

        return cu
Example #12
def create_cud_from_task(task, placeholders, prof=None):
    """
    Purpose: Create a Compute Unit description based on the defined Task.

    :arguments:
        :task:         EnTK Task object
        :placeholders: dictionary holding the values for placeholders

    :return: ComputeUnitDescription
    """

    try:

        logger.debug('Creating CU from Task %s' % (task.uid))

        if prof:
            prof.prof('cud_create', uid=task.uid)

        cud = rp.ComputeUnitDescription()
        cud.name = '%s,%s,%s,%s,%s,%s' % (
            task.uid, task.name, task.parent_stage['uid'],
            task.parent_stage['name'], task.parent_pipeline['uid'],
            task.parent_pipeline['name'])
        cud.pre_exec = task.pre_exec
        cud.executable = task.executable
        cud.arguments = resolve_arguments(task.arguments, placeholders)
        cud.sandbox = task.sandbox
        cud.post_exec = task.post_exec

        if task.tag and task.parent_pipeline['name']:
            cud.tag = resolve_tags(
                tag=task.tag,
                parent_pipeline_name=task.parent_pipeline['name'],
                placeholders=placeholders)

        cud.cpu_processes = task.cpu_reqs['processes']
        cud.cpu_threads = task.cpu_reqs['threads_per_process']
        cud.cpu_process_type = task.cpu_reqs['process_type']
        cud.cpu_thread_type = task.cpu_reqs['thread_type']
        cud.gpu_processes = task.gpu_reqs['processes']
        cud.gpu_threads = task.gpu_reqs['threads_per_process']
        cud.gpu_process_type = task.gpu_reqs['process_type']
        cud.gpu_thread_type = task.gpu_reqs['thread_type']

        if task.lfs_per_process:
            cud.lfs_per_process = task.lfs_per_process

        if task.stdout: cud.stdout = task.stdout
        if task.stderr: cud.stderr = task.stderr

        cud.input_staging = get_input_list_from_task(task, placeholders)
        cud.output_staging = get_output_list_from_task(task, placeholders)

        if prof:
            prof.prof('cud from task - done', uid=task.uid)

        logger.debug('CU %s created from Task %s' % (cud.name, task.uid))

        return cud

    except Exception:
        logger.exception('CU creation failed')
        raise
Example #13
            ru.write_json(cfg, 'configs/wf0.%s.cfg' % name)

            pd = rp.ComputePilotDescription(cfg.pilot_descr)
            pd.cores = nodes * cpn
            pd.gpus = nodes * gpn
            pd.runtime = runtime

            pilot = pmgr.submit_pilots(pd)
            pid = pilot.uid

            umgr.add_pilots(pilot)

            tds = list()

            for i in range(n_masters):
                td = rp.ComputeUnitDescription(cfg.master_descr)
                td.executable = "python3"
                td.arguments = ['wf0_master.py', i]
                td.cpu_threads = cpn
                td.pilot = pid
                td.input_staging = [
                    {
                        'source': 'wf0_master.py',
                        'target': 'wf0_master.py',
                        'action': rp.TRANSFER,
                        'flags': rp.DEFAULT_FLAGS
                    },
                    {
                        'source': 'wf0_worker.py',
                        'target': 'wf0_worker.py',
                        'action': rp.TRANSFER,
Example #14
def run_test(cfg):

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()
    print "session id: %s" % session.uid

    # all other pilot code is now tried/excepted.  If an exception is caught, we
    # can rely on the session object to exist and be valid, and we can thus tear
    # the whole RP stack down via a 'session.close()' call in the 'finally'
    # clause...
    try:

        pmgr = rp.PilotManager(session=session)
        pmgr.register_callback(pilot_state_cb)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = cfg['cp_resource']
        pdesc.cores = cfg['cp_cores']
        pdesc.project = cfg['cp_project']
        pdesc.queue = cfg['cp_queue']
        pdesc.runtime = cfg['cp_runtime']
        pdesc.cleanup = False
        pdesc.access_schema = cfg['cp_schema']

        pilot = pmgr.submit_pilots(pdesc)

        input_sd_pilot = {
            'source': 'file:///etc/passwd',
            'target': 'staging:///f1',
            'action': rp.TRANSFER
        }
        pilot.stage_in(input_sd_pilot)

        umgr = rp.UnitManager(session=session, scheduler=SCHED)
        umgr.register_callback(unit_state_cb, rp.UNIT_STATE)
        umgr.register_callback(wait_queue_size_cb, rp.WAIT_QUEUE_SIZE)
        umgr.add_pilots(pilot)

        input_sd_umgr = {
            'source': 'file:///etc/group',
            'target': 'f2',
            'action': rp.COPY
        }
        input_sd_agent = {
            'source': 'staging:///f1',
            'target': 'f1',
            'action': rp.COPY
        }
        output_sd_agent = {
            'source': 'f1',
            'target': 'staging:///f1.bak',
            'action': rp.COPY
        }
        output_sd_umgr = {
            'source': 'f2',
            'target': 'f2.bak',
            'action': rp.TRANSFER
        }

        cuds = list()
        for unit_count in range(0, UNITS):
            cud = rp.ComputeUnitDescription()
            cud.executable = "wc"
            cud.arguments = ["f1", "f2"]
            cud.cores = 1
            cud.input_staging = [input_sd_umgr, input_sd_agent]
            cud.output_staging = [output_sd_umgr, output_sd_agent]
            cuds.append(cud)

        units = umgr.submit_units(cuds)

        umgr.wait_units()

        for cu in units:
            print "* Task %s state %s, exit code: %s, started: %s, finished: %s" \
                % (cu.uid, cu.state, cu.exit_code, cu.start_time, cu.stop_time)

    # os.system ("radicalpilot-stats -m stat,plot -s %s > %s.stat" % (session.uid, session_name))

    except Exception as e:
        # Something unexpected happened in the pilot code above
        print "caught Exception: %s" % e
        raise

    except (KeyboardInterrupt, SystemExit) as e:
        # the callback called sys.exit(), and we can here catch the
        # corresponding KeyboardInterrupt exception for shutdown.  We also catch
        # SystemExit (which gets raised if the main threads exits for some other
        # reason).
        print "need to exit now: %s" % e
        raise

    finally:
        # always clean up the session, no matter if we caught an exception or
        # not.
        print "closing session"
        print "SESSION ID: %s" % session.uid
        session.close(cleanup=False)
Example #15
        'go://marksant#netbook/Users/mark/proj/radical.pilot/examples/helloworld_mpi.py',
        'target': 'go://nersc#edison/scratch2/scratchdirs/marksant/go/',
        #'target':   'staging:///go/',
        'action': rp.TRANSFER
    }
    pilot.stage_in(pilot_globe)

    unit_globe = {
        'source': '/scratch2/scratchdirs/marksant/go/helloworld_mpi.py',
        #'source':   'staging:///go/helloworld_mpi.py',
        'action': rp.LINK,
    }

    for unit_count in range(0, 1):

        mpi_test_task = rp.ComputeUnitDescription()

        mpi_test_task.pre_exec = ["module load python", "module load mpi4py"]
        mpi_test_task.input_staging = [unit_globe]
        mpi_test_task.executable = "python-mpi"
        mpi_test_task.arguments = ["helloworld_mpi.py"]
        mpi_test_task.mpi = True
        mpi_test_task.cores = 24

        cud_list.append(mpi_test_task)

    # Combine the ComputePilot, the ComputeUnits and a scheduler via
    # a UnitManager object.
    umgr = rp.UnitManager(session, scheduler=rp.SCHED_DIRECT)

    # Register our callback with the UnitManager. This callback will get
Example #16
        pdesc.project = 'mc3bggp'
        pdesc.gpus = 2
        pdesc.cores = 24
        pdesc.runtime = 60
        pdesc.exit_on_error = True
        pdesc.queue = 'GPU'

        pmgr = rp.PilotManager(session=session)

        pilot = pmgr.submit_pilots(pdesc)

        umgr = rp.UnitManager(session=session)

        umgr.add_pilots(pilot)

        cud1 = rp.ComputeUnitDescription()

        cud1.executable = 'python3'
        cud1.gpu_processes = 1
        cud1.cpu_processes = 1
        cud1.pre_exec = [
            'module load psc_path/1.1', 'module load slurm/default',
            'module load intel/17.4', 'module load python3',
            'module load cuda', 'mkdir -p classified_images/crabeater',
            'mkdir -p classified_images/weddel',
            'mkdir -p classified_images/pack-ice',
            'mkdir -p classified_images/other',
            'source /pylon5/mc3bggp/paraskev/pytorchCuda/bin/activate'
        ]
        cud1.arguments = [
            'pt_predict.py', '-class_names', 'crabeater', 'weddel', 'pack-ice',
Example #17
    def test_generate_trajectorygenerationtask_generation_cud(self):
        """Test proper Compute Unit Description generation for TrajectoryGenerationTask"""
        task_descriptions = self.db.get_task_descriptions()

        # TrajectoryGenerationTask
        task_desc = dict()
        for task in task_descriptions:
            if task['_id'] == "04f01b52-8c69-11e7-9eb2-000000000124":
                task_desc = task
                break

        cud = utils.generate_trajectorygenerationtask_cud(
            task_desc, self.db, '/home/test', self.db.project)
        actual_cud = rp.ComputeUnitDescription()
        actual_cud.name = "04f01b52-8c69-11e7-9eb2-000000000124"
        actual_cud.environment = {"TEST1": "1", "TEST2": "2"}
        actual_cud.input_staging = [{
            "action": "Link",
            "source": "pilot:///alanine.pdb",
            "target": "unit:///initial.pdb"
        }, {
            "action": "Link",
            "source": "pilot:///system.xml",
            "target": "unit:///system.xml"
        }, {
            "action": "Link",
            "source": "pilot:///integrator.xml",
            "target": "unit:///integrator.xml"
        }, {
            "action": "Link",
            "source": "pilot:///openmmrun.py",
            "target": "unit:///openmmrun.py"
        }]
        actual_cud.pre_exec = [
            'mkdir -p traj', 'mkdir -p extension',
            'source /home/test/venv/bin/activate',
            'mdconvert -o input.pdb -i 3 -t initial.pdb source/allatoms.dcd'
        ]
        actual_cud.executable = 'python'
        actual_cud.arguments = [
            "openmmrun.py", "-r", "--report-interval", "1", "-p", "CPU",
            "--types",
            "{'protein':{'stride':1,'selection':'protein','name':null,'filename':'protein.dcd'},'master':{'stride':10,'selection':null,'name':null,'filename':'master.dcd'}}",
            "-t", "initial.pdb", "--length", "100", "traj/"
        ]
        actual_cud.output_staging = [
            {
                "action": "Move",
                "source": "traj/restart.npz",
                "target": "/home/test//projects/rp_testing_modeller_1/trajs/00000004//restart.npz"
            },
            {
                "action": "Move",
                "source": "traj/master.dcd",
                "target": "/home/test//projects/rp_testing_modeller_1/trajs/00000004//master.dcd"
            },
            {
                "action": "Move",
                "source": "traj/protein.dcd",
                "target": "/home/test//projects/rp_testing_modeller_1/trajs/00000004//protein.dcd"
            },
        ]
        actual_cud.post_exec = ['deactivate']

        actual_cud.cpu_process_type = 'POSIX'
        #actual_cud.gpu_process_type  = 'POSIX'
        actual_cud.cpu_thread_type = 'POSIX'
        #actual_cud.gpu_thread_type   = 'CUDA'
        actual_cud.cpu_processes = 1
        #actual_cud.gpu_processes = 1
        actual_cud.cpu_threads = 1
        #actual_cud.gpu_threads   = 1

        # compare all parts of the cuds
        self.maxDiff = None
        self.assertEqual(cud.name, actual_cud.name)
        self.assertDictEqual(cud.environment, actual_cud.environment)
        self.assertListEqual(cud.input_staging, actual_cud.input_staging)
        self.assertListEqual(cud.pre_exec, actual_cud.pre_exec)
        self.assertEqual(cud.executable, actual_cud.executable)
        self.assertListEqual(cud.arguments, actual_cud.arguments)
        self.assertListEqual(cud.output_staging, actual_cud.output_staging)
        self.assertListEqual(cud.post_exec, actual_cud.post_exec)

        self.assertEqual(cud.cpu_process_type, actual_cud.cpu_process_type)
        self.assertEqual(cud.cpu_thread_type, actual_cud.cpu_thread_type)
        self.assertEqual(cud.cpu_processes, actual_cud.cpu_processes)
        self.assertEqual(cud.cpu_threads, actual_cud.cpu_threads)
print "Session id: %s Pilot Manager: %s" % (session.uid, str(pmgr.as_dict()))

pdesc = rp.ComputePilotDescription()
pdesc.resource = "xsede.wrangler_streaming"  # NOTE: This is a "label", not a hostname
pdesc.runtime = 20  # minutes
pdesc.cores = 24
pdesc.cleanup = False
pdesc.project = "TG-MCB090174"
pdesc.queue = 'debug'
pdesc.access_schema = 'gsissh'

pilot = pmgr.submit_pilots(pdesc)
umgr.add_pilots(pilot)

#----------BEGIN USER DEFINED TEST-CU DESCRIPTION-------------------#
cudesc = rp.ComputeUnitDescription()
cudesc.executable = 'python'
cudesc.arguments = ['start_redis.py']
cudesc.input_staging = ['start_redis.py']
cudesc.cores = 1
#-----------END USER DEFINED TEST-CU DESCRIPTION--------------------#

print('Starting up Kafka cluster..')
cu_set = umgr.submit_units([cudesc])
umgr.wait_units()
print('Kafka cluster is running')

pilot_info = pilot.as_dict()
pilot_info = pilot_info['resource_details']['lm_detail']
broker = pilot_info['brokers'][0] + ':9092'
print('broker %s ' % broker)
Example #19
    def test_generate_trajectorygenerationtask_extension_cud(self):
        """Test proper Compute Unit Description generation for TrajectoryExtensionTask"""
        task_descriptions = self.db.get_task_descriptions()

        # TrajectoryExtensionTask
        task_desc = dict()
        for task in task_descriptions:
            if task['_id'] == "24888d76-219e-11e8-8f6d-000000000118":
                task_desc = task
                break

        cud = utils.generate_trajectorygenerationtask_cud(
            task_desc, self.db, self.shared_path, self.project)
        actual_cud = rp.ComputeUnitDescription()
        actual_cud.name = "24888d76-219e-11e8-8f6d-000000000118"
        actual_cud.environment = {
            "OPENMM_CPU_THREADS": "1",
            "TEST1": "1",
            "TEST2": "2",
            "TEST3": "hello"
        }
        actual_cud.input_staging = [{
            "action": "Link",
            "source": "pilot:///ntl9.pdb",
            "target": "unit:///initial.pdb"
        }, {
            "action": "Link",
            "source": "pilot:///system-2.xml",
            "target": "unit:///system-2.xml"
        }, {
            "action": "Link",
            "source": "pilot:///integrator-2.xml",
            "target": "unit:///integrator-2.xml"
        }, {
            "action": "Link",
            "source": "pilot:///openmmrun.py",
            "target": "unit:///openmmrun.py"
        }, {
            "action": "Link",
            "source": "/home/test//projects/test_analysis/trajs/00000000/",
            "target": "unit:///source"
        }]
        actual_cud.pre_exec = [
            'mkdir -p traj', 'mkdir -p extension', "module load python",
            "source /lustre/atlas/proj-shared/bip149/jrossyra/admdrp/admdrpenv/bin/activate"
        ]
        actual_cud.executable = 'python'
        actual_cud.arguments = [
            "openmmrun.py", "-r", "-p", "CPU", "--types",
            "{'protein':{'stride':2,'selection':'protein','name':null,'filename':'protein.dcd'},'master':{'stride':10,'selection':null,'name':null,'filename':'allatoms.dcd'}}",
            "-s", "system-2.xml", "-i", "integrator-2.xml", "--restart",
            "/home/test//projects/test_analysis/trajs/00000000/restart.npz",
            "-t", "initial.pdb", "--length", "200", "extension/"
        ]
        actual_cud.output_staging = [
            {
                "action": "Move",
                "source": "extension/protein.temp.dcd",
                "target": "extension/protein.dcd"
            },
            {
                "action": "Move",
                "source": "extension/master.temp.dcd",
                "target": "extension/allatoms.dcd"
            },
            {
                "action": "Move",
                "source": "extension/restart.npz",
                "target": "/home/test//projects/test_analysis/trajs/00000000//restart.npz"
            },
            {
                "action": "Move",
                "source": "extension/allatoms.dcd",
                "target": "/home/test//projects/test_analysis/trajs/00000000//allatoms.dcd"
            },
            {
                "action": "Move",
                "source": "extension/protein.dcd",
                "target": "/home/test//projects/test_analysis/trajs/00000000//protein.dcd"
            },
        ]
        actual_cud.post_exec = [
            "mdconvert -o extension/protein.temp.dcd source/protein.dcd extension/protein.dcd",
            "mdconvert -o extension/master.temp.dcd source/allatoms.dcd extension/allatoms.dcd",
            "deactivate"
        ]
        actual_cud.cpu_process_type = 'POSIX'
        actual_cud.gpu_process_type = 'POSIX'
        actual_cud.cpu_thread_type = 'POSIX'
        actual_cud.gpu_thread_type = 'CUDA'
        actual_cud.cpu_processes = 1
        actual_cud.gpu_processes = 1
        actual_cud.cpu_threads = 1
        actual_cud.gpu_threads = 1

        # compare all parts of the cuds
        self.maxDiff = None
        self.assertEqual(cud.name, actual_cud.name)
        self.assertDictEqual(cud.environment, actual_cud.environment)
        self.assertListEqual(cud.input_staging, actual_cud.input_staging)
        self.assertListEqual(cud.pre_exec, actual_cud.pre_exec)
        self.assertEqual(cud.executable, actual_cud.executable)
        self.assertListEqual(cud.arguments, actual_cud.arguments)
        self.assertListEqual(cud.output_staging, actual_cud.output_staging)
        self.assertListEqual(cud.post_exec, actual_cud.post_exec)

        self.assertEqual(cud.cpu_process_type, actual_cud.cpu_process_type)
        self.assertEqual(cud.cpu_thread_type, actual_cud.cpu_thread_type)
        self.assertEqual(cud.cpu_processes, actual_cud.cpu_processes)
        self.assertEqual(cud.cpu_threads, actual_cud.cpu_threads)
Example #20
        pilots = pmgr.submit_pilots(pdescs)
        umgr.add_pilots(pilots)

        report.header('submit units')

        # Create a workload of ComputeUnits.
        # Each compute unit runs '/bin/date'.
        n = 128  # number of units to run
        report.info('create %d unit description(s)\n\t' % n)

        cuds = list()
        for i in range(0, n):

            # create a new CU description, and fill it.
            # Here we don't use dict initialization.
            cud = rp.ComputeUnitDescription()
            if i % 10:
                cud.executable = '/bin/date'
            else:
                # trigger an error now and then
                cud.executable = '/bin/data'  # does not exist
            cuds.append(cud)
            report.progress()

        report.ok('>>ok\n')

        # Submit the previously created ComputeUnit descriptions to the
        # UnitManager. This will trigger the selected scheduler to start
        # assigning ComputeUnits to the ComputePilots.
        units = umgr.submit_units(cuds)
Example #21
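    # NOTE: assumes radical.pilot is imported as rp and that bash_exec,
    # bash_args and the staging lists are attributes of the surrounding class.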
    def to_cud(self):
        cud = rp.ComputeUnitDescription()
        cud.executable = self.bash_exec
        cud.arguments = self.bash_args + list(self)
        cud.input_staging = self.input_staging
        cud.output_staging = self.output_staging