예제 #1
0
    def test__pilotmanager_list_pilots_after_reconnect(self):
        """Test if listing pilots after a reconnect works as expected.

        Two pilot managers in the same session each get two pilots submitted.
        Both managers are then looked up again through
        ``session.get_pilot_managers()`` and must still list two pilots.
        """
        session = rp.Session()

        # Close the session even when an assertion fails, so that a failing
        # test does not leave the session open.
        try:
            pm1 = rp.PilotManager(session=session)
            assert len(pm1.list_pilots()) == 0, "Wrong number of pilots returned."

            pm2 = rp.PilotManager(session=session)
            assert len(pm2.list_pilots()) == 0, "Wrong number of pilots returned."

            for _ in range(2):
                cpd = rp.ComputePilotDescription()
                cpd.resource = "local.localhost"
                cpd.cores = 1
                cpd.runtime = 1
                cpd.sandbox = "/tmp/rp.sandbox.unittests"
                cpd.cleanup = True

                pm1.submit_pilots(descriptions=cpd)
                pm2.submit_pilots(descriptions=cpd)

            assert len(pm1.list_pilots()) == 2, "Wrong number of pilots returned."
            assert len(pm2.list_pilots()) == 2, "Wrong number of pilots returned."

            # "Reconnect": fetch the managers again by their UIDs.
            pm1_r = session.get_pilot_managers(pilot_manager_ids=pm1.uid)
            pm2_r = session.get_pilot_managers(pilot_manager_ids=pm2.uid)

            assert len(
                pm1_r.list_pilots()) == 2, "Wrong number of pilots returned."
            assert len(
                pm2_r.list_pilots()) == 2, "Wrong number of pilots returned."

        finally:
            session.close()
예제 #2
0
def test_runtime_mismatch(pilot_description):
    """Check that a Runtime rejects objects that belong to another session.

    A pilot manager, a pilot and a task manager are created in a first
    session, which is then closed.  A Runtime bound to a second session must
    refuse those stale objects and accept managers created in its own session.
    """
    silenced_modules = (
        'radical.pilot.task_manager',
        'radical.pilot.db.database',
        'radical.pilot.session',
    )

    with warnings.catch_warnings():
        for module_name in silenced_modules:
            warnings.filterwarnings('ignore', category=DeprecationWarning,
                                    module=module_name)

        session = rp.Session()

        with session:
            stale_pmgr = rp.PilotManager(session=session)
            stale_pilot = stale_pmgr.submit_pilots(
                rp.PilotDescription(pilot_description))
            stale_tmgr = rp.TaskManager(session=session)
            stale_tmgr.add_pilots(stale_pilot)

        assert session.closed
        # `stale_pilot.state in rp.FINAL` is not guaranteed at this point:
        # the Pilot and other components may still be shutting down.  The
        # intention is only that the pmgr, pilot and tmgr above are now
        # "stale".

        session = rp.Session()

        with session:
            runtime = Runtime(session=session)

            # Task manager: the stale one is rejected, a fresh one accepted.
            with pytest.raises(APIError):
                runtime.task_manager(stale_tmgr)
            stale_tmgr.close()

            fresh_tmgr = rp.TaskManager(session=session)
            runtime.task_manager(fresh_tmgr)

            # Pilot manager: same expectation.
            with pytest.raises(APIError):
                runtime.pilot_manager(stale_pmgr)
            stale_pmgr.close()

            fresh_pmgr = rp.PilotManager(session=session)
            runtime.pilot_manager(fresh_pmgr)

            # The UID will not resolve in the stored PilotManager.
            with pytest.raises(ValueError):
                runtime.pilot(stale_pilot.uid)

            # The Pilot object itself is detectably invalid.
            with pytest.raises(APIError):
                runtime.pilot(stale_pilot)

            # Even here, the old Pilot may still be in 'PMGR_ACTIVE_PENDING'.
            if stale_pilot.state not in rp.FINAL:
                stale_pilot.cancel()
            fresh_tmgr.close()
            fresh_pmgr.close()
        assert session.closed
예제 #3
0
    def test__pilot_errors(self):
        """Test if pilot errors are raised properly.

        Submits one pilot with a non-existing sandbox directory and one pilot
        requesting an unreasonable number of cores; both must end up in the
        FAILED state.
        """
        def describe(cores, sandbox):
            # Build a one-minute-runtime local pilot description.
            descr = rp.ComputePilotDescription()
            descr.resource = "local.localhost"
            descr.cores = cores
            descr.runtime = 1
            descr.sandbox = sandbox
            descr.cleanup = True
            return descr

        session = rp.Session()

        try:
            pmgr = rp.PilotManager(session=session)

            # Case 1: the sandbox directory does not exist.
            bad_sandbox = pmgr.submit_pilots(
                descriptions=describe(1, "/non-/existing/directory..."))
            bad_sandbox.wait(timeout=300)
            assert bad_sandbox.state == rp.FAILED, \
                "State is '%s' instead of 'Failed'." % bad_sandbox.state

            # Case 2: far too many cores -- this should fail, at least in
            # 2014 ;-)
            too_large = pmgr.submit_pilots(
                descriptions=describe(100000000000,
                                      "/tmp/rp.sandbox.unittests"))
            too_large.wait(timeout=300)
            assert too_large.state == rp.FAILED, (
                "state should be %s and not %s" % (rp.FAILED, too_large.state))

        finally:
            session.close()
예제 #4
0
    def test__pilot_cancel(self):
        """Test if we can cancel a pilot.

        The pilot is submitted, awaited until it is active, cancelled, and
        must then reach the CANCELED state with a stop time set.
        """
        session = rp.Session()

        try:
            pmgr = rp.PilotManager(session=session)

            descr = rp.ComputePilotDescription()
            descr.resource = "local.localhost"
            descr.cores = 1
            descr.runtime = 1
            descr.sandbox = "/tmp/rp.sandbox.unittests"
            descr.cleanup = True

            pilot = pmgr.submit_pilots(descriptions=descr)

            # Freshly submitted: no start or stop time recorded yet.
            assert pilot is not None
            assert pilot.start_time is None
            assert pilot.stop_time is None

            # Wait for the pilot to become active (or to fail).
            pilot.wait(state=[rp.PMGR_ACTIVE, rp.FAILED], timeout=300)
            assert pilot.submission_time is not None
            assert pilot.state == rp.PMGR_ACTIVE
            assert pilot.start_time is not None

            # Cancel the active pilot and wait for it to finish.
            pilot.cancel()
            pilot.wait(timeout=300)

            assert pilot.state == rp.CANCELED
            assert pilot.stop_time is not None

        finally:
            session.close()
예제 #5
0
    def __init__(self, descr: dict, executor: jpsi.JobExecutor,
                 url: str) -> None:
        """Set up an RP session with one local pilot for this adaptor.

        Args:
            descr: adaptor description, passed on to the base class.
            executor: the job executor, passed on to the base class.
            url: endpoint URL; its schema must be ``rp``.

        Raises:
            ValueError: if the URL schema is not ``rp``.
            Exception: any error raised while creating the session, the
                managers or the pilot is logged and re-raised.
        """

        jpsi.ExecutorAdaptorBase.__init__(self, descr, executor, url)

        self._url = ru.Url(url)
        if self._url.schema != 'rp':
            # Format the message explicitly: passing the URL as a second
            # argument to ValueError would leave the '%s' unexpanded.
            raise ValueError('handle only rp:// URLs, not %s' % self._url)

        try:
            self._jobs = dict()  # {job.uid : [JPSI_JOB, RP_TASK]}
            self._lock = mt.Lock()

            self._session = rp.Session()

            self._pmgr = rp.PilotManager(session=self._session)
            self._tmgr = rp.TaskManager(session=self._session)

            self._pmgr.register_callback(self._pilot_state_cb)
            self._tmgr.register_callback(self._task_state_cb)

            # this is layer 0, so we just create a dummy pilot
            pd = rp.PilotDescription({
                'resource': 'local.localhost',
                'cores': 16,
                'runtime': 60
            })
            self._pilot = self._pmgr.submit_pilots(pd)
            self._tmgr.add_pilots(self._pilot)

        except Exception:
            # NOTE(review): `self._log` is not set in this method; presumably
            # the base class initializer provides it -- confirm.
            self._log.exception('init failed')
            raise
예제 #6
0
    def test__issue_114_part_3(self):
        """ https://github.com/radical-cybertools/radical.pilot/issues/114

        A pilot with a runtime of 1 is attached to a unit manager.  It must
        first become active and, on the second wait, reach the DONE state.
        """
        session = rp.Session(database_url=DBURL, database_name=DBNAME)

        # Close the session even when an assertion fails.
        try:
            pm = rp.PilotManager(session=session)

            cpd = rp.ComputePilotDescription()
            cpd.resource = "local.localhost"
            cpd.cores = 1
            cpd.runtime = 1
            cpd.sandbox = "/tmp/radical.pilot.sandbox.unittests"
            cpd.cleanup = True

            pilot = pm.submit_pilots(pilot_descriptions=cpd)

            um = rp.UnitManager(session=session,
                                scheduler=rp.SCHED_DIRECT_SUBMISSION)
            um.add_pilots(pilot)

            # First wait: the pilot should become active.
            state = pm.wait_pilots(state=[rp.PMGR_ACTIVE, rp.DONE, rp.FAILED],
                                   timeout=10 * 60)

            assert state == [rp.PMGR_ACTIVE], 'state      : %s' % state
            assert pilot.state == rp.PMGR_ACTIVE, 'pilot state: %s' % pilot.state

            # Second wait (no target state given): the pilot should be DONE.
            state = pm.wait_pilots(timeout=3 * 60)

            assert state == [rp.DONE], 'state      : %s' % state
            assert pilot.state == rp.DONE, 'pilot state: %s' % pilot.state

        finally:
            session.close()
예제 #7
0
    def setUp(self):
        """Create the session, the managers and the pilot description values.

        Stores on the instance: the resource label, a new session, a pilot
        manager and a unit manager bound to that session, and ``pd_init``, a
        dict of pilot description values read from ``self.config`` for the
        resource.
        """
        # The resource is hard-coded to 'localhost' for now.
        self.resource = 'local.localhost'

        # Session creation is deliberately not guarded: if it fails, there
        # is not much we can do anyway.
        self.session = rp.Session()

        # One pilot manager and one unit manager, both bound to the session.
        self.pmgr = rp.PilotManager(session=self.session)
        self.umgr = rp.UnitManager(session=self.session)

        # Values used to initialize the pilot description; the per-resource
        # entries come from the configuration.
        resource_cfg = self.config[self.resource]
        self.pd_init = {
            'resource': self.resource,
            'runtime': 15,  # pilot runtime (min)
            'exit_on_error': True,
            'project': resource_cfg['project'],
            'queue': resource_cfg['queue'],
            'access_schema': resource_cfg['schema'],
            'cores': resource_cfg['cores'],
        }
예제 #8
0
def rp_setup_state(request):
    """Set up a session with one local pilot attached to a unit manager.

    Args:
        request: the fixture request object; only ``addfinalizer`` is used.

    Returns:
        The tuple ``(pilot, pmgr, umgr)``.
    """

    session = rp.Session(database_url=db_url)

    def fin():
        print('closing session')
        session.close()

    # Register the finalizer before the setup below, so that the session is
    # also closed when that setup raises.
    request.addfinalizer(fin)

    try:
        pmgr = rp.PilotManager(session=session)
        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHED_DIRECT_SUBMISSION,
                              output_transfer_workers=4,
                              input_transfer_workers=4)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = "local.localhost"
        pdesc.runtime = 20
        pdesc.cores = 1
        pdesc.cleanup = True

        pilot = pmgr.submit_pilots(pdesc)
        pilot.register_callback(pilot_state_cb)

        umgr.add_pilots(pilot)

    except Exception:
        # print() with a single argument behaves the same on Python 2 and 3.
        print('test failed')
        raise

    return pilot, pmgr, umgr
예제 #9
0
def rp_setup_short(request):
    """Set up a session with one short-running local pilot.

    The registered finalizer cancels the pilots, waits for them and closes
    the session.

    Args:
        request: the fixture request object; only ``addfinalizer`` is used.

    Returns:
        The tuple ``(pilot, pmgr, umgr)``.
    """

    session = rp.Session(database_url=db_url)

    try:
        pmgr = rp.PilotManager(session=session)
        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHED_DIRECT_SUBMISSION)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = "local.localhost"
        pdesc.runtime = 1
        pdesc.cores = 1
        pdesc.sandbox = "/tmp/radical.pilot.sandbox.unittests"
        pdesc.cleanup = True

        pilot = pmgr.submit_pilots(pdesc)
        pilot.register_callback(pilot_state_cb)

        umgr.add_pilots(pilot)

    except Exception:
        # print() with a single argument behaves the same on Python 2 and 3.
        print('test failed')
        raise

    def fin():
        pmgr.cancel_pilots()
        pmgr.wait_pilots()

        print('closing session')
        session.close()

    request.addfinalizer(fin)

    return pilot, pmgr, umgr
    def test__add_resource_config_2(self):
        """Test submitting a pilot to a resource config added at runtime.

        A ``ResourceConfig`` named "mylocalhost" is added to the session and
        a pilot is submitted against it, awaited and cancelled.
        """
        session = rp.Session()

        # Close the session even when a step below raises.
        try:
            rc = rp.ResourceConfig("mylocalhost")
            rc.task_launch_method   = "LOCAL"
            rc.mpi_launch_method    = "MPIRUN"
            rc.job_manager_endpoint = "fork://localhost"
            rc.filesystem_endpoint  = "file://localhost/"
            rc.bootstrapper         = "default_bootstrapper.sh"

            pm = rp.PilotManager(session=session)
            session.add_resource_config(rc)

            pd = rp.ComputePilotDescription()
            pd.resource = "mylocalhost"
            pd.cores    = 1
            pd.runtime  = 1
            pd.sandbox  = "/tmp/rp.sandbox.unittests"
            pd.cleanup  = True

            pilot = pm.submit_pilots(pd)
            pilot.wait(timeout=300)
            pilot.cancel()

        finally:
            session.close()
예제 #11
0
def test_runtime_bad_uid(pilot_description):
    """Check that a Runtime raises ValueError for unknown manager UIDs.

    The string 'spam' is passed where a task manager or pilot manager is
    expected, both before and after real managers have been stored.
    """
    silenced_modules = (
        'radical.pilot.task_manager',
        'radical.pilot.db.database',
        'radical.pilot.session',
    )

    with warnings.catch_warnings():
        for module_name in silenced_modules:
            warnings.filterwarnings('ignore', category=DeprecationWarning,
                                    module=module_name)

        session = rp.Session()

        with session:
            runtime = Runtime(session=session)

            # Unknown task manager UID, then a real task manager.
            with pytest.raises(ValueError):
                runtime.task_manager('spam')

            task_mgr = rp.TaskManager(session=session)
            runtime.task_manager(task_mgr)

            # Unknown pilot manager UID, then a real pilot manager.
            with pytest.raises(ValueError):
                runtime.pilot_manager('spam')

            pilot_mgr = rp.PilotManager(session=session)
            runtime.pilot_manager(pilot_mgr)

            # The unknown UID must still be rejected once a manager is stored.
            # NOTE(review): this repeats the pilot_manager check above; a
            # different accessor may have been intended -- confirm.
            with pytest.raises(ValueError):
                runtime.pilot_manager('spam')

            task_mgr.close()
            pilot_mgr.close()

        assert session.closed
예제 #12
0
    def __init__(self, log=None, rep=None, prof=None):
        """Create the RP session, its managers and the data staging dir.

        Args:
            log: logger to use; if falsy, ``ru.Logger('radical.nge')`` is
                created.
            rep: reporter to use; if falsy, ``ru.Reporter('radical.nge')`` is
                created.
            prof: profiler to use; if falsy, ``ru.Profiler('radical.nge')``
                is created.
        """

        if log: self._log = log
        else: self._log = ru.Logger('radical.nge')

        # Store the reporter that was passed in (the logger used to be
        # stored here by mistake).
        if rep: self._rep = rep
        else: self._rep = ru.Reporter('radical.nge')

        if prof: self._prof = prof
        else: self._prof = ru.Profiler('radical.nge')

        self._session = rp.Session()
        self._pmgr = rp.PilotManager(self._session)
        self._umgr = rp.UnitManager(self._session)

        self._pmgr.register_callback(self._pilot_state_cb)
        self._umgr.register_callback(self._unit_state_cb)

        # create a dir for data staging, named after the session uid, below
        # the current working directory
        self._pwd = os.getcwd()
        self._data = 'data.%s' % self._session.uid
        os.makedirs('%s/%s/' % (self._pwd, self._data))

        # track submitted tasks
        self._tcnt = 0
        self._tasks = dict()
예제 #13
0
    def test__pilotmanager_wait(self):
        """Test if wait() waits until all (2) pilots have reached 'DONE' state.
        """
        session = rp.Session()

        # Close the session even when an assertion fails.
        try:
            pmgr = rp.PilotManager(session=session)

            cpd1 = rp.ComputePilotDescription()
            cpd1.resource = "local.localhost"
            cpd1.cores = 1
            cpd1.runtime = 1
            cpd1.sandbox = "/tmp/rp.sandbox.unittests"
            cpd1.cleanup = True

            # Same as cpd1, except for the longer runtime.
            cpd2 = rp.ComputePilotDescription()
            cpd2.resource = "local.localhost"
            cpd2.cores = 1
            cpd2.runtime = 2
            cpd2.sandbox = "/tmp/rp.sandbox.unittests"
            cpd2.cleanup = True

            pilots = pmgr.submit_pilots([cpd1, cpd2])

            pmgr.wait_pilots(timeout=300)

            for pilot in pilots:
                assert pilot.state == rp.DONE, \
                    "Expected state 'Done' but state is %s" % pilot.state
                assert pilot.stop_time is not None
                assert pilot.start_time is not None

        finally:
            session.close()
예제 #14
0
    def test__pilotmanager_create(self):
        """Test if pilot manager creation works as expected.

        The session must list no pilot managers at first, exactly the new
        manager after the first creation, and two managers after the second.
        """
        session = rp.Session()

        # Close the session even when an assertion fails.
        try:
            assert session.list_pilot_managers() == [], \
                "Wrong number of pilot managers"

            pm = rp.PilotManager(session=session)
            assert session.list_pilot_managers() == [pm.uid], \
                "Wrong list of pilot managers"

            pm = rp.PilotManager(session=session)
            assert len(session.list_pilot_managers()) == 2, \
                "Wrong number of pilot managers"

        finally:
            session.close()
예제 #15
0
def test_pass_issue258():
    """Waiting on an unknown pilot ID must raise ``KeyError``."""

    session = rp.Session(database_url=db_url)

    # Close the session even when the expectation below is not met.
    try:
        # Create the manager outside of `pytest.raises`, so that only the
        # `wait_pilots()` call can satisfy the expected KeyError.
        pmgr = rp.PilotManager(session=session)

        with pytest.raises(KeyError):
            pmgr.wait_pilots(pilot_ids="12", state=rp.ACTIVE)

    finally:
        session.close()
예제 #16
0
def test_pass_issue_57():
    """Run ``2 * cores`` /bin/date units on pilots of 16, 32 and 64 cores.

    A new session is created for every pilot size and closed again
    afterwards, whether or not the run succeeded.
    """

    for i in [16, 32, 64]:

        session = rp.Session(database_url=db_url)

        try:

            c = rp.Context('ssh')
            c.user_id = CONFIG["xsede.stampede"]["user_id"]
            session.add_context(c)

            pmgr = rp.PilotManager(session=session)
            umgr = rp.UnitManager(session=session,
                                  scheduler=rp.SCHED_ROUND_ROBIN)

            pdesc = rp.ComputePilotDescription()
            pdesc.resource = "xsede.stampede"
            pdesc.project = CONFIG["xsede.stampede"]["project"]
            pdesc.cores = i
            pdesc.runtime = 20
            pdesc.cleanup = False

            pilots = pmgr.submit_pilots(pdesc)

            umgr.add_pilots(pilots)

            # Twice as many single-core units as the pilot has cores.
            unit_descrs = []
            for _ in range(i * 2):
                cu = rp.ComputeUnitDescription()
                cu.cores = 1
                cu.executable = "/bin/date"
                unit_descrs.append(cu)

            units = umgr.submit_units(unit_descrs)

            try:
                umgr.wait_units()

                for unit in units:
                    unit.wait()
            except Exception:
                # Best effort: errors while waiting are ignored so that the
                # pilots are still cancelled below.  Unlike a bare `except`,
                # this lets KeyboardInterrupt / SystemExit propagate.
                pass

            pmgr.cancel_pilots()
            pmgr.wait_pilots()

        except Exception:
            # print() with a single argument behaves the same on Python 2
            # and 3.
            print("TEST FAILED")
            raise

        finally:
            session.close()
예제 #17
0
    def test__pilotmanager_get_pilots(self):
        """Test if get_pilots() returns the pilots of the right manager.

        Two pilot managers each get two pilots submitted; every pilot looked
        up through a manager must carry a UID that was submitted to that
        same manager.
        """
        session = rp.Session()

        # Close the session even when an assertion fails.
        try:
            pm1 = rp.PilotManager(session=session)
            assert len(pm1.list_pilots()) == 0, "Wrong number of pilots returned."

            pm2 = rp.PilotManager(session=session)
            assert len(pm2.list_pilots()) == 0, "Wrong number of pilots returned."

            pm1_pilot_uids = []
            pm2_pilot_uids = []

            for _ in range(2):
                cpd = rp.ComputePilotDescription()
                cpd.resource = "local.localhost"
                cpd.cores = 1
                cpd.runtime = 1
                cpd.sandbox = "/tmp/rp.sandbox.unittests"
                cpd.cleanup = True

                pilot_pm1 = pm1.submit_pilots(descriptions=cpd)
                pm1_pilot_uids.append(pilot_pm1.uid)

                pilot_pm2 = pm2.submit_pilots(descriptions=cpd)
                pm2_pilot_uids.append(pilot_pm2.uid)

            for uid in pm1.list_pilots():
                pilot = pm1.get_pilots(uid)
                assert pilot.uid in pm1_pilot_uids, \
                    "Wrong pilot ID %s (not in %s)" % (pilot.uid,
                                                       pm1_pilot_uids)

            assert len(pm1.get_pilots()) == 2, "Wrong number of pilots."

            for uid in pm2.list_pilots():
                pilot = pm2.get_pilots(uid)
                assert pilot.uid in pm2_pilot_uids, \
                    "Wrong pilot ID %s" % pilot.uid

            assert len(pm2.get_pilots()) == 2, "Wrong number of pilots."

        finally:
            session.close()
예제 #18
0
def _new_pilotmanager(session: rp.Session):
    """Return a new PilotManager for *session*.

    DeprecationWarnings attributed to a few radical.pilot modules are
    ignored while the manager is being created.
    """
    silenced_modules = (
        'radical.pilot.task_manager',
        'radical.pilot.db.database',
        'radical.pilot.session',
    )

    with warnings.catch_warnings():
        for module_name in silenced_modules:
            warnings.filterwarnings('ignore',
                                    category=DeprecationWarning,
                                    module=module_name)

        pilot_manager = rp.PilotManager(session=session)

    return pilot_manager
예제 #19
0
    def test__unitmanager_pilot_assoc(self):
        """Test if unit manager <-> pilot association works as expected.

        Covers adding a pilot, adding it a second time, removing it, and
        adding and removing two further pilots.
        """
        session = rp.Session()

        # Close the session even when an assertion fails.
        try:
            pm = rp.PilotManager(session=session)

            cpd = rp.ComputePilotDescription()
            cpd.resource = "local.localhost"
            cpd.cores = 1
            cpd.runtime = 1
            cpd.sandbox = "/tmp/rp.sandbox.unittests"
            cpd.cleanup = True

            p1 = pm.submit_pilots(descriptions=cpd)

            um = rp.UnitManager(session=session, scheduler='round_robin')
            assert um.list_pilots() == [], "Wrong list of pilots"

            um.add_pilots(p1)
            assert um.list_pilots() == [p1.uid], "Wrong list of pilots"

            # adding the same pilot twice should be ignored
            um.add_pilots(p1)
            assert um.list_pilots() == [p1.uid], "Wrong list of pilots"

            um.remove_pilots(p1.uid)
            assert um.list_pilots() == [], "Wrong list of pilots"

            pilot_list = []
            for _ in range(2):
                cpd = rp.ComputePilotDescription()
                cpd.resource = "local.localhost"
                cpd.cores = 1
                cpd.runtime = 1
                cpd.sandbox = "/tmp/rp.sandbox.unittests"
                cpd.cleanup = True
                p = pm.submit_pilots(descriptions=cpd)
                um.add_pilots(p)
                pilot_list.append(p)

            pl = um.list_pilots()
            assert len(pl) == 2, "Wrong number of associated pilots"
            for p in pilot_list:
                # Compare against the UIDs reported by the unit manager; the
                # previous check tested the list against itself and could
                # never fail.
                assert p.uid in pl, "Unknown pilot in list"
                um.remove_pilots(p.uid)

            assert um.list_pilots() == [], "Wrong list of pilots"

        finally:
            session.close()
예제 #20
0
    def test__issue_262(self):
        """ https://github.com/radical-cybertools/radical.pilot/issues/18

        Checks that the log entries of a pilot and of a unit expose
        "timestamp" and "message" and can be formatted as a string.

        NOTE(review): the test name refers to issue 262 while the URL above
        points to issue 18 -- confirm which one is intended.
        """
        session = rp.Session()

        # Close the session even when an assertion fails.
        try:
            pmgr = rp.PilotManager(session=session)

            # Create a small local pilot (one core) to run a single unit on.
            pd = rp.ComputePilotDescription()
            pd.resource  = "local.localhost"
            pd.cores     = 1
            pd.runtime   = 1

            pilot = pmgr.submit_pilots(pd)

            umgr = rp.UnitManager(
                session=session,
                scheduler=rp.SCHED_DIRECT_SUBMISSION)
            umgr.add_pilots(pilot)

            cud = rp.ComputeUnitDescription()
            cud.executable    = "/bin/sleep"
            cud.arguments     = ["10"]
            cud.cores         = 1
            cud.input_staging = ["/etc/group"]

            unit = umgr.submit_units(cud)
            umgr.wait_units()

            # NOTE(review): `unicode` only exists on Python 2; the checks
            # below are kept as they were.
            for log_entry in pilot.log:
                ld = log_entry.as_dict()
                assert "timestamp" in ld
                assert "message"   in ld

                s = "%s" % log_entry
                assert type(s) == unicode

            for log_entry in unit.log:
                ld = log_entry.as_dict()
                assert "timestamp" in ld
                assert "message"   in ld

                s = "%s" % log_entry
                assert type(s) == unicode

        finally:
            session.close()
예제 #21
0
def start_pilot(cr=None):
    """
    In order to start a pilot on the newly created CR, we need to define
    a resource description for that CR.  To do so, we programmatically create
    a clone of the local.localhost description, and replace the job submission
    URL with an ssh:// URL pointing to the CR.

    Args:
        cr: object with an ``access`` attribute holding the endpoint URL.
            If falsy, a placeholder pointing to
            'ssh://remote.host.net:1234/' is used.

    Returns:
        Whatever ``PilotManager.submit_pilots()`` returns for the submitted
        pilot description.

    Raises:
        KeyError: if the ``EC2_KEYPAIR`` environment variable is not set.
    """

    if not cr:

        class _CR(object):
            def __init__(self):
                self.access = 'ssh://remote.host.net:1234/'

        cr = _CR()

    # get the local resource config
    session = rp.Session()
    cfg = session.get_resource_config('local.localhost')

    # create a new config based on the local one, and add it back
    new_cfg = rp.ResourceConfig('ec2.vm', cfg)
    new_cfg.schemas = ['ssh']
    new_cfg['ssh']['job_manager_endpoint'] = cr.access
    new_cfg['ssh']['filesystem_endpoint'] = cr.access

    # the new config needs to make sure we can bootstrap on the VM
    new_cfg['pre_bootstrap_1'] = [
        'sudo apt-get update',
        'sudo apt-get install -y python-virtualenv python-dev dnsutils bc'
    ]
    session.add_resource_config(new_cfg)

    # use the *same* ssh key for ssh access to the VM
    ssh_ctx = rs.Context('SSH')
    ssh_ctx.user_id = 'admin'
    ssh_ctx.user_key = os.environ['EC2_KEYPAIR']
    session.contexts.append(ssh_ctx)

    # submit a pilot to it.
    pd = rp.ComputePilotDescription()
    pd.resource = 'ec2.vm'
    pd.runtime = 10
    pd.cores = 1
    # A plain boolean: a trailing comma here used to turn the value into the
    # tuple `(True,)`.
    pd.exit_on_error = True

    # The session is intentionally left open: the returned pilot was
    # submitted through a manager bound to it.
    pmgr = rp.PilotManager(session=session)
    return pmgr.submit_pilots(pd)
예제 #22
0
def setup_gordon(request):
    """Set up a session with one 16-core pilot on "xsede.gordon".

    The registered finalizer cancels the pilots, waits for them, closes the
    session and then sleeps for five seconds.

    Args:
        request: the fixture request object; only ``addfinalizer`` is used.

    Returns:
        The tuple ``(session1, pilot1, pmgr1, umgr1, "xsede.gordon")``.
    """

    session1 = rp.Session()

    # print() with a single argument behaves the same on Python 2 and 3.
    print("session id gordon: {0}".format(session1.uid))

    c = rp.Context('ssh')
    c.user_id = CONFIG["xsede.gordon"]["user_id"]
    session1.add_context(c)

    try:
        pmgr1 = rp.PilotManager(session=session1)

        print("pm id gordon: {0}".format(pmgr1.uid))

        umgr1 = rp.UnitManager(session=session1,
                               scheduler=rp.SCHEDULER_DIRECT_SUBMISSION)

        pdesc1 = rp.ComputePilotDescription()
        pdesc1.resource = "xsede.gordon"
        pdesc1.project  = CONFIG["xsede.gordon"]["project"]
        pdesc1.runtime  = 30
        pdesc1.cores    = 16
        pdesc1.cleanup  = False

        pilot1 = pmgr1.submit_pilots(pdesc1)
        pilot1.register_callback(pilot_state_cb)

        umgr1.add_pilots(pilot1)

    except Exception:
        print('test failed')
        raise

    def fin():
        print("finalizing...")
        pmgr1.cancel_pilots()
        pmgr1.wait_pilots()

        print('closing session')
        session1.close()
        time.sleep(5)

    request.addfinalizer(fin)

    return session1, pilot1, pmgr1, umgr1, "xsede.gordon"
예제 #23
0
    def test__issue_114_part_1(self):
        """ https://github.com/radical-cybertools/radical.pilot/issues/114

        Waits for a pilot to become active, submits two sleeping units and
        checks the unit states reported by two successive waits.
        """
        session = rp.Session(database_url=DBURL, database_name=DBNAME)

        # Close the session even when an assertion fails.
        try:
            pm = rp.PilotManager(session=session)

            cpd = rp.ComputePilotDescription()
            cpd.resource = "local.localhost"
            cpd.cores = 1
            cpd.runtime = 5
            cpd.sandbox = "/tmp/radical.pilot.sandbox.unittests"
            cpd.cleanup = True

            pilot = pm.submit_pilots(pilot_descriptions=cpd)
            pm.wait_pilots(state=[rp.PMGR_ACTIVE, rp.DONE, rp.FAILED],
                           timeout=5 * 60)

            assert (pilot.state == rp.PMGR_ACTIVE), \
                "pilot state: %s" % pilot.state

            um = rp.UnitManager(session=session,
                                scheduler=rp.SCHED_DIRECT_SUBMISSION)
            um.add_pilots(pilot)

            all_tasks = []

            for _ in range(2):
                cudesc = rp.ComputeUnitDescription()
                cudesc.cores = 1
                cudesc.executable = "/bin/sleep"
                cudesc.arguments = ['60']
                all_tasks.append(cudesc)

            um.submit_units(all_tasks)
            states = um.wait_units(state=[rp.SCHEDULING, rp.AGENT_EXECUTING],
                                   timeout=2 * 60)

            assert rp.SCHEDULING in states, "states: %s" % states

            states = um.wait_units(state=[rp.AGENT_EXECUTING, rp.DONE],
                                   timeout=1 * 60)

            assert rp.AGENT_EXECUTING in states, "states: %s" % states

        finally:
            session.close()
예제 #24
0
def test_rp_basic_task(rp_config):
    """Run a single /bin/date task on a local pilot and check its exit code.

    Args:
        rp_config: mapping with the radical.pilot module under ``'rp'`` and
            per-resource settings under ``'config'``.
    """
    rp = rp_config['rp']

    # Note: Session creation will fail with a FileNotFound error unless venv is explicitly `activate`d.
    # TODO: Figure out what `activate` does that `rp-venv/bin/python` doesn't do.
    with rp.Session() as session:
        # Based on `radical.pilot/examples/config.json`
        # TODO: Does the Session have a default spec for 'local.localhost'? Can/should we reference it?
        # See also https://github.com/radical-cybertools/radical.pilot/issues/2181
        resource = 'local.localhost'
        resource_config = {resource: {}}
        if resource in rp_config['config']:
            # Use item access, as everywhere else in this function;
            # attribute access (`rp_config.config`) fails on a plain dict.
            resource_config[resource].update(rp_config['config'][resource])
        # These values override whatever the configuration provided.
        resource_config[resource].update({
            'project': None,
            'queue': None,
            'schema': None,
            'cores': 1,
            'gpus': 0
        })

        pilot_description = dict(resource=resource,
                                 runtime=30,
                                 exit_on_error=True,
                                 project=resource_config[resource]['project'],
                                 queue=resource_config[resource]['queue'],
                                 cores=resource_config[resource]['cores'],
                                 gpus=resource_config[resource]['gpus'])

        task_description = {
            'executable': '/bin/date',
            'cpu_processes': 1,
        }

        pmgr = rp.PilotManager(session=session)
        umgr = rp.UnitManager(session=session)
        pilot = pmgr.submit_pilots(
            rp.ComputePilotDescription(pilot_description))
        task = umgr.submit_units(rp.ComputeUnitDescription(task_description))

        umgr.add_pilots(pilot)
        umgr.wait_units()

        assert task.exit_code == 0
    assert session.closed
예제 #25
0
def setup_comet(request):
    """Set up a session with one 24-core pilot on "xsede.comet".

    The registered finalizer cancels the pilots, waits for them and closes
    the session.

    Args:
        request: the fixture request object; only ``addfinalizer`` is used.

    Returns:
        The tuple ``(session2, pilot2, pmgr2, umgr2, "xsede.comet")``.
    """

    session2 = rp.Session()

    # print() with a single argument behaves the same on Python 2 and 3.
    print("session id comet: {0}".format(session2.uid))

    c = rp.Context('ssh')
    c.user_id = CONFIG["xsede.comet"]["user_id"]
    session2.add_context(c)

    try:
        pmgr2 = rp.PilotManager(session=session2)

        # The label used to read "gordon", copied from the gordon setup.
        print("pm id comet: {0}".format(pmgr2.uid))

        umgr2 = rp.UnitManager(session=session2,
                               scheduler=rp.SCHED_DIRECT_SUBMISSION)

        pdesc2 = rp.ComputePilotDescription()
        pdesc2.resource = "xsede.comet"
        pdesc2.project = CONFIG["xsede.comet"]["project"]
        pdesc2.runtime = 30
        pdesc2.cores = 24
        pdesc2.cleanup = False

        pilot2 = pmgr2.submit_pilots(pdesc2)
        pilot2.register_callback(pilot_state_cb)

        umgr2.add_pilots(pilot2)

    except Exception:
        print('test failed')
        raise

    def fin():
        print("finalizing...")
        pmgr2.cancel_pilots()
        pmgr2.wait_pilots()

        print('closing session')
        session2.close()

    request.addfinalizer(fin)

    return session2, pilot2, pmgr2, umgr2, "xsede.comet"
예제 #26
0
def setup_stampede_two(request):
    """Set up a session with one pilot on "xsede.stampede".

    The pilot requests twice the number of cores configured for the
    resource.  The registered finalizer cancels the pilots, waits for them
    and closes the session.

    Args:
        request: the fixture request object; only ``addfinalizer`` is used.

    Returns:
        The tuple ``(session3, pilot3, pmgr3, umgr3, "xsede.stampede")``.
    """

    session3 = rp.Session()

    # print() with a single argument behaves the same on Python 2 and 3.
    print("session id stampede: {0}".format(session3.uid))

    c = rp.Context('ssh')
    c.user_id = CONFIG["xsede.stampede"]["user_id"]
    session3.add_context(c)

    try:
        pmgr3 = rp.PilotManager(session=session3)

        print("pm id stampede: {0}".format(pmgr3.uid))

        umgr3 = rp.UnitManager(session=session3,
                               scheduler=rp.SCHED_DIRECT_SUBMISSION)

        pdesc3 = rp.ComputePilotDescription()
        pdesc3.resource = "xsede.stampede"
        pdesc3.project = CONFIG["xsede.stampede"]["project"]
        pdesc3.runtime = 20
        pdesc3.cores = int(CONFIG["xsede.stampede"]["cores"]) * 2
        pdesc3.cleanup = False

        pilot3 = pmgr3.submit_pilots(pdesc3)
        pilot3.register_callback(pilot_state_cb)

        umgr3.add_pilots(pilot3)

    except Exception:
        print('test failed')
        raise

    def fin():
        print("finalizing...")
        pmgr3.cancel_pilots()
        pmgr3.wait_pilots()

        print('closing session')
        session3.close()

    request.addfinalizer(fin)

    return session3, pilot3, pmgr3, umgr3, "xsede.stampede"
def setup_stampede_683(request):
    """Set up a session with one 683-core pilot on "xsede.stampede".

    The unit manager is created with ``rp.SCHEDULER_BACKFILLING``.  The
    registered finalizer cancels the pilots, waits for them and closes the
    session.

    Args:
        request: the fixture request object; only ``addfinalizer`` is used.

    Returns:
        The tuple ``(session, pilot, pmgr, umgr, "xsede.stampede")``.
    """

    session = rp.Session()

    # print() with a single argument behaves the same on Python 2 and 3.
    print("session id stampede: {0}".format(session.uid))

    c = rp.Context('ssh')
    c.user_id = CONFIG["xsede.stampede"]["user_id"]
    session.add_context(c)

    try:
        pmgr = rp.PilotManager(session=session)

        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHEDULER_BACKFILLING)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = "xsede.stampede"
        pdesc.project = CONFIG["xsede.stampede"]["project"]
        pdesc.runtime = 40
        pdesc.cores = 683
        pdesc.cleanup = False

        pilot = pmgr.submit_pilots(pdesc)
        pilot.register_callback(pilot_state_cb)

        umgr.add_pilots(pilot)

    except Exception:
        print('test failed')
        raise

    def fin():
        print("finalizing...")
        pmgr.cancel_pilots()
        pmgr.wait_pilots()

        print('closing session')
        session.close()

    request.addfinalizer(fin)

    return session, pilot, pmgr, umgr, "xsede.stampede"
예제 #28
0
    def test__pilotmanager_reconnect(self):
        """Test if pilot manager re-connect works as expected.

        A manager fetched again through ``session.get_pilot_managers()``
        must carry the same UID as the one originally created.
        """
        session = rp.Session()

        # Close the session even when an assertion fails.
        try:
            pm = rp.PilotManager(session=session)
            assert session.list_pilot_managers() == [pm.uid], \
                "Wrong list of pilot managers"

            pm_r = session.get_pilot_managers(pilot_manager_ids=pm.uid)

            assert session.list_pilot_managers() == [pm_r.uid], \
                "Wrong list of pilot managers"

            assert pm.uid == pm_r.uid, "Pilot Manager IDs not matching!"

        finally:
            session.close()
예제 #29
0
    def test__issue_114_part_2(self):
        """ https://github.com/radical-cybertools/radical.pilot/issues/114

        Waits for a pilot to become active, submits one sleeping unit and
        checks that it is first reported as EXECUTING and then as DONE.
        """
        session = rp.Session(database_url=DBURL, database_name=DBNAME)

        # Close the session even when an assertion fails.
        try:
            pm = rp.PilotManager(session=session)

            cpd = rp.ComputePilotDescription()
            cpd.resource = "local.localhost"
            cpd.cores = 1
            cpd.runtime = 5
            cpd.sandbox = "/tmp/radical.pilot.sandbox.unittests"
            cpd.cleanup = True

            pilot = pm.submit_pilots(pilot_descriptions=cpd)

            um = rp.UnitManager(session=session,
                                scheduler=rp.SCHED_DIRECT_SUBMISSION)
            um.add_pilots(pilot)

            pm.wait_pilots(state=[rp.ACTIVE, rp.DONE, rp.FAILED],
                           timeout=5 * 60)

            assert (pilot.state == rp.ACTIVE), "pilot state: %s" % pilot.state

            cudesc = rp.ComputeUnitDescription()
            cudesc.cores = 1
            cudesc.executable = "/bin/sleep"
            cudesc.arguments = ['60']

            cu = um.submit_units(cudesc)
            state = um.wait_units(state=[rp.EXECUTING], timeout=60)

            assert state == [rp.EXECUTING], 'state   : %s' % state
            assert cu.state == rp.EXECUTING, 'cu state: %s' % cu.state

            # Second wait (no target state given): the unit should be DONE.
            state = um.wait_units(timeout=2 * 60)

            assert state == [rp.DONE], 'state   : %s' % state
            assert cu.state == rp.DONE, 'cu state: %s' % cu.state

        finally:
            session.close()
예제 #30
0
def setup_local_1(request):
    """Set up a session with one single-core pilot on "local.localhost".

    The registered finalizer cancels the pilots, waits for them, closes the
    session and then sleeps for five seconds.

    Args:
        request: the fixture request object; only ``addfinalizer`` is used.

    Returns:
        The tuple ``(session1, pilot1, pmgr1, umgr1, "local.localhost")``.
    """

    session1 = rp.Session()

    # print() with a single argument behaves the same on Python 2 and 3.
    print("session id local_1: {0}".format(session1.uid))

    try:
        pmgr1 = rp.PilotManager(session=session1)

        print("pm id local_1: {0}".format(pmgr1.uid))

        umgr1 = rp.UnitManager(session=session1,
                               scheduler=rp.SCHED_DIRECT_SUBMISSION)

        pdesc1 = rp.ComputePilotDescription()
        pdesc1.resource = "local.localhost"
        pdesc1.runtime = 30
        pdesc1.cores = 1
        pdesc1.cleanup = False

        pilot1 = pmgr1.submit_pilots(pdesc1)
        pilot1.register_callback(pilot_state_cb)

        umgr1.add_pilots(pilot1)

    except Exception:
        print('test failed')
        raise

    def fin():
        print("finalizing...")
        pmgr1.cancel_pilots()
        pmgr1.wait_pilots()

        print('closing session')
        session1.close()
        time.sleep(5)

    request.addfinalizer(fin)

    return session1, pilot1, pmgr1, umgr1, "local.localhost"