def test_ids():
    """
    Test ID generation.

    Generates IDs for three prefixes, checks the counter-based IDs against
    their expected literal values, checks prefix and suffix of the
    ``ID_UNIQUE`` IDs, and verifies that invalid arguments are rejected
    with the expected exception type.
    """

    id_a_1 = ru.generate_id('a.')
    id_a_2 = ru.generate_id('a.')
    id_a_3 = ru.generate_id('a.', mode=ru.ID_SIMPLE)

    id_b_1 = ru.generate_id('b.', mode=ru.ID_SIMPLE)
    id_b_2 = ru.generate_id('b.', mode=ru.ID_SIMPLE)

    id_c_1 = ru.generate_id('c.', mode=ru.ID_UNIQUE)
    id_c_2 = ru.generate_id('c.', mode=ru.ID_UNIQUE)

    assert (id_a_1 == 'a.0001'), "'%s' == 'a.0001'" % (id_a_1)
    assert (id_a_2 == 'a.0002'), "'%s' == 'a.0002'" % (id_a_2)
    assert (id_a_3 == 'a.0003'), "'%s' == 'a.0003'" % (id_a_3)

    assert (id_b_1 == 'b.0001'), "'%s' == 'b.0001'" % (id_b_1)
    assert (id_b_2 == 'b.0002'), "'%s' == 'b.0002'" % (id_b_2)

    assert (id_c_1.startswith('c.')), "'%s'.startswith ('c.')" % (id_c_1)
    assert (id_c_2.startswith('c.')), "'%s'.startswith ('c.')" % (id_c_2)

    assert (id_c_1.endswith('.0001')), "'%s'.endswith ('0001')" % (id_c_1)
    assert (id_c_2.endswith('.0002')), "'%s'.endswith ('0002')" % (id_c_2)

    # Each invalid call must raise exactly the listed exception type.  The
    # `else` branch makes the test fail when no exception is raised at all
    # (previously such a call passed silently).
    invalid_calls = [
        (TypeError,  lambda: ru.generate_id(None)),
        (TypeError,  lambda: ru.generate_id(1)),
        (ValueError, lambda: ru.generate_id('a.', mode='RANDOM')),
    ]

    for expected, call in invalid_calls:
        try:
            call()
        except expected:
            pass
        except Exception as e:
            assert (False), "%s != %s" % (expected.__name__, type(e))
        else:
            assert (False), "%s was not raised" % expected.__name__
def test_resolve_placeholder():
    """
    **Purpose**: Test if the 'resolve_placeholder' function resolves expected
    placeholders correctly. These placeholders are used to refer to files in
    different task folders.
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))
    ids = (pipeline, stage, task)

    placeholders = {
        pipeline: {
            stage: {
                task: {'path': '/home/vivek',
                       'rts_uid': 'unit.0002'}
            }
        }
    }

    # Only strings are accepted as raw paths
    for non_string in (1, list(), dict()):
        with pytest.raises(TypeError):
            resolve_placeholders(non_string, placeholders)

    # (raw path, expected resolved path) pairs, checked in order
    expectations = [
        # nothing to resolve
        ('/home/vivek/some_file.txt',
         '/home/vivek/some_file.txt'),
        # shared data location
        ('$SHARED/test.txt',
         'pilot:///test.txt'),
        # shared data location with rename
        ('$SHARED/test.txt > new.txt',
         'pilot:///test.txt > new.txt'),
        # relative data reference
        ('$Pipeline_%s_Stage_%s_Task_%s/some_file.txt' % ids,
         '/home/vivek/some_file.txt'),
        # relative data reference with rename
        ('$Pipeline_%s_Stage_%s_Task_%s/some_file.txt > new.txt' % ids,
         '/home/vivek/some_file.txt > new.txt'),
    ]

    for raw_path, expected in expectations:
        new_path = resolve_placeholders(raw_path, placeholders)
        assert new_path == expected

    # Only placeholders of the form $Pipeline_%s_Stage_%s_Task_%s are accepted
    with pytest.raises(ValueError):
        resolve_placeholders('$Task_2', placeholders)
def test_resolve_args():
    """
    Check that 'resolve_arguments' replaces '$SHARED' and task placeholders
    in an argument list with the expected values.
    """

    # IDs are generated in the same order: pipeline, stage, task, task
    pipeline, stage, t1, t2 = [
        str(ru.generate_id(prefix))
        for prefix in ('pipeline', 'stage', 'task', 'task')
    ]

    task_entries = {
        t1: {'path': '/home/vivek/t1', 'rts_uid': 'unit.0002'},
        t2: {'path': '/home/vivek/t2', 'rts_uid': 'unit.0003'},
    }
    placeholder_dict = {pipeline: {stage: task_entries}}

    arguments = ['$SHARED']
    for task_id in (t1, t2):
        arguments.append('$Pipeline_%s_Stage_%s_Task_%s'
                         % (pipeline, stage, task_id))

    expected = ['$RP_PILOT_STAGING', '/home/vivek/t1', '/home/vivek/t2']
    assert resolve_arguments(arguments, placeholder_dict) == expected
def __init__(self,
             config_path=None,
             hostname=None,
             port=None,
             reattempts=None,
             resubmit_failed=None,
             autoterminate=None,
             write_workflow=None,
             rts=None,
             rmq_cleanup=None,
             rts_config=None,
             name=None):
    """
    Initialize the application manager.

    All arguments except `name` are passed unchanged to
    `self._read_config()`.

    :param name: if truthy, used both as the manager name and as the
        session id; otherwise the name is empty and a session id is
        generated.
    """

    # Create a session for each EnTK script execution
    if name:
        self._name = name
        self._sid = name
    else:
        self._name = str()
        self._sid = ru.generate_id('re.session', ru.ID_PRIVATE)

    self._read_config(config_path, hostname, port, reattempts,
                      resubmit_failed, autoterminate, write_workflow,
                      rts, rmq_cleanup, rts_config)

    # Create an uid + logger + profiles for AppManager, under the sid
    # namespace
    path = os.getcwd() + '/' + self._sid
    self._uid = ru.generate_id('appmanager.%(item_counter)04d',
                               ru.ID_CUSTOM, namespace=self._sid)
    self._logger = ru.Logger('radical.entk.%s' % self._uid, path=path,
                             targets=['2', '.'])
    self._prof = ru.Profiler(name='radical.entk.%s' % self._uid, path=path)
    self._report = ru.Reporter(name='radical.entk.%s' % self._uid)

    self._report.info('EnTK session: %s\n' % self._sid)
    self._prof.prof('create amgr obj', uid=self._uid)
    self._report.info('Creating AppManager')

    self._resource_manager = None

    # RabbitMQ Queues
    self._pending_queue = list()
    self._completed_queue = list()

    # Global parameters to have default values
    self._mqs_setup = False
    self._resource_desc = None
    self._task_manager = None
    self._workflow = None
    self._cur_attempt = 1
    self._shared_data = list()

    # os.getenv() returns a string when the variable is set but the integer
    # default otherwise; convert so the attribute always holds an int.
    self._rmq_ping_interval = int(os.getenv('RMQ_PING_INTERVAL', 10))

    self._logger.info('Application Manager initialized')
    self._prof.prof('amgr obj created', uid=self._uid)
    self._report.ok('>>ok\n')
def test_resolve_placeholder():
    """
    **Purpose**: Test if the 'resolve_placeholder' function resolves expected
    placeholders correctly. These placeholders are used to refer to files in
    different task folders.
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))

    task_info = {'path': '/home/vivek', 'rts_uid': 'unit.0002'}
    placeholder_dict = {pipeline: {stage: {task: task_info}}}

    def _resolved(raw):
        # Resolve `raw` against the placeholder dict built above.
        return resolve_placeholders(raw, placeholder_dict)

    # Test only strings are accepted
    for invalid in [1, list(), dict()]:
        with pytest.raises(TypeError):
            _resolved(invalid)

    # Test when no placeholders to resolve
    plain = '/home/vivek/some_file.txt'
    assert _resolved(plain) == plain

    # Test for shared data location
    assert _resolved('$SHARED/test.txt') == 'pilot:///test.txt'

    # Test for shared data location with rename
    assert _resolved('$SHARED/test.txt > new.txt') \
        == 'pilot:///test.txt > new.txt'

    # Test for resolving relative data references
    relative = '$Pipeline_%s_Stage_%s_Task_%s/some_file.txt' \
        % (pipeline, stage, task)
    assert _resolved(relative) == '/home/vivek/some_file.txt'

    # Test for resolving relative data references with rename
    renamed = '$Pipeline_%s_Stage_%s_Task_%s/some_file.txt > new.txt' \
        % (pipeline, stage, task)
    assert _resolved(renamed) == '/home/vivek/some_file.txt > new.txt'

    # Test only placeholders in $Pipeline_%s_Stage_%s_Task_%s are accepted
    with pytest.raises(ValueError):
        _resolved('$Task_2')
def __init__(self,
             config_path=None,
             hostname=None,
             port=None,
             reattempts=None,
             resubmit_failed=None,
             autoterminate=None,
             write_workflow=None,
             rts=None,
             rmq_cleanup=None,
             rts_config=None,
             name=None):
    """
    Initialize the application manager.

    All arguments except `name` are passed unchanged to
    `self._read_config()`.

    :param name: if truthy, used both as the manager name and as the
        session id; otherwise the name is empty and a session id is
        generated.
    """

    # Create a session for each EnTK script execution
    if name:
        self._name = name
        self._sid = name
    else:
        self._name = str()
        self._sid = ru.generate_id('re.session', ru.ID_PRIVATE)

    self._read_config(config_path, hostname, port, reattempts,
                      resubmit_failed, autoterminate, write_workflow,
                      rts, rmq_cleanup, rts_config)

    # Create an uid + logger + profiles for AppManager, under the sid
    # namespace
    path = os.getcwd() + '/' + self._sid
    self._uid = ru.generate_id('appmanager.%(item_counter)04d',
                               ru.ID_CUSTOM, namespace=self._sid)
    self._logger = ru.Logger('radical.entk.%s' % self._uid, path=path)
    self._prof = ru.Profiler(name='radical.entk.%s' % self._uid, path=path)
    self._report = ru.Reporter(name='radical.entk.%s' % self._uid)

    self._report.info('EnTK session: %s\n' % self._sid)
    self._prof.prof('create amgr obj', uid=self._uid)
    self._report.info('Creating AppManager')

    self._resource_manager = None

    # RabbitMQ Queues
    self._pending_queue = list()
    self._completed_queue = list()

    # Global parameters to have default values
    self._mqs_setup = False
    self._resource_desc = None
    self._task_manager = None
    self._workflow = None
    self._cur_attempt = 1
    self._shared_data = list()
    self._wfp = None
    self._sync_thread = None

    # os.getenv() returns a string when the variable is set but the integer
    # default otherwise; convert so the attribute always holds an int.
    self._rmq_ping_interval = int(os.getenv('RMQ_PING_INTERVAL', 10))

    self._logger.info('Application Manager initialized')
    self._prof.prof('amgr obj created', uid=self._uid)
    self._report.ok('>>ok\n')
def test_rmgr_rp_initialization(d):
    """
    Check construction of the RP resource manager.

    Construction is expected to raise `ValueError` for the first call below
    and `EnTKError` while 'RADICAL_PILOT_DBURL' is unset.  With the variable
    set, the attributes of a freshly constructed manager are compared
    against their expected initial values.

    :param d: resource description passed to `RPRmgr` (supplied by the
        test harness).
    """

    with pytest.raises(ValueError):
        RPRmgr(d, 'test.0000', rts_config={})

    # Make sure the DB URL is not defined for the next check
    os.environ.pop('RADICAL_PILOT_DBURL', None)

    config = {"sandbox_cleanup": False, "db_cleanup": False}

    # Generate the id outside of the `raises` block so that only the
    # constructor call is expected to raise.
    rmgr_id = ru.generate_id('test.%(item_counter)04d', ru.ID_CUSTOM)
    with pytest.raises(EnTKError):
        RPRmgr(d, rmgr_id, {})

    # Best-effort removal of leftover 'test.*' folders under
    # ~/.radical/utils; failures here must not fail the test.
    try:
        import glob
        import shutil

        home = os.environ.get('HOME', '/home')
        for folder in glob.glob('%s/.radical/utils/test.*' % home):
            shutil.rmtree(folder)
    except Exception:
        pass

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    rmgr_id = ru.generate_id('test.%(item_counter)04d', ru.ID_CUSTOM)
    rmgr = RPRmgr(d, rmgr_id, {'db_cleanup': False,
                               'sandbox_cleanup': False})

    assert rmgr._resource_desc == d
    assert rmgr._sid == rmgr_id
    assert rmgr._rts == 'radical.pilot'
    assert rmgr._rts_config == config
    assert rmgr._resource is None
    assert rmgr._walltime is None
    assert rmgr._cpus == 1
    assert rmgr._gpus == 0
    assert rmgr._project is None
    assert rmgr._access_schema is None
    assert rmgr._queue is None
    assert rmgr._validated is False

    assert rmgr._uid == 'resource_manager.0000'
    assert rmgr._logger
    assert rmgr._prof

    # Shared data list
    assert isinstance(rmgr.shared_data, list)

    assert not rmgr.session
    assert not rmgr.pmgr
    assert not rmgr.pilot
    assert rmgr._download_rp_profile is False
    assert rmgr._mlab_url
def test_rmgr_rp_initialization(d):
    """
    Check construction of the RP resource manager.

    The first construction is expected to raise `ree.ValueError`; the second
    one must succeed, and the attributes of the resulting manager are
    compared against their expected initial values.

    :param d: resource description passed to `RPRmgr` (supplied by the
        test harness).
    """

    # Generate the id outside of the `raises` block so that only the
    # constructor call is expected to raise.
    sid = ru.generate_id('test', ru.ID_UNIQUE)
    with pytest.raises(ree.ValueError):
        RPRmgr(d, sid, rts_config={})

    config = {"sandbox_cleanup": False,
              "db_cleanup": False}

    # Best-effort removal of leftover 'test.*' folders under
    # ~/.radical/utils; failures here must not fail the test.
    try:
        home = os.environ.get('HOME', '/home')
        for folder in glob.glob('%s/.radical/utils/test.*' % home):
            shutil.rmtree(folder)
    except Exception:
        pass

    sid = ru.generate_id('test', ru.ID_UNIQUE)
    rmgr = RPRmgr(d, sid, {'db_cleanup': False,
                           'sandbox_cleanup': False})

    assert rmgr._resource_desc == d
    assert rmgr._sid == sid
    assert rmgr._rts == 'radical.pilot'
    assert rmgr._rts_config == config
    assert rmgr._resource is None
    assert rmgr._walltime is None
    assert rmgr._cpus == 1
    assert rmgr._gpus == 0
    assert rmgr._project is None
    assert rmgr._access_schema is None
    assert rmgr._queue is None
    assert rmgr._validated is False
    assert rmgr._download_rp_profile is False

    # rmgr id is incremental, and it is valid as long as it is in the range
    prefix, uid = rmgr._uid.split(".")
    assert prefix == 'resource_manager'
    assert int(uid) >= 0
    assert int(uid) <= 9999

    assert rmgr._logger
    assert rmgr._prof

    assert not rmgr.session
    assert not rmgr.pmgr
    assert not rmgr.pilot

    assert isinstance(rmgr.shared_data, list)
def __init__(self):
    """
    Set the API type (unless already present), the logger and the object id.
    """

    # `_apitype` is only assigned when no such attribute exists yet
    apitype_missing = not hasattr(self, '_apitype')
    if apitype_missing:
        self._apitype = self._get_apitype()

    self._logger = rul.getLogger('saga', self._apitype)

    # the id prefix is '<apitype>.'
    id_prefix = self._get_apitype() + '.'
    self._id = ru.generate_id(id_prefix, mode=ru.ID_SIMPLE)
def __init__(self, atype):
    """
    Set up the instance state and start the worker thread.

    :param atype: stored as `self._atype` and used as the prefix of the
        generated uid.
    :raises OSError: if the temporary directory cannot be created for a
        reason other than it already existing as a directory.
    """

    self._atype = atype
    self._pid = os.getpid()
    self._uid = ru.generate_id("%s" % self._atype)

    # Interpolate the uid into the logger name; the name used to be the
    # literal string 'radical.synapse.self._uid'.
    self.logger = ru.Logger('radical.synapse.%s' % self._uid)

    # storage for temporary data and statistics
    # self._tmpdir = "/scratch/synapse/"  # FIXME
    self._tmpdir = "/tmp/"  # FIXME

    try:
        os.makedirs(self._tmpdir)
    except OSError as exc:
        # an already existing directory is fine, anything else is an error
        if exc.errno == errno.EEXIST and os.path.isdir(self._tmpdir):
            pass
        else:
            raise

    # start worker thread
    self._work_queue = Queue.Queue()
    self._result_queue = Queue.Queue()

    self._term = threading.Event()
    self._proc = threading.Thread(target=self.run)
    self._proc.start()
def __init__(self, atype):
    """
    Set up the instance state and start the worker thread.

    :param atype: stored as `self._atype` and used as the prefix of the
        generated uid.
    :raises OSError: if the temporary directory cannot be created for a
        reason other than it already existing as a directory.
    """

    self._atype = atype
    self._pid = os.getpid()
    self._uid = ru.generate_id("%s" % self._atype)

    # Interpolate the uid into the logger name; the name used to be the
    # literal string 'radical.synapse.self._uid'.
    self.logger = ru.get_logger('radical.synapse.%s' % self._uid)

    # storage for temporary data and statistics
    # self._tmpdir = "/scratch/synapse/"  # FIXME
    self._tmpdir = "/tmp/"  # FIXME

    try:
        os.makedirs(self._tmpdir)
    except OSError as exc:
        # an already existing directory is fine, anything else is an error
        if exc.errno == errno.EEXIST and os.path.isdir(self._tmpdir):
            pass
        else:
            raise

    # start worker thread
    self._work_queue = Queue.Queue()
    self._result_queue = Queue.Queue()

    self._term = threading.Event()
    self._proc = threading.Thread(target=self.run)
    self._proc.start()
def test_rmgr_rp_resource_request():
    '''
    ***Purpose***: Test the submission and cancelation of a resource request.
    Check states that pilot starts and ends with.
    '''

    import time

    res_dict = {
        'resource': 'local.localhost',
        'walltime': 10,
        'cpus': 1,
        'project': ''
    }

    # os.environ['RP_ENABLE_OLD_DEFINES'] = 'True'

    config = {'sandbox_cleanup': False, 'db_cleanup': False}
    rmgr_id = ru.generate_id('test.%(item_counter)04d', ru.ID_CUSTOM)
    rmgr = RPRmgr(res_dict, sid=rmgr_id, rts_config=config)

    rmgr._validate_resource_desc()
    rmgr._populate()
    rmgr._submit_resource_request()
    rmgr._terminate_resource_request()

    # State transition seems to be taking some time.  Poll for up to 10
    # seconds instead of always sleeping for the full period, so the test
    # finishes as soon as a final state is reported.
    deadline = time.time() + 10
    state = rmgr.get_resource_allocation_state()
    while state not in rp.FINAL and time.time() < deadline:
        time.sleep(1)
        state = rmgr.get_resource_allocation_state()

    assert state in rp.FINAL
def __init__(self, descr):
    """
    Create a new workload dependency element, aka Relation, according to
    the description.  Each new relation is assigned a new ID.

    Later implementations may allow for an additional id parameter, to
    reconnect to the thus identified relation instance.

    Raises `ValueError` if the description has no 'head' or no 'tail'.
    """

    # the ID is generated before the description is validated
    tid = ru.generate_id('r.')

    if 'head' not in descr:
        raise ValueError("no 'head' in RelationDescription")

    if 'tail' not in descr:
        raise ValueError("no 'tail' in RelationDescription")

    tu.Properties.__init__(self, descr)

    # register properties
    for prop in ('id', 'head', 'tail', 'description'):
        self.register_property(prop)

    # initialize essential properties
    self.id = tid
    self.head = descr.head
    self.tail = descr.tail
    self.description = descr
def __init__(self, session, db_obj, cb_buffer, scheduler=None):
    """
    Store the given collaborators, create the bookkeeping containers and
    call `self._initialize()`.

    :param session:   stored as `self._session`
    :param db_obj:    stored as `self._db_obj`
    :param cb_buffer: stored as `self._cb_buffer`
    :param scheduler: stored as `self._scheduler` (default: None)
    """

    self._uid = ru.generate_id('task_manager.rp')
    self._logger = ru.get_logger('task_manager.rp')

    self._session = session
    self._scheduler = scheduler

    # NOTE: if the cu.uid update is moved, then the db obj can be entirely
    #       removed from the task manager, since it acts through the buffer
    #       (ie move this update to buffer)
    self._db_obj = db_obj
    self._cb_buffer = cb_buffer

    self._running_tasks = []

    # The checklist dict takes each batch of tasks and checks that they are
    # running, or else cancels them after a waiting period.
    #
    # structure of this dict:
    #
    #     batch info                need2check              need2kill
    #   { (starttime, waittime) : [ [cu.uid, cu.uid, ... ], [cu.uid, cu.uid, ... ] ],
    #     ()                    : [ ... ],
    #     ...
    #   }
    #
    self._running_mgr = Manager()
    self._running_checklist = self._running_mgr.dict()

    self._initialize()
def test_rmgr_rp_completed_states():
    """
    The completed states reported by the RP resource manager must equal
    `rp.FINAL`.
    """

    rts_config = {"sandbox_cleanup": False, "db_cleanup": False}
    session_id = ru.generate_id('test', ru.ID_UNIQUE)

    manager = RPRmgr({}, sid=session_id, rts_config=rts_config)
    completed = manager.get_completed_states()

    assert completed == rp.FINAL
def __init__(self, resource='local', comm_server=None):
    """The workhorse of high throughput binding affinity calculations.

    Manages arbitrary number of protocols on any resource (including
    supercomputers).

    Parameters
    ----------
    resource: str
        The name of the resource where the protocols will be run. This is
        usually the name of the supercomputer or 'local' if the job will be
        executed locally. (the default is to try to run locally).
    comm_server: tuple(str, int)
        The communication server used by the execution system. Specify a
        hostname and port number as a tuple. If None, then the dedicated
        server might be used from the resource description if present;
        if it is not present either, the application manager is created
        without arguments.

    Raises
    ------
    KeyError
        If `resource` is not an entry of 'resources.yaml'.
    """

    # safe_load: the resource file is plain data, and yaml.load() without
    # an explicit Loader is unsafe and rejected by current PyYAML releases.
    resources = yaml.safe_load(resource_stream(__name__, 'resources.yaml'))
    self.resource = resources[resource]

    if comm_server is None:
        comm_server = self.resource.get('dedicated_rabbitmq_server')

    self._protocols = list()

    # `comm_server` may still be None here; unpacking None would raise, so
    # fall back to an argument-less AppManager in that case.
    self._app_manager = AppManager(*(comm_server or ()))

    # Profiler for Runner
    self._uid = ru.generate_id('radical.htbac.workflow_runner')
    self._logger = ru.get_logger('radical.htbac.workflow_runner')
    self._prof = ru.Profiler(name=self._uid)
    self._prof.prof('create workflow_runner obj', uid=self._uid)

    self._root_directories = list()
def __init__(self):
    """
    Set the API type (unless already present), the logger and the object id.
    """

    try_set_apitype = not hasattr(self, '_apitype')

    # keep an already existing `_apitype` untouched
    if try_set_apitype:
        self._apitype = self._get_apitype()

    self._logger = rul.getLogger('saga', self._apitype)

    # id prefix: '<apitype>.'
    prefix = self._get_apitype() + '.'
    self._id = ru.generate_id(prefix, mode=ru.ID_SIMPLE)
def __init__(self, session, expand=AUTOMATIC, derive=AUTOMATIC):
    """
    Create a new planner instance for this workload.

    Use the default planner plugin if not indicated otherwise.  Plugin
    selections found in the session's 'planner' config override the
    `expand` / `derive` arguments.
    """

    self.session = session
    self.id = ru.generate_id("planner.")

    tu.Timed.__init__(self, "troy.Planner", self.id)
    self.session.timed_component(self, "troy.Planner", self.id)

    self._plugin_mgr = None

    # Set up plugins from arguments.
    #
    # We leave actual plugin initialization for later, in case a strategy
    # wants to alter / complete the plugin selection.
    #
    # FIXME: we don't need no stupid arguments, ey!  Just use
    #        AUTOMATIC by default...
    self.plugins = dict()
    self.plugins["expand"] = expand
    self.plugins["derive"] = derive

    # Let's see if there are any plugin preferences in the config.
    # Note that config settings supersede arguments!
    cfg = session.get_config("planner")

    overrides = (("derive", "plugin_planner_derive"),
                 ("expand", "plugin_planner_expand"))

    for plugin_kind, cfg_key in overrides:
        if cfg_key in cfg:
            self.plugins[plugin_kind] = cfg[cfg_key]
def test_tmgr_rp_initialization():
    """
    Check the attributes of a freshly constructed RP task manager.
    """

    sid = ru.generate_id('test', ru.ID_UNIQUE)
    cfg = {"sandbox_cleanup": False, "db_cleanup": False}

    credentials = pika.PlainCredentials(username, password)
    rmq_conn_params = pika.ConnectionParameters(host=hostname,
                                                port=port,
                                                credentials=credentials)

    rmgr = RPRmgr({}, sid, cfg)

    tmgr_kwargs = {'sid': sid,
                   'pending_queue': ['pending'],
                   'completed_queue': ['completed'],
                   'rmgr': rmgr,
                   'rmq_conn_params': rmq_conn_params}
    tmgr = RPTmgr(**tmgr_kwargs)

    assert 'task_manager' in tmgr._uid

    # attributes that must equal a specific value
    expected_values = (('_pending_queue', ['pending']),
                       ('_completed_queue', ['completed']),
                       ('_rmq_conn_params', rmq_conn_params),
                       ('_rts', 'radical.pilot'))

    for attr, expected in expected_values:
        assert getattr(tmgr, attr) == expected

    # nothing has been started yet
    assert tmgr._tmgr_process is None
    assert tmgr._hb_thread is None
def test_tmgr_rp_tmgr():
    """
    Start the RP task manager on a local resource request, run the mock
    function in a separate process, then shut everything down.
    """

    os.environ['RADICAL_PILOT_DBURL'] = MLAB
    os.environ['ENTK_HB_INTERVAL'] = '30'

    resource_desc = {'resource': 'local.localhost',
                     'walltime': 40,
                     'cpus': 20}
    rts_config = {"sandbox_cleanup": False, "db_cleanup": False}

    rmgr_id = ru.generate_id('test.%(item_counter)04d', ru.ID_CUSTOM)
    rmgr = RPRmgr(resource_desc=resource_desc,
                  sid=rmgr_id,
                  rts_config=rts_config)

    # acquire the resource
    rmgr._validate_resource_desc()
    rmgr._populate()
    rmgr._submit_resource_request()

    tmgr = RPTmgr(sid=rmgr_id,
                  pending_queue=['pendingq-1'],
                  completed_queue=['completedq-1'],
                  rmgr=rmgr,
                  mq_hostname=hostname,
                  port=port)
    tmgr.start_manager()

    # run the mock function in its own process and wait for it
    mock_args = (hostname,
                 port,
                 tmgr._pending_queue[0],
                 tmgr._completed_queue[0])
    mock_proc = Process(target=func_for_mock_tmgr_test, args=mock_args)
    mock_proc.start()
    mock_proc.join()

    tmgr.terminate_manager()
    rmgr._terminate_resource_request()
def __init__(self, num_tasks=0, ops_dist='uniform', dist_mean=10,
             dist_var=0, no_uid=False):
    """
    Store the workload parameters and create the task list.

    :param num_tasks: stored as `self._num_tasks`
    :param ops_dist:  must be 'uniform' or 'normal'
    :param dist_mean: stored as `self._dist_mean`
    :param dist_var:  stored as `self._dist_var`
    :param no_uid:    if true, no uid is generated and `self._uid` stays
                      None
    :raises ValueError: if `ops_dist` is not one of the known distributions
    """

    # Initialize
    supported = ['uniform', 'normal']

    self._uid = None
    self._dist_options = supported
    self._task_list = []

    if ops_dist not in supported:
        raise ValueError("possible distributions are %s"
                         % (','.join(supported)))

    self._num_tasks = num_tasks
    self._ops_dist = ops_dist
    self._dist_mean = dist_mean
    self._dist_var = dist_var

    if not no_uid:
        self._uid = ru.generate_id('workload')

    self._create_task_list()
def test_rmgr_rp_resource_request():
    """
    ***Purpose***: Test the submission and cancelation of a resource request.
    Check states that pilot starts and ends with.
    """

    resource_desc = {'resource': 'local.localhost',
                     'walltime': 10,
                     'cpus': 1,
                     'project': ''}
    rts_config = {"sandbox_cleanup": False, "db_cleanup": False}

    sid = ru.generate_id('test', ru.ID_UNIQUE)
    rmgr = RPRmgr(resource_desc, sid=sid, rts_config=rts_config)

    rmgr._validate_resource_desc()
    rmgr._populate()
    rmgr._submit_resource_request()
    rmgr._terminate_resource_request()

    # poll the allocation state once per second, for at most 30 seconds
    started = time.time()
    while True:
        if not (time.time() - started < 30):
            break

        state = rmgr.get_resource_allocation_state()
        if state in rp.FINAL:
            break

        time.sleep(1)

    assert state in [rp.CANCELED, rp.FAILED, rp.DONE]
def __init__(self):
    """
    Set the API type (unless already present), the logger and the object id.
    """

    # an already present `_apitype` attribute is left as it is
    needs_apitype = not hasattr(self, '_apitype')
    if needs_apitype:
        self._apitype = self._get_apitype()

    self._logger = ru.get_logger('radical.saga')

    # the api type itself serves as the id prefix
    id_prefix = self._get_apitype()
    self._id = ru.generate_id(id_prefix, mode=ru.ID_SIMPLE)
def _assign_uid(self, sid):
    """
    Purpose: Assign a uid to the current object based on the sid passed.

    :param sid: used as the namespace of the generated id
    """

    uid_template = 'task.%(item_counter)04d'
    new_uid = ru.generate_id(uid_template, ru.ID_CUSTOM, namespace=sid)

    self._uid = new_uid
def setUp(self):
    """
    Build the component configuration and a unit description with a single
    input staging directive.
    """

    # Component configuration
    self.cfg = ru.read_json(cfg_file)

    overrides = (('session_id', session_id),
                 ('resource_sandbox', resource_sandbox),
                 ('session_sandbox', session_sandbox),
                 ('pilot_sandbox', pilot_sandbox),
                 ('unit_sandbox', unit_sandbox))

    for key, value in overrides:
        self.cfg[key] = value

    # Unit configuration
    staging_directive = {
        'uid': ru.generate_id('sd'),
        'source': 'client://' + sample_data_folder + '/file',
        'action': rp.TARBALL,
        'target': 'unit:///file',
        'flags': rp.DEFAULT_FLAGS,
        'priority': 0,
    }

    self.unit = dict()
    self.unit['uid'] = 'unit.000000'

    # the sandboxes are taken over from the component configuration
    for sandbox in ('unit_sandbox', 'pilot_sandbox', 'resource_sandbox'):
        self.unit[sandbox] = self.cfg[sandbox]

    self.unit['description'] = {'input_staging': [staging_directive]}
def __init__(self, resource_desc, sid, rts, rts_config):
    """
    Store the arguments and set all resource parameters to their initial
    values.

    :param resource_desc: resource description, must be a dict
    :param sid:           session id; also used as the uid namespace and as
                          the name of the log/profile folder
    :param rts:           stored as `self._rts`
    :param rts_config:    stored as `self._rts_config`
    :raises TypeError: if `resource_desc` is not a dict
        (NOTE(review): called with keyword arguments, so presumably a
        project-specific exception class - confirm)
    """

    if not isinstance(resource_desc, dict):
        raise TypeError(expected_type=dict,
                        actual_type=type(resource_desc))

    self._resource_desc = resource_desc
    self._sid = sid
    self._rts = rts
    self._rts_config = rts_config

    # Resource reservation related parameters
    self._resource = self._walltime = None
    self._cpus, self._gpus = 1, 0
    self._project = self._access_schema = self._queue = None
    self._validated = False

    # Utility parameters
    self._uid = ru.generate_id('resource_manager.%(item_counter)04d',
                               ru.ID_CUSTOM, namespace=self._sid)
    self._path = '/'.join([os.getcwd(), self._sid])

    component_name = 'radical.entk.%s' % self._uid
    self._logger = ru.Logger(component_name, path=self._path)
    self._prof = ru.Profiler(name=component_name, path=self._path)

    # Shared data list
    self._shared_data = []
def create(unit_manager_obj, unit_description, local_state):
    """
    PRIVATE: Create a new compute unit.

    The description must carry a non-empty 'executable' or a non-empty
    'kernel', otherwise a `PilotException` is raised.
    """

    # create and return pilot object
    computeunit = ComputeUnit()

    # Make a copy of the UD to work on without side-effects.
    ud_copy = copy.deepcopy(unit_description)

    def _is_set(key):
        # true if `key` is present in the description with a truthy value
        return key in unit_description and unit_description[key]

    # sanity check on description
    if not _is_set('executable') and not _is_set('kernel'):
        raise PilotException(
            "ComputeUnitDescription needs an executable or application kernel"
        )

    # If staging directives exist, try to expand them
    for attr in ('input_staging', 'output_staging'):
        if getattr(ud_copy, attr):
            expanded = expand_staging_directive(getattr(ud_copy, attr),
                                                logger)
            setattr(ud_copy, attr, expanded)

    computeunit._description = ud_copy
    computeunit._manager = unit_manager_obj
    computeunit._worker = unit_manager_obj._worker
    computeunit._uid = ru.generate_id('unit.%(counter)06d', ru.ID_CUSTOM)
    computeunit._name = unit_description['name']
    computeunit._local_state = local_state

    return computeunit
def __init__(self, workflow, pending_queue, completed_queue, mq_hostname):
    """
    Store the workflow and the queue settings after checking their types.

    :param workflow:        stored as `self._workflow`
    :param pending_queue:   must be a list
    :param completed_queue: must be a list
    :param mq_hostname:     must be a str
    :raises TypeError: if one of the checked arguments has the wrong type
        (NOTE(review): called with keyword arguments, so presumably a
        project-specific exception class - confirm)
    """

    self._uid = ru.generate_id('radical.entk.wfprocessor')
    self._logger = ru.get_logger('radical.entk.wfprocessor')

    self._workflow = workflow

    # (value, required type) pairs, checked in this order
    type_checks = ((pending_queue, list),
                   (completed_queue, list),
                   (mq_hostname, str))

    for value, required in type_checks:
        if not isinstance(value, required):
            raise TypeError(expected_type=required,
                            actual_type=type(value))

    # Mqs queue names and channel
    self._pending_queue = pending_queue
    self._completed_queue = completed_queue
    self._mq_hostname = mq_hostname

    self._wfp_process = None
    self._resubmit_failed = False

    self._logger.info('Created WFProcessor object: %s' % self._uid)
def test_rmgr_rp_resource_request():
    """
    ***Purpose***: Test the submission and cancelation of a resource request.
    Check states that pilot starts and ends with.
    """

    import time

    res_dict = {
        'resource': 'local.localhost',
        'walltime': 10,
        'cpus': 1,
        'project': ''
    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB
    os.environ['RP_ENABLE_OLD_DEFINES'] = 'True'

    config = {"sandbox_cleanup": False, "db_cleanup": False}
    rmgr_id = ru.generate_id('test.%(item_counter)04d', ru.ID_CUSTOM)
    rmgr = RPRmgr(res_dict, sid=rmgr_id, rts_config=config)

    rmgr._validate_resource_desc()
    rmgr._populate()
    rmgr._submit_resource_request()

    # Imported only here, i.e. after the environment variables above have
    # been set (the import position of the original test is kept).
    import radical.pilot as rp

    rmgr._terminate_resource_request()

    # State transition seems to be taking some time.  Poll for up to 30
    # seconds instead of always sleeping for the full period, so the test
    # finishes as soon as one of the accepted states is reported.
    accepted = [rp.CANCELED, rp.FAILED, rp.DONE]
    deadline = time.time() + 30
    state = rmgr.get_resource_allocation_state()
    while state not in accepted and time.time() < deadline:
        time.sleep(1)
        state = rmgr.get_resource_allocation_state()

    assert state in accepted
def __init__(self, resource_desc, db):
    """
    Read the DB URL from the environment, initialize all resource
    parameters and populate them from the resource description.

    :param resource_desc: validated via `self._validate_resource_desc()` and
        then passed to `self._populate()`
    :param db:            stored as `self._db`
    :raises Error: if 'RADICAL_PILOT_DBURL' is unset or empty, or if the
        resource description does not validate
    """

    self._uid = ru.generate_id('resource_manager.rp')
    self._logger = ru.get_logger('resource_manager.rp')

    self._mlab_url = os.environ.get('RADICAL_PILOT_DBURL')
    if not self._mlab_url:
        raise Error(msg='RADICAL_PILOT_DBURL not defined. Please assign a valid mlab url')

    # runtime handles, not created yet
    self._session = self._pmgr = self._pilot = None

    # resource parameters
    self._resource = self._runtime = None
    self._cores = self._gpus = None
    self._project = self._access_schema = self._queue = None

    # directory containing this file
    this_file = os.path.abspath(__file__)
    self._directory = os.path.dirname(this_file)

    self._db = db

    if not self._validate_resource_desc(resource_desc):
        raise Error(msg='Resource description incorrect')

    self._populate(resource_desc)
def __init__(self,
             sid,
             workflow,
             pending_queue,
             completed_queue,
             resubmit_failed,
             rmq_conn_params):
    """
    Store the arguments and create uid, logger, profiler and reporter.

    :param sid:             session id; also used as the uid namespace and
                            as the name of the log/profile folder
    :param workflow:        stored as `self._workflow`
    :param pending_queue:   stored as `self._pending_queue`
    :param completed_queue: stored as `self._completed_queue`
    :param resubmit_failed: stored as `self._resubmit_failed`
    :param rmq_conn_params: stored as `self._rmq_conn_params`
    """

    # Mandatory arguments
    self._sid = sid
    self._pending_queue = pending_queue
    self._completed_queue = completed_queue
    self._resubmit_failed = resubmit_failed
    self._rmq_conn_params = rmq_conn_params

    # Assign validated workflow
    self._workflow = workflow

    # Create logger and profiler at their specific locations using the sid
    self._path = os.getcwd() + '/' + self._sid
    self._uid = ru.generate_id('wfprocessor.%(item_counter)04d',
                               ru.ID_CUSTOM, ns=self._sid)

    name = 'radical.entk.%s' % self._uid

    self._logger = ru.Logger(name, path=self._path)
    self._prof = ru.Profiler(name, path=self._path)
    self._report = ru.Reporter(name)

    # Defaults
    self._wfp_process = None
    self._enqueue_thread = None
    self._dequeue_thread = None

    # os.getenv() returns a string when the variable is set but the integer
    # default otherwise; convert so the attribute always holds an int.
    self._rmq_ping_interval = int(os.getenv('RMQ_PING_INTERVAL', 10))

    self._logger.info('Created WFProcessor object: %s' % self._uid)
    self._prof.prof('create_wfp', uid=self._uid)
def test_tmgr_rp_tmgr():
    """
    Start the RP task manager on a local resource request, run the mock
    function in a separate process, then shut everything down.
    """

    resource_desc = {'resource': 'local.localhost',
                     'walltime': 40,
                     'cpus': 20}
    rts_config = {"sandbox_cleanup": False, "db_cleanup": False}

    rmgr_id = ru.generate_id('test', ru.ID_UNIQUE)
    rmgr = RPRmgr(resource_desc=resource_desc,
                  sid=rmgr_id,
                  rts_config=rts_config)
    rmq_conn_params = pika.ConnectionParameters(host=hostname, port=port)

    # acquire the resource
    rmgr._validate_resource_desc()
    rmgr._populate()
    rmgr._submit_resource_request()

    tmgr = RPTmgr(sid=rmgr_id,
                  pending_queue=['pendingq-1'],
                  completed_queue=['completedq-1'],
                  rmgr=rmgr,
                  rmq_conn_params=rmq_conn_params)
    tmgr.start_manager()

    # run the mock function in its own process and wait for it
    mock_args = (hostname,
                 port,
                 tmgr._pending_queue[0],
                 tmgr._completed_queue[0])
    mock_proc = mp.Process(target=func_for_mock_tmgr_test, args=mock_args)
    mock_proc.start()
    mock_proc.join()

    tmgr.terminate_manager()
    rmgr._terminate_resource_request()
def __init__(self, number_of_replicas, systems=None, workflow=None,
             cores=32, ligand=False, full=False, gibbs_steps=None,
             thermodynamic_states=None):
    """
    Store the replica exchange settings and create the profiler.

    :param number_of_replicas:   stored as `self.number_of_replicas`
    :param systems:              stored as `self.systems`; a new empty list
                                 is used when omitted
    :param workflow:             stored as `self.workflow`; defaults to
                                 `list(range(0, 5))` when falsy
    :param cores:                stored as `self.cores`
    :param ligand:               if truthy `self.ligand` is '-ligands',
                                 otherwise the empty string
    :param full:                 selects `_full_steps` (truthy) or
                                 `_reduced_steps` as `self.step_count`
    :param gibbs_steps:          required; stored as `self.n_gibbs_steps`
    :param thermodynamic_states: required; stored as `self.thermo_state`
    :raises TypeError: if `gibbs_steps` or `thermodynamic_states` is missing

    `gibbs_steps` and `thermodynamic_states` used to be declared without a
    default after parameters with defaults, which is a syntax error.  They
    keep their position but default to None, and are checked explicitly.
    """

    if gibbs_steps is None or thermodynamic_states is None:
        raise TypeError("'gibbs_steps' and 'thermodynamic_states' are "
                        "required arguments")

    self.number_of_replicas = number_of_replicas
    self.n_gibbs_steps = gibbs_steps
    self.thermo_state = thermodynamic_states
    self.ligand = '-ligands' if ligand else ''
    self.step_count = _full_steps if full else _reduced_steps

    # A fresh list per instance: a `list()` default in the signature would
    # be shared between all instances created without `systems`.
    self.systems = systems if systems is not None else []
    self.cores = cores
    self._id = uuid.uuid1()  # generate id

    # self.workflow = workflow or ['gen_replicas', 'repex', 'rotation',
    #                              'translation', 'propagation']
    # null workflow
    self.workflow = workflow or list(range(0, 5))

    # Profiler for TIES PoE
    self._uid = ru.generate_id('radical.yank.yank-repex')
    self._logger = ru.get_logger('radical.yank.yank-repex')
    self._prof = ru.Profiler(name=self._uid)
    self._prof.prof('create yank-repex instance', uid=self._uid)
def test_tmgr_rp_tmgr():
    """
    Start the RP task manager on a local resource request, run the mock
    function in a separate process, then shut everything down.
    """

    os.environ['RADICAL_PILOT_DBURL'] = MLAB
    os.environ['ENTK_HB_INTERVAL'] = '30'

    rts_settings = {"sandbox_cleanup": False, "db_cleanup": False}
    rmgr_kwargs = {
        'resource_desc': {'resource': 'local.localhost',
                          'walltime': 40,
                          'cpus': 20},
        'sid': ru.generate_id('test.%(item_counter)04d', ru.ID_CUSTOM),
        'rts_config': rts_settings,
    }
    rmgr_id = rmgr_kwargs['sid']
    rmgr = RPRmgr(**rmgr_kwargs)

    # validate, populate and submit the resource request
    rmgr._validate_resource_desc()
    rmgr._populate()
    rmgr._submit_resource_request()

    tmgr = RPTmgr(sid=rmgr_id,
                  pending_queue=['pendingq-1'],
                  completed_queue=['completedq-1'],
                  rmgr=rmgr,
                  mq_hostname=hostname,
                  port=port)
    tmgr.start_manager()

    # the mock function gets the first pending and completed queue names
    pending_q = tmgr._pending_queue[0]
    completed_q = tmgr._completed_queue[0]

    worker = Process(target=func_for_mock_tmgr_test,
                     args=(hostname, port, pending_q, completed_q))
    worker.start()
    worker.join()

    tmgr.terminate_manager()
    rmgr._terminate_resource_request()
def submit(self):
    """
    Create a new 'thing' in state 'NEW' and advance it to
    'CLIENT_INPUT_PENDING', with publish and push enabled.
    """

    new_thing = dict(type='thing',
                     uid=ru.generate_id('thing.'),
                     state='NEW')

    self.advance(new_thing, 'CLIENT_INPUT_PENDING',
                 publish=True, push=True)
def __init__(self):
    """
    Create an empty task: generated uid, empty name, state NEW, empty
    execution and staging lists, and no parent stage / pipeline.
    """

    self._uid = ru.generate_id('radical.entk.task')
    self._name = str()
    self._state = states.NEW

    list_attributes = (
        # Attributes necessary for execution
        '_environment',
        '_pre_exec',
        '_executable',
        '_arguments',
        '_post_exec',
        # Data staging attributes
        '_upload_input_data',
        '_copy_input_data',
        '_link_input_data',
        '_copy_output_data',
        '_download_output_data',
    )

    # every attribute gets its own, new list
    for attribute in list_attributes:
        setattr(self, attribute, [])

    # The following help in updation:
    # stage and pipeline this task belongs to
    self._parent_stage = None
    self._parent_pipeline = None
def create(unit_manager_obj, unit_description, local_state):
    """
    PRIVATE: Create a new compute unit.

    A `PilotException` is raised unless the description carries a non-empty
    'executable' or a non-empty 'kernel'.
    """

    # create and return pilot object
    unit = ComputeUnit()

    # Make a copy of the UD to work on without side-effects.
    description = copy.deepcopy(unit_description)

    # sanity check on description: at least one of the two keys has to be
    # present with a truthy value
    runnable = any(key in unit_description and unit_description[key]
                   for key in ('executable', 'kernel'))
    if not runnable:
        raise PilotException("ComputeUnitDescription needs an executable or application kernel")

    # If staging directives exist, try to expand them
    if description.input_staging:
        description.input_staging = expand_staging_directive(
            description.input_staging, logger)

    if description.output_staging:
        description.output_staging = expand_staging_directive(
            description.output_staging, logger)

    unit._description = description
    unit._manager = unit_manager_obj
    unit._worker = unit_manager_obj._worker
    unit._uid = ru.generate_id('unit.%(counter)06d', ru.ID_CUSTOM)
    unit._name = unit_description['name']
    unit._local_state = local_state

    return unit
def test_rmgr_base_initialization(d):
    """
    Check the initial attribute values of a freshly constructed base
    resource manager.

    :param d: resource description passed to `BaseRmgr` (supplied by the
        test harness).
    """

    # Best-effort removal of leftover 'test.*' folders under
    # ~/.radical/utils; failures here must not fail the test.
    try:
        import glob
        import shutil
        import os

        home = os.environ.get('HOME', '/home')
        for folder in glob.glob('%s/.radical/utils/test.*' % home):
            shutil.rmtree(folder)
    except Exception:
        pass

    rmgr_id = ru.generate_id('test.%(item_counter)04d', ru.ID_CUSTOM)
    rmgr = BaseRmgr(d, rmgr_id, None, {})

    assert rmgr._resource_desc == d
    assert rmgr._sid == rmgr_id
    assert rmgr._rts is None
    assert rmgr._rts_config == {}
    assert rmgr.resource is None
    assert rmgr.walltime is None
    assert rmgr.cpus == 1
    assert rmgr.gpus == 0
    assert rmgr.project is None
    assert rmgr.access_schema is None
    assert rmgr.queue is None
    assert rmgr._validated is False

    assert rmgr._uid == 'resource_manager.0000'
    assert rmgr._logger
    assert rmgr._prof

    # Shared data list
    assert isinstance(rmgr.shared_data, list)
def replicate(self, original_task):
    """Turn this task into a replica of ``original_task``.

    The replica receives a newly generated uid and is reset to
    ``states.NEW``; name, execution attributes, staging attributes and
    parent references are read from ``original_task``'s public attributes.
    """
    self._uid = ru.generate_id('radical.entk.task')
    self._name = original_task.name
    self._state = states.NEW

    # Public attribute on the original -> private attribute on this task.
    mirrored = (
        # attributes necessary for execution
        'pre_exec',
        'executable',
        'arguments',
        'post_exec',
        # data staging attributes
        'upload_input_data',
        'copy_input_data',
        'link_input_data',
        'copy_output_data',
        'download_output_data',
        # stage and pipeline this task belongs to
        'parent_stage',
        'parent_pipeline',
    )
    for public_name in mirrored:
        setattr(self, '_' + public_name, getattr(original_task, public_name))
def __init__(self, resource_desc, sid, rts, rts_config):
    """Store the resource description and set up logging and profiling.

    Args:
        resource_desc: resource description; must be a ``dict``.
        sid: session id; also used as the sub-directory (below the current
            working directory) handed to the logger and profiler.
        rts: runtime system, stored as given.
        rts_config: runtime system configuration, stored as given.

    Raises:
        TypeError: if ``resource_desc`` is not a dict.
    """
    if not isinstance(resource_desc, dict):
        raise TypeError(expected_type=dict, actual_type=type(resource_desc))

    self._resource_desc = resource_desc
    self._sid = sid
    self._rts = rts
    self._rts_config = rts_config

    # Resource reservation parameters: unset until populated, apart from
    # the cpu / gpu counts.
    for unset in ('_resource', '_walltime'):
        setattr(self, unset, None)
    self._cpus = 1
    self._gpus = 0
    for unset in ('_project', '_access_schema', '_queue'):
        setattr(self, unset, None)
    self._validated = False

    # Utility parameters
    self._uid = ru.generate_id('resource_manager.%(counter)04d', ru.ID_CUSTOM)
    self._path = '/'.join((os.getcwd(), self._sid))

    component = 'radical.entk.%s' % self._uid
    self._logger = ru.Logger (component, path=self._path)
    self._prof = ru.Profiler(component, path=self._path)

    self._shared_data = []
    self._outputs = None
def __init__(self, sid, pending_queue, completed_queue, rmgr, rmq_conn_params, rts):
    """Validate the arguments, set up logging and reset the heartbeat queues.

    Args:
        sid: session id (``str``); prefixes the heartbeat queue names and
            names the log / profile sub-directory.
        pending_queue: ``list``, stored as given.
        completed_queue: ``list``, stored as given.
        rmgr: a ``Base_ResourceManager`` instance.
        rmq_conn_params: ``pika.connection.ConnectionParameters`` used to
            open a short-lived connection for (re)creating the heartbeat
            queues.
        rts: runtime system, stored as given (not type checked).

    Raises:
        TypeError: if one of the checked arguments has the wrong type.
            NOTE(review): ``TypeError`` is called with keyword arguments,
            so it is presumably the project's own exception class, not the
            builtin -- confirm against the file's imports.
    """
    if not isinstance(sid, str):
        raise TypeError(expected_type=str, actual_type=type(sid))

    # Both queues must be lists; the reported expected type now says so
    # (it previously claimed ``str``).
    if not isinstance(pending_queue, list):
        raise TypeError(expected_type=list, actual_type=type(pending_queue))

    if not isinstance(completed_queue, list):
        raise TypeError(expected_type=list, actual_type=type(completed_queue))

    if not isinstance(rmgr, Base_ResourceManager):
        raise TypeError(expected_type=Base_ResourceManager,
                        actual_type=type(rmgr))

    if not isinstance(rmq_conn_params, pika.connection.ConnectionParameters):
        raise TypeError(expected_type=pika.connection.ConnectionParameters,
                        actual_type=type(rmq_conn_params))

    self._sid = sid
    self._pending_queue = pending_queue
    self._completed_queue = completed_queue
    self._rmgr = rmgr
    self._rts = rts
    self._rmq_conn_params = rmq_conn_params

    # Utility parameters
    self._uid = ru.generate_id('task_manager.%(counter)04d', ru.ID_CUSTOM)
    self._path = os.getcwd() + '/' + self._sid

    name = 'radical.entk.%s' % self._uid
    self._log = ru.Logger(name, path=self._path)
    self._prof = ru.Profiler(name, path=self._path)
    self._dh = ru.DebugHelper(name=name)

    self._hb_request_q = '%s-hb-request' % self._sid
    self._hb_response_q = '%s-hb-response' % self._sid

    # Drop and re-declare both heartbeat queues.  The connection is closed
    # in ``finally`` so it does not leak when a queue operation raises.
    mq_connection = pika.BlockingConnection(rmq_conn_params)
    try:
        mq_channel = mq_connection.channel()
        for queue_name in (self._hb_response_q, self._hb_request_q):
            mq_channel.queue_delete(queue=queue_name)
            mq_channel.queue_declare(queue=queue_name)
    finally:
        mq_connection.close()

    self._tmgr_process = None
    self._hb_thread = None
    # Heartbeat interval, overridable via $ENTK_HB_INTERVAL (default 30).
    self._hb_interval = int(os.getenv('ENTK_HB_INTERVAL', 30))
def __init__(self, cfg, session):
    """Derive the component uid from ``cfg['owner']`` and init the base.

    The uid is assigned before ``rpu.Component.__init__`` runs; afterwards
    the owner is also made available as ``self._umgr``.
    """
    uid_template = ''.join((cfg['owner'], '.scheduling.%(counter)s'))
    self._uid = ru.generate_id(uid_template, ru.ID_CUSTOM)

    rpu.Component.__init__(self, cfg, session)

    self._umgr = self._owner
def __init__(self, req):
    """Wrap ``req`` as a unit of work in state 'NEW' with no result yet.

    A 'uid' entry in ``req`` is reused as this object's uid; otherwise a
    new one is generated with the 'request' prefix.
    """
    self._work = req
    self._state = 'NEW'
    self._result = None

    self._uid = req['uid'] if 'uid' in req else ru.generate_id('request')
def __init__(self, cfg, session):
    """Derive the component uid from ``cfg['owner']`` and init the base.

    Also records the tmp dir to hand to compute units: ``cfg['cu_tmp']`` if
    configured, else ``$TMP``, else '/tmp'.
    """
    uid_template = ''.join((cfg['owner'], '.executing.%(counter)s'))
    self._uid = ru.generate_id(uid_template, ru.ID_CUSTOM)

    rpu.Component.__init__(self, cfg, session)

    # if so configured, let the CU know what to use as tmp dir
    fallback_tmp = os.environ.get('TMP', '/tmp')
    self._cu_tmp = cfg.get('cu_tmp', fallback_tmp)
def __init__(self, pilot):
    """Create the bookkeeping record for ``pilot`` in state ``_NEW``.

    The record gets its own id ('d.p.' prefix), an empty unit map, no start
    time, and keeps both the pilot and its description.
    """
    self.id = ru.generate_id('d.p.')

    self.units = {}
    self.state = _NEW
    self.start = None
    self.pilot = pilot
    self.descr = pilot.description

    message = "new pilot %s" % (self.id)
    troy._logger.debug(message)
def __init__(self, session, task_descriptions=None, relation_descriptions=None):
    """
    Create a new workload instance.

    Each new workload is assigned a new ID.

    Later (service oriented) Troy implementations may allow for an
    additional id parameter, to reconnect to the thus identified workload
    instances.

    Task and relation descriptions, when given, are added to the workload
    once it has been registered with the workload manager.
    """
    self.session = session
    self.id = ru.generate_id('wl.')

    # timing hooks, on the instance itself and on the owning session
    tu.Timed.__init__(self, 'troy.Workload', self.id)
    self.session.timed_component(self, 'troy.Workload', self.id)

    tu.Properties.__init__(self)

    # register properties
    for prop_name in ('id', 'state', 'tasks',
                      'relations', 'partitions', 'manager'):
        self.register_property(prop_name)

    # initialize essential properties
    self.state = UNKNOWN
    self._set_state(DESCRIBED)

    self.tasks = {}
    self.relations = []
    self.partitions = []

    # initialize partitions
    self.partitions = [self.id]

    self.register_property_updater('state', self.get_state)

    # initialize private properties
    self.parametrized = False

    # register this instance, so that workload can be passed around by id.
    troy.WorkloadManager.register_workload(self)

    # fill the workload with given task and relation descriptions
    if task_descriptions:
        self.add_task(task_descriptions)

    if relation_descriptions:
        self.add_relation(relation_descriptions)
def _assign_uid(self, sid):
    """
    Purpose: Assign a uid to the current object based on the sid passed,
    let every stage in ``self._stages`` assign its own uid from the same
    sid, then pass the current uid on to the children.
    """
    uid_template = 'pipeline.%(item_counter)04d'
    self._uid = ru.generate_id(uid_template, ru.ID_CUSTOM, namespace=sid)

    for child_stage in self._stages:
        child_stage._assign_uid(sid)

    self._pass_uid()
def __init__(self):
    """Create empty bookkeeping plus uid, logger and profiler.

    An 'Initinit' profile event is recorded once the profiler exists.
    """
    # bookkeeping: maintains a record of all MD tasks carried out
    self.book = list()
    self.md_task_list = list()
    self.ex_task_list = list()

    component = 'radical.repex.syncex'
    self._uid = ru.generate_id(component)
    self._logger = ru.get_logger(component)
    self._prof = ru.Profiler(name=self._uid)

    self._prof.prof('Initinit', uid=self._uid)
def __init__(self, pmgr, descr):
    """Create a pilot handle for ``descr``, owned by pilot manager ``pmgr``.

    Raises:
        ValueError: if the description has no (truthy) 'resource', 'cores'
            or 'runtime' entry.
    """
    # 'static' members
    self._descr = descr.as_dict()

    # sanity checks on description
    for required in ('resource', 'cores', 'runtime'):
        if not self._descr.get(required):
            raise ValueError("ComputePilotDescription needs '%s'" % required)

    # initialize state
    self._pmgr = pmgr
    self._session = self._pmgr.session
    self._prof = self._session._prof
    self._uid = ru.generate_id('pilot.%(counter)04d', ru.ID_CUSTOM)
    self._state = rps.NEW
    self._log = pmgr._log
    self._pilot_dict = {}
    # one (initially empty) callback registry per pilot manager metric
    self._callbacks = {metric: {} for metric in rpt.PMGR_METRICS}
    self._cache = {}    # cache of SAGA dir handles
    self._cb_lock = threading.RLock()
    self._exit_on_error = self._descr.get('exit_on_error')

    # we always invoke the default state cb
    default_cb = self._default_state_cb
    self._callbacks[rpt.PILOT_STATE][default_cb.__name__] = {
        'cb': default_cb,
        'cb_data': None}

    # `as_dict()` needs `pilot_dict` and other attributes.  Those should all
    # be available at this point (apart from the sandboxes), so the URLs
    # are set to empty placeholders first and queried for afterwards.
    self._pilot_jsurl = ru.Url()
    self._pilot_jshop = ru.Url()
    self._resource_sandbox = ru.Url()
    self._pilot_sandbox = ru.Url()
    self._client_sandbox = ru.Url()

    self._log.debug(' ===== 1: %s [%s]',
                    self._pilot_sandbox, type(self._pilot_sandbox))

    pilot = self.as_dict()
    self._log.debug(' ===== 2: %s [%s]',
                    pilot['pilot_sandbox'], type(pilot['pilot_sandbox']))

    jsurl, jshop = self._session._get_jsurl(pilot)
    self._pilot_jsurl = jsurl
    self._pilot_jshop = jshop
    self._resource_sandbox = self._session._get_resource_sandbox(pilot)
    self._pilot_sandbox = self._session._get_pilot_sandbox(pilot)
    self._client_sandbox = self._session._get_client_sandbox()

    self._log.debug(' ===== 3: %s [%s]',
                    self._pilot_sandbox, type(self._pilot_sandbox))
def __init__(self, user_cfg=None, default=True):
    """Read the troy configuration, set the log level, create the session.

    Args:
        user_cfg: a single user config or a list of them; a non-list value
            is wrapped into a one-element list.
        default: forwarded to ``saga.Session.__init__``.
    """
    # accept any number of user configs
    if not isinstance(user_cfg, list):
        user_cfg = [user_cfg]

    # set saga apitype for clean inheritance (cpi to api mapping relies on
    # _apitype)
    self._apitype = 'saga.Session'

    resource_cfg = "%s/resources.json" % os.path.dirname(troy.__file__)
    config_dir = "%s/.troy" % os.environ.get('HOME', '/etc/')
    config_env = str(os.environ.get('TROY_CONFIG'))

    # we read our base config from $HOME/troy/* by default, but also accept
    # other locations if $TROY_CONFIG is set.  Items later in the list below
    # overwrite earlier ones.
    config_sources = [_config_skeleton, resource_cfg, config_dir, config_env]
    self.cfg = tu.get_config(config_sources + user_cfg)

    # make sure that the resource sections in the config have the minimal
    # set of entries
    resources = self.cfg['resources']
    for resource_name in resources:
        ru.dict_merge(resources[resource_name],
                      _resource_config_skeleton,
                      policy='preserve',
                      logger=troy._logger)

    # log level: $TROY_VERBOSE wins over the 'log_level' config entry, which
    # wins over the fallback ERROR
    configured_level = self.cfg.get('log_level', 'ERROR')
    troy._logger.setLevel(os.environ.get('TROY_VERBOSE', configured_level))

    # now that config parsing is done, we can create the session ID
    id_stub = self.cfg.get("session_id", 'session.')
    self.id = ru.generate_id(id_stub, mode=ru.ID_UNIQUE)

    troy._logger.info("session id: %s" % self.id)

    # and initialize the inherited saga session
    tu.Timed.__init__(self, 'troy.Session', self.id)
    self.timed_method('saga.Session', ['init'],
                      saga.Session.__init__, [self, default])
def test_rmgr_base_populate(t, i):
    """Exercise ``BaseRmgr._populate`` with generated and with fixed input.

    For string ``t`` a description built from ``t`` / ``i`` is used to check
    that populating before validation raises ``EnTKError``; afterwards a
    fixed description is validated, populated and compared field by field.
    """
    if isinstance(t, str):
        generated_desc = dict(resource=t, walltime=i, cpus=i, gpus=i,
                              project=t, access_schema=t, queue=t)

        # NOTE(review): `sid` is not bound in this function; presumably it
        # is a module-level name -- confirm.
        rm = BaseRmgr(generated_desc, sid=sid, rts=None, rts_config={})

        # populating an unvalidated description must fail
        with pytest.raises(EnTKError):
            rm._populate()

        rm._validate_resource_desc()
        rm._populate()

    fixed_desc = dict(resource='local.localhost', walltime=40, cpus=100,
                      gpus=25, project='new', queue='high',
                      access_schema='gsissh')

    rmgr_id = ru.generate_id('test.%(item_counter)04d', ru.ID_CUSTOM)
    rmgr = BaseRmgr(fixed_desc, sid=rmgr_id, rts=None, rts_config={})
    rmgr._validate_resource_desc()
    rmgr._populate()

    assert rmgr._sid == rmgr_id
    assert rmgr._resource == 'local.localhost'
    assert rmgr._walltime == 40
    assert rmgr._cpus == 100
    assert rmgr._gpus == 25
    assert rmgr._project == 'new'
    assert rmgr._access_schema == 'gsissh'
    assert rmgr._queue == 'high'
    assert rmgr._validated == True
def __init__(self, session, descr, _workload=None):
    """
    Create a new workload element, aka Task, according to the description.

    Each new task is assigned a new ID.

    Later implementations may allow for an additional id parameter,
    to reconnect to the thus identified task instance.
    """
    self.session = session

    tu.Properties.__init__(self, descr)

    # register properties
    for prop_name in ('id', 'state', 'tag',
                      'cardinality', 'units', 'workload'):
        self.register_property(prop_name)

    # initialize essential properties
    self.state = DESCRIBED
    self.tag = descr.get('tag', None)
    self.units = {}
    self.workload = _workload

    # FIXME: complete attribute list, dig properties from description,
    # perform sanity checks

    # initialize state
    self.id = ru.generate_id('t.')

    # timing hooks: on the instance, the session and, if one was given, the
    # owning workload
    tu.Timed.__init__(self, 'troy.Task', self.id)
    self.session.timed_component(self, 'troy.Task', self.id)
    if self.workload:
        self.workload.timed_component(self, 'troy.Task', self.id)

    self.register_property_updater('state', self.get_state)
def __init__(self, descr):
    """Create the bookkeeping record for a unit described by ``descr``.

    Missing 'executable', 'arguments' and 'environment' entries are filled
    in on ``descr`` itself ('/bin/true' and empty lists, respectively).
    """
    self.id = ru.generate_id('d.u.')

    self.descr = descr
    self._state = _NEW
    self.start = None
    self.proc = None
    self.retval = None

    # fill in defaults for entries the description does not provide
    fallbacks = (('executable', '/bin/true'),
                 ('arguments', []),
                 ('environment', []))
    for key, fallback in fallbacks:
        if key not in self.descr:
            self.descr[key] = fallback

    message = "new unit %s" % (self.id)
    troy._logger.debug(message)
def __init__(self, session,
             translator=AUTOMATIC,
             scheduler=AUTOMATIC,
             dispatcher=AUTOMATIC):
    """
    Create a new workload manager instance.

    Use default plugins if not indicated otherwise.  Plugin preferences
    found in the session's 'workload_manager' config section override the
    arguments.
    """
    self.session = session
    self.id = ru.generate_id('wlm.')

    tu.Timed.__init__(self, 'troy.WorkloadManager', self.id)
    self.session.timed_component(self, 'troy.WorkloadManager', self.id)

    self._stager = None
    self._plugin_mgr = None

    # setup plugins from arguments
    #
    # We leave actual plugin initialization for later, in case a strategy
    # wants to alter / complete the plugin selection
    #
    # FIXME: we don't need no stupid arguments, ey!  Just use
    #        AUTOMATIC by default...
    self.plugins = {}
    self.plugins['translator'] = translator
    self.plugins['scheduler'] = scheduler
    self.plugins['dispatcher'] = dispatcher

    # lets see if there are any plugin preferences in the config
    # note that config settings supersede arguments!
    cfg = session.get_config('workload_manager')

    overrides = (('translator', 'plugin_workload_translator'),
                 ('scheduler', 'plugin_workload_scheduler'),
                 ('dispatcher', 'plugin_workload_dispatcher'))
    for role, cfg_key in overrides:
        if cfg_key in cfg:
            self.plugins[role] = cfg[cfg_key]
def __init__(self, session, descr=None):
    """
    Create a new overlay instance, based on the given overlay description.

    Each new overlay is assigned a new ID.

    Later (service oriented) Troy implementations may allow for an
    additional id parameter, to reconnect to the thus identified overlay
    instance.

    A falsy ``session`` is replaced by a new ``troy.Session``; a falsy
    ``descr`` by an empty ``troy.OverlayDescription``; a dict ``descr`` is
    converted into a ``troy.OverlayDescription``.
    """
    self.session = session if session else troy.Session()

    if not descr:
        descr = troy.OverlayDescription()

    if isinstance(descr, dict):
        descr = troy.OverlayDescription(descr)

    self.id = ru.generate_id('ol.')

    tu.Timed.__init__(self, 'troy.Overlay', self.id)
    self.session.timed_component(self, 'troy.Overlay', self.id)

    tu.Properties.__init__(self, descr)

    # register properties
    for prop_name in ('id', 'state', 'description', 'pilots', 'manager'):
        self.register_property(prop_name)

    # initialize essential properties
    self.state = DESCRIBED
    self.description = descr
    self.pilots = {}

    # register this instance, so that overlay can be passed around by id.
    troy.OverlayManager.register_overlay(self)
def __init__(self, session,
             translator=AUTOMATIC,
             scheduler=AUTOMATIC,
             provisioner=AUTOMATIC):
    """
    Create a new overlay manager instance.

    Use default plugins if not otherwise indicated.  Note that the
    provisioner plugin is actually not owned by the OverlayManager, but by
    the pilots of the Overlay managed by the OverlayManager.

    Plugin preferences found in the session's 'overlay_manager' config
    section override the arguments.
    """
    self.session = session
    self.id = ru.generate_id("olm.")

    tu.Timed.__init__(self, "troy.OverlayManager", self.id)
    self.session.timed_component(self, "troy.OverlayManager", self.id)

    self._plugin_mgr = None

    # setup plugins from arguments
    #
    # We leave actual plugin initialization for later, in case a strategy
    # wants to alter / complete the plugin selection
    #
    # FIXME: we don't need no stupid arguments, ey!  Just use
    #        AUTOMATIC by default...
    self.plugins = {}
    self.plugins["translator"] = translator
    self.plugins["scheduler"] = scheduler
    self.plugins["provisioner"] = provisioner

    # lets see if there are any plugin preferences in the config
    # note that config settings supersede arguments!
    cfg = session.get_config("overlay_manager")

    overrides = (("translator", "plugin_overlay_translator"),
                 ("scheduler", "plugin_overlay_scheduler"),
                 ("provisioner", "plugin_overlay_provisioner"))
    for role, cfg_key in overrides:
        if cfg_key in cfg:
            self.plugins[role] = cfg[cfg_key]
def create(unit_manager_obj, unit_description, local_state):
    """PRIVATE: Create a new compute unit.

    A deep copy of ``unit_description`` is attached to the unit, so staging
    expansion does not touch the caller's description.  An 'advance'
    profile event is recorded for the new unit before it is returned.

    Raises:
        PilotException: if the description names neither a non-empty
            'executable' nor a non-empty 'kernel'.
        BadParameter: if ``unit_description.cores`` is not greater than 0.
    """
    unit = ComputeUnit()

    # Work on a copy of the description to avoid side effects on the caller.
    description = copy.deepcopy(unit_description)

    # Sanity check: at least one of executable / kernel must be set.
    if not (('executable' in unit_description
             and unit_description['executable'])
            or ('kernel' in unit_description
                and unit_description['kernel'])):
        raise PilotException ("ComputeUnitDescription needs an executable or application kernel")

    # Validate number of cores
    if not unit_description.cores > 0:
        raise BadParameter("Can't run a Compute Unit with %d cores." % unit_description.cores)

    # Expand whichever staging directives are present.
    for staging_attr in ('input_staging', 'output_staging'):
        directives = getattr(description, staging_attr)
        if directives:
            setattr(description, staging_attr,
                    expand_staging_directive(directives))

    unit._description = description
    unit._manager = unit_manager_obj
    unit._session = unit_manager_obj._session
    unit._worker = unit_manager_obj._worker
    unit._uid = ru.generate_id('unit.%(counter)06d', ru.ID_CUSTOM)
    unit._name = unit_description['name']
    unit._local_state = local_state

    unit._session.prof.prof('advance', msg=NEW, uid=unit._uid, state=NEW)

    return unit
def test_rmgr_rp_get_resource_allocation_state():
    """Check ``RPRmgr.get_resource_allocation_state`` around a submission.

    Before anything is submitted the state must be falsy; after submitting
    the resource request it must be ``rp.PMGR_ACTIVE`` or ``rp.FAILED``.
    """
    # Imported up front so that a missing radical.pilot fails the test
    # before a resource request has been submitted.
    import radical.pilot as rp

    res_dict = {
        'resource': 'local.localhost',
        'walltime': 10,
        'cpus': 1,
        'project': ''
    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB
    config = {"sandbox_cleanup": False, "db_cleanup": False}

    rmgr_id = ru.generate_id('test.%(item_counter)04d', ru.ID_CUSTOM)
    rmgr = RPRmgr(res_dict, sid=rmgr_id, rts_config=config)

    assert not rmgr.get_resource_allocation_state()

    rmgr._validate_resource_desc()
    rmgr._populate()
    rmgr._submit_resource_request()

    # Always terminate the submitted request, even when the state assertion
    # fails, so that a failing test does not leave the request behind.
    try:
        assert rmgr.get_resource_allocation_state() in [rp.PMGR_ACTIVE,
                                                        rp.FAILED]
    finally:
        rmgr._terminate_resource_request()