def transfer_file(src_files, target_dir, resource, tar):
    """Copy local files into ``target_dir`` on ``resource`` and print the elapsed time.

    A saga filesystem connection to ``gsisftp://<resource>:2222/`` is opened,
    ``target_dir`` is created remotely (including parents), and every file is
    copied into it.  The time spent on archiving and copying is printed.

    :param src_files: iterable of local file paths.
    :param target_dir: directory path on the remote side.
    :param resource: remote host name; ``:2222/`` is appended to it.
    :param tar: if true, the files are first packed into a local archive
        ``tartest.tar`` and only that archive is transferred.
    """
    # Remote transfers go through a saga session.  Attaching a "gsissh"
    # context to the session is currently disabled:
    # ctx = rs.Context("gsissh")
    session = rs.Session()
    # session.add_context(ctx)

    # Set up the remote filesystem connection.
    target_connection = resource + ':2222/'
    remote_dir = rs.filesystem.Directory('gsisftp://' + target_connection,
                                         session=session)

    # Create the remote directory with all the parents.
    remote_dir.make_dir(target_dir, flags=rs.filesystem.CREATE_PARENTS)
    tgt_dir = 'gsisftp://' + target_connection + target_dir

    # Do the actual transfer.
    start_time = time()

    if tar:
        files_to_copy = ['tartest.tar']
        # The context manager closes the archive even if adding a member
        # fails, and a separate name keeps the ``tar`` flag intact.
        with tarfile.open(files_to_copy[0], "w") as archive:
            for filename in src_files:
                archive.add(filename)
    else:
        files_to_copy = src_files

    # One logger serves all URL completions.
    logger = ru.get_logger('transfer.files')

    for src_file in files_to_copy:
        src_filename = os.path.basename(src_file)
        src_context = {
            'pwd': os.path.dirname(os.path.abspath(src_file)),
            'unit': tgt_dir,
            'pilot': tgt_dir,
            'resource': tgt_dir,
        }
        tgt_context = {
            'pwd': tgt_dir,
            'unit': tgt_dir,
            'pilot': tgt_dir,
            'resource': tgt_dir,
        }
        src = complete_url(src_filename, src_context, logger)
        tgt = complete_url(src_filename, tgt_context, logger)
        remote_dir.copy(src, tgt, flags=rs.filesystem.CREATE_PARENTS)

    end_time = time()
    print(end_time - start_time)
def test_logger():
    """Emit one message per log level through the 'engine' logger."""
    # NOTE(review): the logger is looked up twice under the same name; this
    # looks redundant -- confirm whether repeated lookup is what is tested.
    cl = ru.get_logger('engine')
    cl = ru.get_logger('engine')

    # Check the handle before it is used for anything.
    assert cl is not None

    cl.setLevel('DEBUG')

    cl.debug('debug')
    cl.info('info')
    # ``warn`` is a deprecated alias of ``warning`` in the logging API.
    cl.warning('warning')
    cl.error('error')
    cl.fatal('fatal')
def __init__(self, kernel_info):
    """Initialise the kernel from its description.

    :param kernel_info: mapping describing the kernel; its ``'name'`` entry
        becomes the kernel name and is part of the logger name.
    """
    kernel_name = kernel_info['name']

    # identity and logging
    self._info = kernel_info
    self._name = kernel_name
    self._subname = None
    self._logger = ru.get_logger('radical.enmd.{0}'.format(kernel_name))

    # argument bookkeeping (two independent lists)
    self._args = []
    self._raw_args = []

    # execution settings: all unset initially, except for the core count
    self._pre_exec = self._post_exec = None
    self._environment = self._executable = self._arguments = None
    self._uses_mpi = None
    self._cores = 1

    # data staging directives: all unset initially
    self._upload_input_data = self._link_input_data = None
    self._download_input_data = self._download_output_data = None
    self._copy_input_data = self._copy_output_data = None
def __init__(self, resource='local', comm_server=None):
    """The workhorse of high throughput binding affinity calculations.

    Manages arbitrary number of protocols on any resource (including
    supercomputers).

    Parameters
    ----------
    resource: str
        The name of the resource where the protocols will be run. This is
        usually the name of the supercomputer or 'local' if the job will be
        executed locally (the default is to try to run locally). The name is
        looked up in the packaged ``resources.yaml``.
    comm_server: tuple(str, int)
        The communication server used by the execution system. Specify a
        hostname and port number as a tuple. If None, the
        ``dedicated_rabbitmq_server`` entry of the resource description is
        used.

    Raises
    ------
    KeyError
        If `resource` is not defined in ``resources.yaml``.
    ValueError
        If no communication server is given and the resource description
        does not define one.
    """
    # The resource file is plain data, so the safe loader is sufficient;
    # ``yaml.load`` without an explicit Loader is deprecated.
    self.resource = yaml.safe_load(resource_stream(__name__, 'resources.yaml'))[resource]

    if comm_server is None:
        comm_server = self.resource.get('dedicated_rabbitmq_server')

    if comm_server is None:
        # Fail with a clear message instead of a TypeError from unpacking
        # ``None`` in the AppManager call below.
        raise ValueError("no comm_server given and resource '%s' does not "
                         "define 'dedicated_rabbitmq_server'" % resource)

    self._protocols = list()
    self._app_manager = AppManager(*comm_server)

    # Profiler for Runner
    self._uid = ru.generate_id('radical.htbac.workflow_runner')
    self._logger = ru.get_logger('radical.htbac.workflow_runner')
    self._prof = ru.Profiler(name=self._uid)
    self._prof.prof('create workflow_runner obj', uid=self._uid)

    self._root_directories = list()
def run(self):
    """Worker body: sleep for random intervals until termination is requested.

    Appends ``.thread`` to ``self.uid`` and creates ``self.log`` under that
    name.  While ``self.term`` is not set, each iteration sleeps for a random
    duration between ``WORK_MIN`` and ``WORK_MAX`` seconds and then calls
    ``ru.raise_on('work')``.  Any exception is logged and re-raised.
    """
    self.uid += '.thread'
    self.log = ru.get_logger('radical.' + self.uid, level=self.verbose)

    try:
        self.log.info('%-10s : work start' % self.uid)

        while True:
            if self.term.is_set():
                break

            # random sleep time within [WORK_MIN, WORK_MAX)
            nap = WORK_MIN + (random.random() * (WORK_MAX - WORK_MIN))

            self.log.info('%-10s : %ds sleep start' % (self.uid, nap))
            time.sleep(nap)
            self.log.info('%-10s : %ds sleep stop' % (self.uid, nap))

            ru.raise_on('work')

        self.log.info('%-10s : work term requested' % self.uid)

    except Exception as err:
        # report the failure, then let the caller see the original error
        self.log.info('%-10s : work fail [%s]' % (self.uid, err))
        raise

    # only reached when the loop ended without an exception
    self.log.info('%-10s : thread exit requested' % self.uid)
def __init__(self, workflow, pending_queue, completed_queue, mq_hostname):
    """Create a workflow processor.

    :param workflow: stored as is, not validated here.
    :param pending_queue: must be a ``list``.
    :param completed_queue: must be a ``list``.
    :param mq_hostname: must be a ``str``.
    :raises TypeError: when one of the checked arguments has the wrong type.
        NOTE(review): ``TypeError`` is called with keyword arguments, so it
        is presumably a project-defined exception -- confirm.
    """
    self._uid = ru.generate_id('radical.entk.wfprocessor')
    self._logger = ru.get_logger('radical.entk.wfprocessor')
    self._workflow = workflow

    # (value, required type) pairs, checked in signature order
    type_checks = (
        (pending_queue, list),
        (completed_queue, list),
        (mq_hostname, str),
    )
    for value, required in type_checks:
        if not isinstance(value, required):
            raise TypeError(expected_type=required, actual_type=type(value))

    # message queue names and host
    self._pending_queue = pending_queue
    self._completed_queue = completed_queue
    self._mq_hostname = mq_hostname

    self._wfp_process = None
    self._resubmit_failed = False

    self._logger.info('Created WFProcessor object: %s' % self._uid)
def setUp(self):
    """Patch the Slurm dependencies and build a bare ``Slurm`` component.

    Three patches are active for the duration of each test:
    ``Slurm.__init__`` (returns ``None``), ``ru.Profiler.prof`` and
    ``hostlist.expand_hostlist`` (returns ``['nodes1', 'nodes2']``).  The
    resulting mocks are available as ``self.mock_init``, ``self.mock_prof``
    and ``self.mock_host``.
    """
    # Create patches for test functions
    patcher_init = mock.patch.object(Slurm, '__init__', return_value=None)
    patcher_prof = mock.patch.object(ru.Profiler, 'prof')
    patcher_host = mock.patch('hostlist.expand_hostlist',
                              return_value=['nodes1', 'nodes2'])

    # Start each patch and register its cleanup right away, so that only
    # patches which were actually started are ever stopped.
    self.mock_init = patcher_init.start()
    self.addCleanup(patcher_init.stop)

    self.mock_prof = patcher_prof.start()
    self.addCleanup(patcher_prof.stop)

    # Stored under its own name, so the profiler mock is not overwritten.
    self.mock_host = patcher_host.start()
    self.addCleanup(patcher_host.stop)

    # Initialize the component before each test
    self.component = Slurm(cfg=None, session=None)
    self.component._log = ru.get_logger('dummy')
    self.component.cores_per_node = None
    self.component.gpus_per_node = None
    self.component.lfs_per_node = None
    self.component.lm_info = dict()
def __init__(self, adaptor_info, adaptor_options=None):
    """Register the adaptor description and its configuration options.

    :param adaptor_info: mapping with at least ``'name'`` and ``'schemas'``.
    :param adaptor_options: optional list of option dicts; each dict is
        expected to carry a ``'name'`` key.  The list is copied, so neither
        the caller's list nor a shared default is modified.

    An ``'enabled'`` option is added when the given options do not define
    one, then the options are registered with ``ruc.Configurable``.
    """
    self._info = adaptor_info
    # Private copy: the list may be extended below, and a mutable default
    # would otherwise be shared between all adaptor instances.
    self._opts = list(adaptor_options) if adaptor_options is not None else []
    self._name = adaptor_info['name']
    self._schemas = adaptor_info['schemas']

    self._lock = ru.RLock(self._name)
    self._logger = ru.get_logger('radical.saga.api')

    has_enabled = any(option['name'] == 'enabled' for option in self._opts)

    if not has_enabled:
        # *every* adaptor needs an 'enabled' option!
        self._opts.append({
            'category': self._name,
            'name': 'enabled',
            'type': bool,
            'default': True,
            'valid_options': [True, False],
            'documentation': "Enable / disable loading of the adaptor",
            'env_variable': None,
        })

    ruc.Configurable.__init__(self, 'saga')
    ruc.Configurable.config_options(self, self._name, self._opts)
def __init__(self, default=True):
    """
    default: bool
    ret:     None

    With ``default`` set, the session gets a deep copy of the contexts of a
    ``DefaultSession`` and shares that session's lease manager.  Otherwise
    it gets an empty private context list and its own lease manager.
    """
    super(Session, self).__init__()

    self._logger = ru.get_logger('radical.saga')

    if not default:
        # private, unpopulated context list
        self.contexts = _ContextList(session=self)

        # FIXME: at the moment, the lease manager is owned by the session.
        #        However, the pty layer is the main user of the lease
        #        manager, so the lease manager options live in the pty
        #        config subsection, which is evaluated here.
        pty_cfg = self.get_config('saga.utils.pty')
        pool_size = pty_cfg['connection_pool_size'].get_value()
        pool_wait = pty_cfg['connection_pool_wait'].get_value()
        pool_ttl = pty_cfg['connection_pool_ttl'].get_value()

        self._lease_manager = ru.LeaseManager(max_pool_size=pool_size,
                                              max_pool_wait=pool_wait,
                                              max_obj_age=pool_ttl)
        return

    # default session requested: copy its contexts, reuse its lease manager
    default_session = DefaultSession()
    self.contexts = copy.deepcopy(default_session.contexts)
    self._lease_manager = default_session._lease_manager
def __init__(self, adaptor_info, adaptor_options=None):
    """Register the adaptor description and its configuration options.

    :param adaptor_info: mapping with at least ``'name'`` and ``'schemas'``.
    :param adaptor_options: optional list of option dicts; each dict is
        expected to carry a ``'name'`` key.  The list is copied, so neither
        the caller's list nor a shared default is modified.

    An ``'enabled'`` option is added when the given options do not define
    one, then the options are registered with ``ruc.Configurable``.
    """
    self._info = adaptor_info
    # Private copy: the list may be extended below, and a mutable default
    # would otherwise be shared between all adaptor instances.
    self._opts = list(adaptor_options) if adaptor_options is not None else []
    self._name = adaptor_info['name']
    self._schemas = adaptor_info['schemas']

    self._lock = ru.RLock(self._name)
    self._logger = ru.get_logger('radical.saga.api')

    has_enabled = any(option['name'] == 'enabled' for option in self._opts)

    if not has_enabled:
        # *every* adaptor needs an 'enabled' option!
        self._opts.append({
            'category': self._name,
            'name': 'enabled',
            'type': bool,
            'default': True,
            'valid_options': [True, False],
            'documentation': "Enable / disable loading of the adaptor",
            'env_variable': None,
        })

    ruc.Configurable.__init__(self, 'saga')
    ruc.Configurable.config_options(self, self._name, self._opts)
def __init__(self, session, db_obj, cb_buffer, scheduler=None):
    """Create the task manager and run ``self._initialize()``.

    :param session: stored as ``self._session``.
    :param db_obj: stored as ``self._db_obj``.
    :param cb_buffer: stored as ``self._cb_buffer``.
    :param scheduler: optional, stored as ``self._scheduler``.
    """
    component = 'task_manager.rp'
    self._uid = ru.generate_id(component)
    self._logger = ru.get_logger(component)

    self._session = session
    self._scheduler = scheduler

    # NOTE: if the cu.uid update is moved, the db obj can be removed from
    #       the task manager entirely, since it acts through the buffer
    #       (i.e. move this update to the buffer)
    self._db_obj = db_obj
    self._cb_buffer = cb_buffer

    self._running_tasks = list()

    # The checklist takes each batch of tasks and is used to check that
    # they are running, or else to cancel them after a waiting period.
    #
    # structure of the checklist:
    #
    #      batch info              need2check               need2kill
    #   { (starttime, waittime) : [ [cu.uid, cu.uid, ...], [cu.uid, ...] ],
    #     ()                    : [ ... ],
    #     ...
    #   }
    #
    self._running_mgr = Manager()
    self._running_checklist = self._running_mgr.dict()

    self._initialize()
def __init__(self, atype):
    """Set up the instance and start its worker thread.

    :param atype: type tag; used as prefix for the generated uid.

    Creates the temporary directory if needed, two queues (work and
    result), a termination event, and a thread running ``self.run`` which
    is started immediately.

    :raises OSError: if the temporary directory cannot be created.
    """
    self._atype = atype
    self._pid = os.getpid()
    self._uid = ru.generate_id("%s" % self._atype)

    # Interpolate the uid into the logger name; it used to be the literal
    # text 'self._uid'.
    self.logger = ru.get_logger('radical.synapse.%s' % self._uid)

    # storage for temporary data and statistics
    # self._tmpdir = "/scratch/synapse/"  # FIXME
    self._tmpdir = "/tmp/"                # FIXME

    try:
        os.makedirs(self._tmpdir)
    except OSError as exc:
        # an already existing directory is fine, anything else is an error
        if not (exc.errno == errno.EEXIST and os.path.isdir(self._tmpdir)):
            raise

    # start worker thread
    self._work_queue = Queue.Queue()
    self._result_queue = Queue.Queue()

    self._term = threading.Event()
    self._proc = threading.Thread(target=self.run)
    self._proc.start()
def __init__(self, number_of_replicas, systems=None, workflow=None, cores=32,
             ligand=False, full=False, gibbs_steps=None,
             thermodynamic_states=None):
    """Create a yank replica-exchange instance.

    :param number_of_replicas: stored as ``self.number_of_replicas``.
    :param systems: list of systems; defaults to a new empty list.
    :param workflow: workflow steps; defaults to ``[0, 1, 2, 3, 4]``.
    :param cores: stored as ``self.cores``.
    :param ligand: if true, ``self.ligand`` is ``'-ligands'``, else ``''``.
    :param full: selects ``_full_steps`` over ``_reduced_steps``.
    :param gibbs_steps: required; stored as ``self.n_gibbs_steps``.
    :param thermodynamic_states: required; stored as ``self.thermo_state``.
    :raises TypeError: if ``gibbs_steps`` or ``thermodynamic_states`` is
        not given.
    """
    # These two had no default but followed defaulted parameters, which is
    # a syntax error.  They keep their position and stay mandatory.
    if gibbs_steps is None or thermodynamic_states is None:
        raise TypeError("__init__() requires 'gibbs_steps' and "
                        "'thermodynamic_states'")

    self.number_of_replicas = number_of_replicas
    self.n_gibbs_steps = gibbs_steps
    self.thermo_state = thermodynamic_states
    self.ligand = '-ligands' if ligand else ''
    self.step_count = _full_steps if full else _reduced_steps

    # fresh list per instance instead of a shared mutable default
    self.systems = systems if systems is not None else []

    self.cores = cores
    self._id = uuid.uuid1()  # generate id

    # self.workflow = workflow or ['gen_replicas', 'repex', 'rotation',
    #                              'translation', 'propagation']
    # null workflow
    self.workflow = workflow or list(range(0, 5))

    # Profiler for this instance
    self._uid = ru.generate_id('radical.yank.yank-repex')
    self._logger = ru.get_logger('radical.yank.yank-repex')
    self._prof = ru.Profiler(name=self._uid)
    self._prof.prof('create yank-repex instance', uid=self._uid)
def __init__(self):
    """Record the API type, attach a logger and assign an id.

    ``self._apitype`` is only computed (via ``self._get_apitype()``) when
    the instance does not carry that attribute yet.
    """
    apitype_known = hasattr(self, '_apitype')
    if not apitype_known:
        self._apitype = self._get_apitype()

    self._logger = ru.get_logger('radical.saga')

    # the id prefix always comes from a fresh _get_apitype() call
    id_prefix = self._get_apitype()
    self._id = ru.generate_id(id_prefix, mode=ru.ID_SIMPLE)
def __init__(self, resource_desc, db):
    """Create the resource manager from a resource description.

    :param resource_desc: checked with ``self._validate_resource_desc`` and,
        if accepted, applied with ``self._populate``.
    :param db: stored as ``self._db``.
    :raises Error: if ``RADICAL_PILOT_DBURL`` is unset or empty, or if the
        resource description is rejected.
    """
    component = 'resource_manager.rp'
    self._uid = ru.generate_id(component)
    self._logger = ru.get_logger(component)

    self._mlab_url = os.environ.get('RADICAL_PILOT_DBURL', None)
    if not self._mlab_url:
        raise Error(msg='RADICAL_PILOT_DBURL not defined. Please assign a valid mlab url')

    # runtime handles, unset at this point
    self._session = self._pmgr = self._pilot = None

    # resource settings, unset at this point
    self._resource = self._runtime = None
    self._cores = self._gpus = None
    self._project = self._access_schema = self._queue = None

    # directory containing this source file
    self._directory = os.path.dirname(os.path.abspath(__file__))

    self._db = db

    if not self._validate_resource_desc(resource_desc):
        raise Error(msg='Resource description incorrect')

    self._populate(resource_desc)
def __init__(self, url, session=None, logger=None, opts=None, posix=True):
    """Create a pty shell for ``url`` and initialize it.

    :param url: describes the shell to run.
    :param session: session to use; a default session is created if unset.
    :param logger: logger to use; ``'radical.saga.pty'`` is used if unset.
    :param opts: option dict; may carry ``'prompt_pattern'``.
    :param posix: whether the shell is /bin/sh compatible.
    :raises se.NoSuccess: if the local staging directory cannot be created.
    """
    # fall back to defaults for anything not (or emptily) provided
    self.logger = logger or ru.get_logger('radical.saga.pty')
    self.session = session or ss.Session(default=True)
    self.options = opts or dict()

    self.logger.debug("PTYShell init %s" % self)

    self.url = url              # describes the shell to run
    self.posix = posix          # /bin/sh compatible?
    self.latency = 0.0          # set by factory
    self.cp_slave = None        # file copy channel
    self.initialized = False

    # every shell gets its own running number
    self.pty_id = PTYShell._pty_id
    PTYShell._pty_id += 1

    self.cfg = self.session.get_config('saga.utils.pty')

    # prompt pattern: options take precedence over config over default
    if 'prompt_pattern' in self.options:
        self.prompt = self.options['prompt_pattern']
    elif 'prompt_pattern' in self.cfg:
        self.prompt = self.cfg['prompt_pattern'].get_value()
    else:
        self.prompt = DEFAULT_PROMPT

    self.prompt_re = re.compile("^(.*?)%s" % self.prompt, re.DOTALL)
    self.logger.info("PTY prompt pattern: %s" % self.prompt)

    # A local dir is needed for file staging caches.  It lives under $HOME
    # for now, but should become configurable (FIXME).
    self.base = os.environ['HOME'] + '/.saga/adaptors/shell/'

    try:
        os.makedirs(self.base)
    except OSError as err:
        already_there = (err.errno == errno.EEXIST
                         and os.path.isdir(self.base))
        if not already_there:
            raise se.NoSuccess("could not create staging dir: %s" % err)

    self.factory = supsf.PTYShellFactory()
    self.pty_info = self.factory.initialize(self.url, self.session,
                                            self.prompt, self.logger,
                                            posix=self.posix)
    self.pty_shell = self.factory.run_shell(self.pty_info)

    self._trace('init : %s' % self.pty_shell.command)

    self.initialize()
def test_aprun_construct(self, mocked_init, mocked_raise_on):
    """Check the command line ``APRun.construct_command`` builds for ``self._cu``.

    ``mocked_init`` and ``mocked_raise_on`` are not used in the body;
    presumably they are injected by patch decorators outside this block.
    """
    # NOTE(review): ``cfg`` is a set literal, not a dict -- confirm intent.
    launch_method = APRun(cfg={'Testing'}, session=self._session)
    launch_method.launch_command = 'aprun'
    launch_method._log = ru.get_logger('dummy')

    aprun_cmd, _ = launch_method.construct_command(self._cu,
                                                   launch_script_hop=1)

    # assertEqual shows both strings on failure, assertTrue(a == b) does not
    self.assertEqual(aprun_cmd,
                     'aprun -n 2 -N 1 -L node1,node2 -d 1 -cc 0 test_exe ')
def test_transfer_single_folder_to_unit(mocked_init, mocked_method,
                                        mocked_profiler, mocked_raise_on):
    """Stage one local folder to the unit sandbox and verify the result.

    The folder ``sample_data[1]`` is transferred to ``unit:///``; afterwards
    it must be listed in the unit sandbox, and every directory found there
    must contain ``sample_data[0]``.  After tear-down, opening the sandbox
    must raise ``rs.BadParameter``.
    """
    # Get a unit to transfer data for
    unit, session = setUp()
    sandbox = unit['unit_sandbox']
    folder = sample_data[1]

    # Instantiate the component and assign the attributes it needs
    component = Default(cfg=dict(), session=session)
    component._fs_cache = dict()
    component._prof = mocked_profiler
    component._session = session
    component._log = ru.get_logger('dummy')

    # A single "actionable": a folder containing a file, staged to the unit
    actionables = [{
        'uid': ru.generate_id('sd'),
        'source': os.path.join(local_sample_data, folder),
        'action': rp.TRANSFER,
        'target': 'unit:///%s' % folder,
        'flags': [],
        'priority': 0
    }]

    # Should perform all of the actionables in order
    component._handle_unit(unit, actionables)

    # Peek inside the remote directory to verify
    remote_dir = rs.filesystem.Directory(sandbox, session=session)

    top_level = [entry.path for entry in remote_dir.list()]
    assert folder in top_level

    for entry in remote_dir.list():
        if not remote_dir.is_dir(entry):
            continue
        child_dir = rs.filesystem.Directory(os.path.join(sandbox, entry.path),
                                            session=session)
        child_names = [child.path for child in child_dir.list()]
        assert sample_data[0] in child_names

    # Tear-down the files and folders
    tearDown(unit, session)

    # Verify tearDown: the sandbox must not be accessible anymore
    with pytest.raises(rs.BadParameter):
        remote_dir = rs.filesystem.Directory(sandbox, session=session)
def run(self):
    """Create the child logger and hand control to ``work(self)``.

    An ``ru.ThreadExit`` is logged and re-raised.
    """
    child_logger_name = 'radical.' + self.uid + '.child'

    try:
        self.log = ru.get_logger(child_logger_name, level=self.verbose)
        work(self)

    except ru.ThreadExit:
        # note the exit request, then propagate it
        self.log.info('%-10s : thread exit requested' % self.uid)
        raise
def __init__(self, adaptor_info, adaptor_options=None):
    """Store the adaptor description and set up lock, logger and reporter.

    :param adaptor_info: mapping with at least a ``'name'`` entry.
    :param adaptor_options: optional list of adaptor options; defaults to a
        new empty list per instance.
    """
    # A fresh list per instance: a mutable default would be shared between
    # all adaptors created without explicit options.
    if adaptor_options is None:
        adaptor_options = []

    self._info = adaptor_info
    self._opts = adaptor_options
    self._name = adaptor_info['name']

    self._lock = ru.RLock(self._name)

    # logger and reporter share the same name
    log_name = 'radical.enmd.{0}'.format(self._name)
    self._logger = ru.get_logger(log_name)
    self._reporter = ru.LogReporter(name=log_name)
def __init__(self, name, cfg, verbose):
    """Initialise the thread object.

    :param name: stored as ``self.uid`` and used in the logger name.
    :param cfg: stored as ``self.cfg``.
    :param verbose: log level passed to ``ru.get_logger``.

    Ends with a call to ``ru.raise_on('init')``.
    """
    mt.Thread.__init__(self)

    self.uid = name
    self.verbose = verbose
    self.cfg = cfg

    logger_name = 'radical.' + name
    self.log = ru.get_logger(logger_name, level=verbose)

    # termination signal
    self.term = mt.Event()

    ru.raise_on('init')
def __init__(self):
    """Create empty task bookkeeping and set up uid, logger and profiler."""
    # bookkeeping: maintains a record of all MD tasks carried out
    self.book = list()
    self.md_task_list = list()
    self.ex_task_list = list()

    component = 'radical.repex.syncex'
    self._uid = ru.generate_id(component)
    self._logger = ru.get_logger(component)

    # the profiler is named after the uid and records the creation event
    self._prof = ru.Profiler(name=self._uid)
    self._prof.prof('Initinit', uid=self._uid)
def fetch_json(sid, dburl=None, tgt=None, skip_existing=False, session=None,
               log=None):
    '''
    Store the documents of session `sid` as `<tgt>/<sid>.json`.

    sid          : session id; also the base name of the json file
    dburl        : database url; defaults to $RADICAL_PILOT_DBURL
    tgt          : target directory (absolute, or relative to the current
                   working directory); defaults to '.'
    skip_existing: reuse an existing, non-empty file instead of fetching
    session      : if given (and `log` is not), its `_log` is used
    log          : logger to use; must also provide `report.ok()`

    returns file name

    Raises ValueError if no database url is available, and OSError if the
    target directory cannot be created.
    '''

    if not log and session:
        log = session._log
    elif not log:
        log = ru.get_logger('radical.pilot.utils')

    if not tgt:
        tgt = '.'

    if tgt.startswith('/'):
        # Assume an absolute path
        dst = os.path.join(tgt, '%s.json' % sid)
    else:
        # Assume a relative path
        dst = os.path.join(os.getcwd(), tgt, '%s.json' % sid)

    # Create the directory the file is written to.  This has to be derived
    # from `dst`: the dirname of `tgt` is only the *parent* of the target.
    dst_dir = os.path.dirname(dst)
    try:
        os.makedirs(dst_dir)
    except OSError:
        # an already existing directory is fine, anything else is not
        if not os.path.isdir(dst_dir):
            raise

    if skip_existing and os.path.isfile(dst) \
            and os.stat(dst).st_size > 0:

        log.info("session already in %s", dst)

    else:

        if not dburl:
            dburl = os.environ.get('RADICAL_PILOT_DBURL')

        if not dburl:
            raise ValueError('RADICAL_PILOT_DBURL is not set')

        mongo, db, _, _, _ = ru.mongodb_connect(dburl)
        try:
            json_docs = get_session_docs(db, sid)
            ru.write_json(json_docs, dst)
            log.info("session written to %s", dst)
        finally:
            # release the connection also if fetching or writing fails
            mongo.close()

    log.report.ok("+ %s (json)\n" % sid)
    return dst
def __init__(self, session=None, *args, **kwargs):
    """Create a context list, optionally bound to a session.

    :param session: owning session; if set, its ``_logger`` is reused,
        otherwise the ``'radical.saga'`` logger is used.

    All further arguments are passed on to the base class initializer.
    """
    self._session = session
    self._logger = (session._logger if session
                    else ru.get_logger('radical.saga'))

    super(_ContextList, self).__init__(*args, **kwargs)
def __init__(self, session=None, *args, **kwargs):
    """Create a context list, optionally bound to a session.

    :param session: owning session; if set, its ``_logger`` is reused,
        otherwise the ``'radical.saga'`` logger is used.

    All further arguments are passed on to the base class initializer.
    """
    self._session = session
    self._logger = (session._logger if session
                    else ru.get_logger('radical.saga'))

    super(_ContextList, self).__init__(*args, **kwargs)
def __init__(self, adaptor_info, adaptor_options=None):
    """Store the adaptor description and set up lock, logger and reporter.

    :param adaptor_info: mapping with at least a ``'name'`` entry.
    :param adaptor_options: optional list of adaptor options; defaults to a
        new empty list per instance.
    """
    # A fresh list per instance: a mutable default would be shared between
    # all adaptors created without explicit options.
    if adaptor_options is None:
        adaptor_options = []

    self._info = adaptor_info
    self._opts = adaptor_options
    self._name = adaptor_info['name']

    self._lock = ru.RLock(self._name)

    # logger and reporter share the same name
    log_name = 'radical.enmd.{0}'.format(self._name)
    self._logger = ru.get_logger(log_name)
    self._reporter = ru.LogReporter(name=log_name)
def test_amgr_synchronizer():
    """Run the Amgr synchronizer against a 100-task pipeline.

    All tasks, the stage and the pipeline must be in ``states.INITIAL``
    before, and in ``states.SCHEDULING`` after, a helper process running
    ``func_for_synchronizer_test`` has finished while the synchronizer
    thread is active.
    """
    logger = ru.get_logger('radical.entk.temp_logger')
    profiler = ru.Profiler(name='radical.entk.temp')
    amgr = Amgr(hostname=hostname, port=port)

    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=hostname, port=port))

    try:
        mq_connection.channel()

        amgr._setup_mqs()

        p = Pipeline()
        s = Stage()

        # Create and add 100 tasks to the stage
        for cnt in range(100):
            t = Task()
            t.executable = ['some-executable-%s' % cnt]
            s.add_tasks(t)

        p.add_stages(s)
        p._assign_uid(amgr._sid)
        p._validate()

        amgr.workflow = [p]

        for t in p.stages[0].tasks:
            assert t.state == states.INITIAL

        assert p.stages[0].state == states.INITIAL
        assert p.state == states.INITIAL

        # Start the synchronizer method in a thread
        amgr._terminate_sync = Event()
        sync_thread = Thread(target=amgr._synchronizer,
                             name='synchronizer-thread')
        sync_thread.start()

        try:
            # Run the state-changing helper in a separate process
            proc = Process(target=func_for_synchronizer_test,
                           name='temp-proc',
                           args=(amgr._sid, p, logger, profiler))
            proc.start()
            proc.join()

            for t in p.stages[0].tasks:
                assert t.state == states.SCHEDULING

            assert p.stages[0].state == states.SCHEDULING
            assert p.state == states.SCHEDULING

        finally:
            # Always stop the synchronizer: a failing assertion must not
            # leave the thread running.
            amgr._terminate_sync.set()
            sync_thread.join()

    finally:
        # release the message queue connection opened above
        mq_connection.close()
def __init__(self, url):
    """Connect to the redis server described by ``url``.

    :param url: url object with scheme ``redis``; ``host``, ``port``,
        ``username`` and ``password`` override the defaults
        (``localhost``, ``6379``, no user, no password) when set.
    :raises BadParameter: if the url scheme is not ``redis``.

    Besides the client itself, a cache, a second client for pubsub, and a
    monitor (started immediately) are created.
    """
    if url.scheme != 'redis':
        raise BadParameter(
            "scheme in url is not supported (%s != redis://...)" % url)

    self.url = url
    self.host = 'localhost'
    self.port = 6379
    self.db = 0
    # Defined up front like the other settings, so the attribute exists
    # even when the url carries no user name.
    self.username = None
    self.password = None
    self.errors = 'strict'

    if url.host:
        self.host = url.host
    if url.port:
        self.port = url.port
    if url.username:
        self.username = url.username
    if url.password:
        self.password = url.password

    # create redis client, and measure how long that takes
    t1 = time.time()
    redis.Redis.__init__(self,
                         host=self.host,
                         port=self.port,
                         db=self.db,
                         password=self.password,
                         errors=self.errors)
    t2 = time.time()

    # add a logger
    self.logger = ru.get_logger('radical.saga')

    # create a cache dict and attach to redis client instance.  Cache
    # lifetime is set to 10 times the redis-connect latency.
    self.cache = redis_cache.Cache(logger=self.logger, ttl=((t2 - t1) * 10))

    # create a second client to manage the (blocking)
    # pubsub communication for event notifications
    self.r2 = redis.Redis(host=self.host,
                          port=self.port,
                          db=self.db,
                          password=self.password,
                          errors=self.errors)

    # set up pubsub endpoint, and start a thread to monitor channels
    self.callbacks = {}
    self.pub = self.r2.pubsub()
    self.pub.subscribe(MON)
    # FIXME: create one pubsub channel per path (for paths which have
    #        callbacks registered)

    self.monitor = redis_ns_monitor(self, self.pub)
    self.monitor.start()
def __init__(self, pending_queue, completed_queue, mq_hostname):
    """Create a helper object.

    :param pending_queue: stored as ``self._pending_queue``.
    :param completed_queue: stored as ``self._completed_queue``.
    :param mq_hostname: stored as ``self._mq_hostname``.
    """
    component = 'radical.entk.helper'
    self._uid = ru.generate_id(component)
    self._logger = ru.get_logger(component)

    # message queue settings
    self._pending_queue = pending_queue
    self._completed_queue = completed_queue
    self._mq_hostname = mq_hostname

    # no helper process exists yet
    self._helper_process = None

    self._logger.info('Created helper object: %s' % self._uid)
def test_mpirun_construct(self, mocked_init, mocked_raise_on):
    """Check the command line ``MPIRun.construct_command`` builds for ``self._cu``.

    ``mocked_init`` and ``mocked_raise_on`` are not used in the body;
    presumably they are injected by patch decorators outside this block.
    """
    # NOTE(review): ``cfg`` is a set literal, not a dict -- confirm intent.
    launch_method = MPIRun(cfg={'Testing'}, session=self._session)
    launch_method.launch_command = 'mpirun'
    launch_method.mpi_version = None
    launch_method.mpi_flavor = None
    launch_method._log = ru.get_logger('dummy')

    mpirun_cmd, _ = launch_method.construct_command(self._cu,
                                                    launch_script_hop=1)

    # assertEqual shows both strings on failure, assertTrue(a == b) does not
    self.assertEqual(mpirun_cmd,
                     'mpirun -np 4 -host node1,node1,node2,node2 test_exe')
def test_transfer_multiple_folders_from_unit(mocked_init, mocked_method,
                                             mocked_profiler,
                                             mocked_raise_on):
    """Stage a nested folder from the unit sandbox and verify the result.

    ``sample_data[2]`` is transferred from ``unit:///`` to ``tgt_loc``.
    Afterwards ``tgt_loc`` must contain ``sample_data[2]``, which must
    contain ``sample_data[1]``, which must contain ``sample_data[0]``.
    After tear-down, opening the sandbox must raise ``rs.BadParameter``.
    """
    def names_matching(pattern):
        # base names of all paths matching the glob pattern
        return [os.path.basename(path) for path in glob(pattern)]

    # Get a unit to transfer data for
    unit, session = setUp()
    outer = sample_data[2]
    inner = sample_data[1]

    # Instantiate the component and assign the attributes it needs
    component = Default(cfg=dict(), session=session)
    component._cache = dict()
    component._prof = mocked_profiler
    component._session = session
    component._log = ru.get_logger('dummy')

    # A single "actionable": a folder containing another folder containing
    # a file, staged from the unit
    actionables = [{
        'uid': ru.generate_id('sd'),
        'source': 'unit:///%s' % outer,
        'action': rp.TRANSFER,
        'target': tgt_loc,
        'flags': [],
        'priority': 0
    }]

    # Should perform all of the actionables in order
    component._handle_unit(unit, actionables)

    # Verify the actionables were done, one nesting level at a time
    assert outer in names_matching('%s/*' % tgt_loc)
    assert inner in names_matching('%s/%s/*' % (tgt_loc, outer))
    assert sample_data[0] in names_matching('%s/%s/%s/*.*' % (tgt_loc, outer, inner))

    # Tear-down the files and folders
    tearDown(unit=unit, session=session, data=outer)

    # Verify tearDown: the sandbox must not be accessible anymore
    with pytest.raises(rs.BadParameter):
        remote_dir = rs.filesystem.Directory(unit['unit_sandbox'],
                                             session=session)
def __init__(self):
    """Create the default session and collect the default contexts.

    The session gets an empty context list, a logger and a lease manager
    (configured from the ``saga.utils.pty`` config section).  Then every
    adaptor registered for ``saga.Context`` is asked for its default
    contexts, which are appended to the context list.  Adaptors or contexts
    that raise ``se.SagaException`` are logged and skipped.
    """
    # The default session picks up default contexts from all context
    # adaptors.  To implement that: get the engine, dig through the
    # registered context adaptors, and ask each for its default contexts.
    self.contexts = _ContextList()
    self._logger = ru.get_logger('radical.saga')

    # FIXME: at the moment, the lease manager is owned by the session.
    #        However, the pty layer is the main user of the lease manager,
    #        so the lease manager options live in the pty config
    #        subsection, which is evaluated here.
    config = saga.engine.engine.Engine().get_config('saga.utils.pty')
    self._lease_manager = ru.LeaseManager(
        max_pool_size=config['connection_pool_size'].get_value(),
        max_pool_wait=config['connection_pool_wait'].get_value(),
        max_obj_age=config['connection_pool_ttl'].get_value())

    _engine = saga.engine.engine.Engine()

    if 'saga.Context' not in _engine._adaptor_registry:
        self._logger.warning("no context adaptors found")
        return

    context_adaptors = _engine._adaptor_registry['saga.Context']

    for schema in context_adaptors:
        for info in context_adaptors[schema]:

            try:
                default_ctxs = info['adaptor_instance']._get_default_contexts()

            except se.SagaException as e:
                # the two literals are joined, so the first needs the
                # trailing space to separate "default" from "contexts"
                self._logger.debug("adaptor %s failed to provide default "
                                   "contexts: %s" % (info['adaptor_name'], e))
                continue

            for default_ctx in default_ctxs:

                try:
                    self.contexts.append(ctx=default_ctx, session=self)
                    self._logger.debug("default context [%-20s] : %s"
                                       % (info['adaptor_name'], default_ctx))

                except se.SagaException as e:
                    self._logger.debug("skip default context [%-20s] : %s : %s"
                                       % (info['adaptor_name'], default_ctx, e))
                    continue
def __init__(self, url):
    """Connect to the redis server described by ``url``.

    :param url: url object with scheme ``redis``; ``host``, ``port``,
        ``username`` and ``password`` override the defaults
        (``localhost``, ``6379``, no user, no password) when set.
    :raises BadParameter: if the url scheme is not ``redis``.

    Besides the client itself, a cache, a second client for pubsub, and a
    monitor (started immediately) are created.
    """
    if url.scheme != 'redis':
        raise BadParameter(
            "scheme in url is not supported (%s != redis://...)" % url)

    self.url = url
    self.host = 'localhost'
    self.port = 6379
    self.db = 0
    # Defined up front like the other settings, so the attribute exists
    # even when the url carries no user name.
    self.username = None
    self.password = None
    self.errors = 'strict'

    if url.host:
        self.host = url.host
    if url.port:
        self.port = url.port
    if url.username:
        self.username = url.username
    if url.password:
        self.password = url.password

    # create redis client, and measure how long that takes
    t1 = time.time()
    redis.Redis.__init__(self,
                         host=self.host,
                         port=self.port,
                         db=self.db,
                         password=self.password,
                         errors=self.errors)
    t2 = time.time()

    # add a logger
    self.logger = ru.get_logger('radical.saga')

    # create a cache dict and attach to redis client instance.  Cache
    # lifetime is set to 10 times the redis-connect latency.
    self.cache = redis_cache.Cache(logger=self.logger, ttl=((t2 - t1) * 10))

    # create a second client to manage the (blocking)
    # pubsub communication for event notifications
    self.r2 = redis.Redis(host=self.host,
                          port=self.port,
                          db=self.db,
                          password=self.password,
                          errors=self.errors)

    # set up pubsub endpoint, and start a thread to monitor channels
    self.callbacks = {}
    self.pub = self.r2.pubsub()
    self.pub.subscribe(MON)
    # FIXME: create one pubsub channel per path (for paths which have
    #        callbacks registered)

    self.monitor = redis_ns_monitor(self, self.pub)
    self.monitor.start()
def __init__(self, command, logger=None):
    """
    The class constructor, which runs (execvpe) command in a separately
    forked process.  The new process will inherit the environment of the
    application process.

    :type  command: string or list of strings
    :param command: The given command is what is run as a child, and
                    fed/drained via pty pipes.  If given as string, command
                    is split into an array of strings, using
                    :func:`shlex.split`.

    :type  logger:  :class:`radical.utils.logger.Logger` instance
    :param logger:  logger stream to send status messages to.
    """
    # fall back to the pty logger if none (or a false one) was given
    self.logger = logger or ru.get_logger('radical.saga.pty')
    self.logger.debug("PTYProcess init %s" % self)

    # normalize the command into a non-empty list of strings
    if isinstance(command, basestring):
        command = shlex.split(command)

    if not isinstance(command, list):
        raise se.BadParameter("PTYProcess expects string or list command")

    if not command:
        raise se.BadParameter("PTYProcess expects non-empty command")

    self.rlock = ru.RLock("pty process %s" % command)

    self.command = command      # list of strings to run()
    self.cache = ""             # data cache
    self.tail = ""              # tail of data cache, for error messages
    self.child = None           # the process as created by subprocess.Popen
    self.ptyio = None           # the process' io channel, from pty.fork()

    self.exit_code = None       # child died with code (may be revived)
    self.exit_signal = None     # child killed by signal (may be revived)

    # TODO: make this a configure option.  It does not apply for recovers
    #       triggered by gc_timeout!
    self.recover_max = 3
    self.recover_attempts = 0

    try:
        self.initialize()

    except Exception as err:
        raise ptye.translate_exception(err, "pty or process creation failed")
def __init__(self):
    """Create an application manager with empty queues and no workload."""
    component = 'radical.entk.appmanager'

    self._uid = ru.generate_id(component)
    self._name = str()
    self._workload = None

    # queues for pending and executed items
    self._pending_queue = Queue.Queue()
    self._executed_queue = Queue.Queue()

    # logger
    self._logger = ru.get_logger(component)
    self._logger.info('Application Manager initialized')
def __init__(self, name, cfg, verbose):
    """Initialise the process object.

    :param name: stored as ``self.uid``; used in the logger name and as
        process name.
    :param cfg: stored as ``self.cfg``.
    :param verbose: log level passed to ``ru.get_logger``.

    ``ru.raise_on('init')`` is called twice: at the start and right before
    the ``ru.Process`` initializer runs.
    """
    ru.raise_on('init')

    self.uid = name
    self.verbose = verbose

    logger_name = 'radical.' + self.uid
    self.log = ru.get_logger(logger_name, level=self.verbose)

    self.cfg = cfg
    self.things = list()
    self.term = None   # child only

    ru.raise_on('init')

    ru.Process.__init__(self, name=self.uid, log=self.log)
def __init__(self):
    """Creates the Engine instance (singleton).

    Sets up logging, then loads the execution plug-ins followed by the
    kernel plug-ins.
    """
    # logging first
    self._logger = ru.get_logger('radical.enmd.Engine')

    # execution plug-ins: start with an empty registry, then load
    self._execution_plugins = []
    self._load_execution_plugins()

    # kernel plug-ins: start with an empty registry, then load
    self._kernel_plugins = []
    self._load_kernel_plugins()
def __init__(self, pending_queue, executed_queue):
    """Create a populator object.

    :param pending_queue: stored as ``self._pending_queue``.
    :param executed_queue: stored as ``self._executed_queue``.
    """
    component = 'radical.entk.helper'
    self._uid = ru.generate_id(component)
    self._logger = ru.get_logger(component)

    self._pending_queue = pending_queue
    self._executed_queue = executed_queue

    # thread management: termination event, no thread created yet
    self._terminate = threading.Event()
    self._helper_thread = None
    self._thread_alive = False

    self._logger.info('Created populator object: %s' % self._uid)
def _watch(self):
    """Watcher loop: pick up units from the watch queue and check on them.

    Until ``self._terminate`` is set, up to 100 units per iteration are
    pulled from ``self._watch_queue`` and added to ``self._cus_to_watch``,
    then ``self._check_running()`` is called.  If neither produced any
    activity, the loop sleeps for ``self._cfg['db_poll_sleeptime']``.
    Exceptions are logged; the profiler is stopped and flushed at the end.
    """
    cname = self.name.replace('Component', 'Watcher')

    self._prof = ru.Profiler(cname)
    self._prof.prof('run', uid=self._pilot_id)

    try:
        self._log = ru.get_logger(cname, target="%s.log" % cname,
                                  level='DEBUG')  # FIXME?

        while not self._terminate.is_set():

            new_units = list()

            try:
                # Waiting for a single CU only would pull CU state too
                # frequently.  Waiting until all slots are filled could
                # make us miss finishing CUs.  So there is a fine balance
                # here.  Balance means 100 (FIXME).
                # self._prof.prof('ExecWorker popen watcher pull cu from queue')
                MAX_QUEUE_BULKSIZE = 100

                while len(new_units) < MAX_QUEUE_BULKSIZE:
                    new_units.append(self._watch_queue.get_nowait())

            except Queue.Empty:
                # nothing found -- no problem, see if any CUs finished
                pass

            # add all units we found to the watchlist
            for unit in new_units:
                self._prof.prof('passed', msg="ExecWatcher picked up unit",
                                uid=unit['_id'])
                self._cus_to_watch.append(unit)

            # check on the known units
            activity = self._check_running()

            if not (activity or new_units):
                # nothing happened at all!  Zzz for a bit.
                time.sleep(self._cfg['db_poll_sleeptime'])

    except Exception as err:
        self._log.exception("Error in ExecWorker watch loop (%s)" % err)
        # FIXME: this should signal the ExecWorker for shutdown...

    self._prof.prof('stop', uid=self._pilot_id)
    self._prof.flush()
def __init__(self):
    """Configure the engine, set up logging and load the adaptors."""
    # the engine manages cpis from adaptors
    self._adaptor_registry = dict()

    # set the configuration options for this object
    configurable = ruc.Configurable
    configurable.__init__(self, 'saga')
    configurable.config_options(self, 'saga.engine', _config_options)
    self._cfg = self.get_config('saga.engine')

    # initialize the logging (this is a singleton!)
    self._logger = ru.get_logger('radical.saga')

    # load adaptors
    self._load_adaptors()
def __init__(self):
    """Configure the engine, set up logging and load the adaptors."""
    # the engine manages cpis from adaptors
    self._adaptor_registry = dict()

    # set the configuration options for this object
    configurable = ruc.Configurable
    configurable.__init__(self, 'saga')
    configurable.config_options(self, 'saga.engine', _config_options)
    self._cfg = self.get_config('saga.engine')

    # initialize the logging (this is a singleton!)
    self._logger = ru.get_logger('radical.saga')

    # load adaptors
    self._load_adaptors()
def _get_logger(self, name, level=None):
    """
    Thin wrapper around `ru.get_logger()` which passes `self._logdir` as
    log path, so that log files end up in a separate directory.

    The log level is taken from `level`, else from `$RADICAL_PILOT_VERBOSE`,
    else from `$RADICAL_VERBOSE`, else it is `REPORT`.  The version detail
    of radical.pilot is logged on the new logger before it is returned.
    """
    # FIXME: this is only needed because components may use a different
    #        logger namespace - which they should not I guess?
    effective_level = (level
                       or os.environ.get('RADICAL_PILOT_VERBOSE')
                       or os.environ.get('RADICAL_VERBOSE', 'REPORT'))

    logger = ru.get_logger(name, target='.', level=effective_level,
                           path=self._logdir)
    logger.info('radical.pilot version: %s' % rp_version_detail)

    return logger
def __init__(self, flavor, qname, role, address=None):
    """Set up the queue endpoint attributes and its logger.

    :param flavor: stored as ``self._flavor``.
    :param qname: queue name; part of the endpoint name.
    :param role: endpoint role; part of the endpoint name.
    :param address: endpoint address; ``'tcp://*:*'`` is used if unset.

    The endpoint logs to ``queue.<qname>.<role>.log``.
    """
    self._flavor = flavor
    self._qname = qname
    self._role = role
    self._debug = False

    # fall back to a wildcard address if none was given
    self._addr = address or 'tcp://*:*'

    self._name = "queue.%s.%s" % (qname, role)
    self._log = ru.get_logger('rp.bridges', target="%s.log" % self._name)

    # only input and output endpoints announce their creation; the
    # address is logged as passed in, not the fallback
    if role in (QUEUE_INPUT, QUEUE_OUTPUT):
        self._log.info("create %s - %s - %s - %s",
                       flavor, qname, role, address)
def __init__(self, ctype, cfg):
    """
    Component constructor -- inheriting classes MUST call it.

    `__init__` does not run in the process scope of the main event loop:
    anything the main event loop needs should be set up in
    `initialize_child()`.  Component inputs, outputs and callbacks should
    be set up in a separate `initialize()` method, which is called during
    `start()` in the parent's process context.  That split avoids the
    situation where `__init__` creates threads but fails later on, leaving
    main without a handle to terminate those threads (`__del__` can
    deadlock).

    `cfg` must provide the keys 'agent_name' and 'bridge_addresses'; the
    keys 'debug' (default: 'DEBUG') and 'number' (default: 0) are
    optional.  The component name is '<agent_name>.<ctype>.<number>'.
    """

    self._ctype      = ctype
    self._cfg        = cfg
    self._debug      = cfg.get('debug', 'DEBUG')  # FIXME
    self._agent_name = cfg['agent_name']
    self._cname      = "%s.%s.%d" % (self._agent_name, self._ctype,
                                     cfg.get('number', 0))
    self._childname  = "%s.child" % self._cname
    self._addr_map   = cfg['bridge_addresses']

    # pid of spawning process
    self._parent = os.getpid()

    # registries
    self._inputs      = []   # queues to get units from
    self._outputs     = {}   # queues to send units to
    self._publishers  = {}   # channels to send notifications to
    self._subscribers = []   # callbacks for received notifications
    self._workers     = {}   # where units get worked upon
    self._idlers      = []   # idle_callback registry

    # state flags and guards
    self._terminate     = mt.Event()  # signal for thread termination
    self._finalized     = False       # finalization guard
    self._is_parent     = None        # guard initialize/initialize_child
    self._exit_on_error = True        # FIXME: make configurable
    self._cb_lock       = mt.Lock()   # guard threaded callback invokations
    self._clone_cb      = None        # allocate resources on cloning units
    self._drop_cb       = None        # free resources on dropping clones

    # Logging by cname gives one log per agent and component (using the
    # agent_name instead would give one log per agent).
    self._log = ru.get_logger(self._cname, self._cname + ".log", self._debug)
    self._log.info('creating %s' % self._cname)

    self._prof = ru.Profiler(self._cname)

    # Start the main event loop in a separate process.  At that point, the
    # component will basically detach itself from the parent process, and
    # will only maintain a handle to be used for shutdown.
    mp.Process.__init__(self, name=self._cname)
def __init__(self, ctype, cfg):
    """
    Component constructor -- inheriting classes MUST call it.

    `__init__` does not run in the process scope of the main event loop:
    anything the main event loop needs should be set up in
    `initialize_child()`.  Component inputs, outputs and callbacks should
    be set up in a separate `initialize()` method, which is called during
    `start()` in the parent's process context.  That split avoids the
    situation where `__init__` creates threads but fails later on, leaving
    main without a handle to terminate those threads (`__del__` can
    deadlock).

    `cfg` must provide the keys 'agent_name' and 'bridge_addresses'; the
    keys 'debug' (default: 'DEBUG') and 'number' (default: 0) are
    optional.  The component name is '<agent_name>.<class name>.<number>',
    where the class name is that of the inheriting class; `ctype` is only
    stored.
    """

    self._ctype      = ctype
    self._cfg        = cfg
    self._debug      = cfg.get('debug', 'DEBUG')  # FIXME
    self._agent_name = cfg['agent_name']
    self._cname      = "%s.%s.%d" % (self._agent_name, type(self).__name__,
                                     cfg.get('number', 0))
    self._childname  = "%s.child" % self._cname
    self._addr_map   = cfg['bridge_addresses']

    # pid of spawning process
    self._parent = os.getpid()

    # registries
    self._inputs      = []   # queues to get units from
    self._outputs     = {}   # queues to send units to
    self._publishers  = {}   # channels to send notifications to
    self._subscribers = {}   # callbacks for received notifications
    self._workers     = {}   # where units get worked upon
    self._idlers      = []   # idle_callback registry
    self._threads     = []   # subscriber threads

    # state flags and guards
    self._terminate     = mt.Event()  # signal for thread termination
    self._finalized     = False       # finalization guard
    self._is_parent     = None        # guard initialize/initialize_child
    self._exit_on_error = True        # FIXME: make configurable
    self._cb_lock       = mt.Lock()   # guard threaded callback invokations

    # Logging by cname gives one log per agent and component (using the
    # agent_name instead would give one log per agent).
    self._log = ru.get_logger(self._cname, self._cname + ".log", self._debug)
    self._log.info('creating %s' % self._cname)

    self._prof = Profiler(self._cname)

    # Start the main event loop in a separate process.  At that point, the
    # component will basically detach itself from the parent process, and
    # will only maintain a handle to be used for shutdown.
    mp.Process.__init__(self, name=self._cname)
def __init__(self, name, cfg, term, verbose):
    """
    Initialize the thread and store its settings.

    `name` becomes `self.uid` and selects the logger ('radical.<name>'),
    `verbose` is kept and used as log level, `cfg` and `term` are stored
    as given.

    Raises `RuntimeError` when not called from the main thread.
    """

    mt.Thread.__init__(self)

    self.uid     = name
    self.verbose = verbose
    self.log     = ru.get_logger('radical.' + name, level=verbose)
    self.cfg     = cfg
    self.term    = term

    ru.raise_on('init')

    # We don't allow subsubthreads.
    # FIXME: this could be lifted, but we leave in place and
    #        re-evaluate as needed.
    if ru.is_main_thread():
        return

    raise RuntimeError('threads must be spawned by MainThread [%s]'
                       % ru.get_thread_name())
def run(self):
    """
    Mark this instance as not being the parent, replace `self.log` with
    the 'radical.<uid>.child' logger, and call `work(self)`.
    """

    self.is_parent = False

    child_log_name = 'radical.' + self.uid + '.child'
    self.log = ru.get_logger(child_log_name, level=self.verbose)

    # Disabled in the original: creating a `ru.DebugHelper`, setting the
    # process title to 'rp.<uid>.child', and installing a SIGUSR2 handler
    # which logs '<uid> : signal handled' and sets `self.term`.
    # FIXME: make sure that debug_helper is not capturing signals unless
    #        needed (ie. unless RADICAL_DEBUG is set)

    work(self)
def __init__(self, name, cfg, term, verbose):
    """
    Initialize the process object, store its settings, and start a
    `Watcher` for `cfg`.

    `name` becomes `self.uid` and selects the logger ('radical.<name>'),
    `verbose` is kept and used as log level.  `term` is stored as
    `self.wterm`, while `self.term` is a new, private `mp.Event`.  The
    watcher is created with verbosity 'error' and is started right away.
    `ru.raise_on('init')` is called before, during and after the setup.
    """

    ru.raise_on('init')

    mp.Process.__init__(self)

    self.uid       = name
    self.verbose   = verbose
    self.log       = ru.get_logger('radical.' + name, level=verbose)
    self.is_parent = True
    self.cfg       = cfg
    self.wterm     = term        # term sig shared with parent watcher
    self.term      = mp.Event()  # private term signal
    self.killed    = False

    ru.raise_on('init')

    # start watcher for own children and threads
    self.watcher = Watcher(cfg, verbose='error')
    self.watcher.start()

    ru.raise_on('init')
def __init__(self, cfg, verbose):
    """
    Initialize the watcher thread, then create and start the children and
    workers described in `cfg`.

    `cfg` is a dict.  The single key containing 'watcher' becomes
    `self.uid`.  For every key containing 'child' a `Child` is created
    (sharing `self._proc_term`); otherwise, for every key containing
    'worker' a `Worker` is created (sharing `self._thread_term`).  Each is
    passed the respective dict value as `cfg`, is started immediately, and
    is recorded in `self.things`.

    Raises `ValueError` if more than one key contains 'watcher'.
    """

    ru.raise_on('init')

    mt.Thread.__init__(self)

    self.cfg          = cfg
    self.term         = mt.Event()
    self._thread_term = mt.Event()
    self._proc_term   = mp.Event()
    self.things       = list()
    self.uid          = None

    # Only the keys are needed to find our own uid.  Plain dict iteration
    # works on Python 2 and 3 alike (`iteritems()` is Python 2 only).
    for name in cfg:
        if 'watcher' in name:
            if self.uid:
                raise ValueError('only one watcher supported')
            self.uid = name

    # NOTE(review): if no key contains 'watcher', `self.uid` is still None
    #               here and the concatenation raises a TypeError --
    #               confirm that callers always configure a watcher.
    self.log = ru.get_logger('radical.' + self.uid + '.child', level=verbose)

    ru.raise_on('init')

    # first create threads and procs to be watched
    for name, _cfg in cfg.items():

        self.log.info('child %s: ', name)

        if 'child' in name:
            child = Child(name=name, cfg=_cfg, term=self._proc_term,
                          verbose=verbose)
            child.start()
            self.things.append(child)

        elif 'worker' in name:
            worker = Worker(name=name, cfg=_cfg, term=self._thread_term,
                            verbose=verbose)
            worker.start()
            self.things.append(worker)

    ru.raise_on('init')
def __init__(self, flavor, channel, role, address=None):
    """
    Store the pubsub parameters and log the creation.

    Addresses are of the form 'tcp://host:port'.  Both 'host' and 'port'
    can be wildcards for BRIDGE roles -- the bridge will report the in and
    out addresses as obj.bridge_in and obj.bridge_out.

    If no address (or an empty one) is given, 'tcp://*:*' is used.  The
    endpoint name is 'pubsub.<channel>.<role>'.
    """

    self._flavor  = flavor
    self._channel = channel
    self._role    = role
    self._addr    = address or 'tcp://*:*'
    self._debug   = False
    self._log     = ru.get_logger('rp.bridges')
    self._name    = "pubsub.%s.%s" % (channel, role)

    # bridge addresses are not known yet
    self._bridge_in  = None  # bridge input  addr
    self._bridge_out = None  # bridge output addr

    self._log.info("create %s - %s - %s", channel, role, self._addr)
def __init__(self, api, adaptor):
    """
    Initialize the cpi base: keep the adaptor, remember the name of the
    concrete class, get the 'radical.saga.cpi' logger, and store a weak
    reference to the API object.
    """

    self._session   = None
    self._adaptor   = adaptor
    self._cpi_cname = type(self).__name__
    self._logger    = ru.get_logger('radical.saga.cpi')

    # The API object must obviously keep an adaptor instance.  If we also
    # kept an API instance ref in this adaptor base, we would create a ref
    # cycle which will annoy (i.e. disable) garbage collection.  We thus
    # use weak references to break that cycle.  The inheriting classes
    # MUST use get_api() to obtain the API reference.
    #
    # NOTE: an earlier variant stored None for a false `api`; as written,
    #       `api` must be weakly referenceable (`weakref.ref(None)` raises
    #       a TypeError).
    self._api = weakref.ref(api)

    # By default, we assume that no bulk optimizations are supported by
    # the adaptor class.  Any adaptor class supporting bulk ops must
    # overwrite the ``_container`` attribute (via
    # ``self._set_container(container=None)``), and have it point to the
    # class which implements the respective ``container_*`` methods.
    self._container = None
def initialize_child(self):
    """
    Child-side initialization: set the process title to `self.uid`, switch
    to the 'radical.<uid>.child' logger, then create and start the
    children and workers described in `self.cfg`.

    For every key of `self.cfg` containing 'child' a `Child` is created;
    otherwise, for every key containing 'worker' a `Worker` is created.
    Each is passed the respective dict value as `cfg`, is started
    immediately, and is recorded in `self.things`.
    """

    setproctitle.setproctitle(self.uid)

    self.log = ru.get_logger('radical.' + self.uid + '.child',
                             level=self.verbose)

    ru.raise_on('init')

    # First create threads and procs to be watched.  `items()` works on
    # Python 2 and 3 alike (`iteritems()` is Python 2 only).
    for name, cfg in self.cfg.items():

        self.log.info('child %s: ', name)

        if 'child' in name:
            child = Child(name=name, cfg=cfg, verbose=self.verbose)
            child.start()
            self.things.append(child)

        elif 'worker' in name:
            worker = Worker(name=name, cfg=cfg, verbose=self.verbose)
            worker.start()
            self.things.append(worker)

    ru.raise_on('init')
def __init__(self, q1):
    """
    Initialize the thread, keep `q1` as `self._q1`, and get the
    'radical.entk.thread' logger.
    """

    threading.Thread.__init__(self)

    logger_name  = 'radical.entk.thread'
    self._q1     = q1
    self._logger = ru.get_logger(logger_name)
def __init__(self):
    """
    Set up the 'radical.saga.pty' logger, an empty registry, and an
    `ru.RLock` named 'pty shell factory'.
    """

    self.logger   = ru.get_logger('radical.saga.pty')
    self.registry = dict()
    self.rlock    = ru.RLock('pty shell factory')