def _drain_up(workers, pnns):
    """
    Block until every running pnn worker has finished and re-queue
    each finished pnn back onto the to-be-launched list.
    """
    logging.summary('Starting draining up')

    for node_name, async_result in workers.items():
        # .get() blocks until the worker task completes (and re-raises
        # any exception the worker hit).
        async_result.get()
        pnns.append(node_name)
def _launch_workers(pool, workers, pnns, pcli):
    """
    Submit one asynchronous pnn_sync task per pnn to the pool,
    recording each AsyncResult in the workers dict keyed by pnn.
    """
    logging.summary('Launching worker for %s', pnns)

    for node in pnns:
        task_kwargs = {'pnn': node, 'pcli': pcli}
        workers[node] = pool.apply_async(pnn_sync, (), task_kwargs)
def _poll_workers(workers, pnns):
    """
    Poll the running pnn workers; any worker that has finished is
    collected, removed from the workers dict, and its pnn is re-queued.
    """
    logging.debug('polling workers: %s; pnns %s', workers, pnns)

    finished = []

    for node, task in workers.items():
        logging.debug("Checking worker %s", node)
        if not task.ready():
            continue
        logging.debug("Worker %s is ready", node)
        # get() re-raises any exception the worker produced
        task.get()
        finished.append(node)
        pnns.append(node)
        logging.summary("Got worker %s and re-queued", node)

    # pop after iterating: the dict must not change size mid-iteration
    for node in finished:
        workers.pop(node)
def sync(config, logs):
    """ Main Sync process

    Runs the long-lived supervisor loop: reads the config, launches one
    worker per configured pnn in a process pool, re-queues finished
    workers, and drains/recreates the pool when the config asks for a
    stop or a different pool size.

    :config: path/handle passed through to _load_config on every cycle
    :logs: forwarded to logging.my_logfile (project logging setup)

    returns the config argument unchanged.
    """
    logging.my_logfile(logs=logs)
    logging.my_fmt(label='main_sync')
    starttime = datetime.now()
    modify = {}
    workers = {}  # pnn -> AsyncResult of the currently running workers
    pnns = None   # list of pnns waiting to be launched
    pool = None
    pcli = PhEDEx()
    install_mp_handler()
    conf = _load_config(config, modify, starttime)
    pnns = []
    size = conf['main']['pool']
    logging.summary('Starting')
    while conf['main']['run']:
        # (re)create the pool lazily — it is set to None after a drain
        if pool is None:
            logging.notice('Started pool of size %d', size)
            # NOTE(review): NDPool is a project-specific pool class, not
            # stdlib multiprocessing.Pool — semantics assumed equivalent.
            pool = multiprocessing.NDPool(size)
        # queue every enabled pnn section that is neither running nor
        # already queued ('main' is the supervisor section, not a pnn)
        add = [
            pnn for pnn, sec in conf.items()
            if pnn != 'main'
            if sec['run']
            if pnn not in workers
            if pnn not in pnns
        ]
        pnns += add
        # shuffle so no pnn is systematically favored on each cycle
        random.shuffle(pnns)
        if not _ping():
            logging.warning('Cannot ping, not launching workers')
        else:
            _launch_workers(pool, workers, pnns, pcli)
            pnns = []
        _poll_workers(workers, pnns)
        # reload the config each cycle so changes take effect live
        conf = _load_config(config, modify, starttime)
        if not conf['main']['run'] or \
           conf['main']['pool'] != size:
            # trigger draining of all workers, close the pool and wait
            # for the task to be over
            conf = _load_config(config, {'default': {'run': False}}, starttime)
            _drain_up(workers, pnns)
            workers = {}
            pool.close()
            pool = None
            size = conf['main']['pool']
        else:
            time.sleep(conf['main']['sleep'])
    logging.summary('Exiting.')
    return config
    default=1, help='number of parallel threads. Default 1.')
OPTIONS = PARSER.parse_args()
# set verbosity before anything logs (project logging helper)
logging.my_lvl(OPTIONS.verbosity)
install_mp_handler()
POOL = multiprocessing.Pool(int(OPTIONS.pool))
PCLI = PhEDEx()
# pnns selected by the --pnn pattern given on the command line
PNNS = PCLI.pnns(select=OPTIONS.pnn)
TIMING = {}  # filled in-place by get_timing with per-phase durations
# matches dataset selectors containing a '*' wildcard
WILDCARD = re.compile(r'\S*[*]\S*')
DATASETS = get_timing(_get_dset_list(PCLI, OPTIONS.dataset), TIMING)
PROCS = get_timing(_launch_workers(PNNS, DATASETS, POOL, OPTIONS, PCLI), TIMING)
# wait for all workers to finish before reporting stats
get_timing(_get_workers(POOL, PROCS), TIMING)
logging.summary(
    'Final Stats: n.pnns: %d, n.datasets: %d, poolsize: %d, timing: %s',
    len(PNNS), len(DATASETS), int(OPTIONS.pool), TIMING)
def get_blocks_at_pnn(pnn, pcli, multi_das_calls=True, prefix=None):
    """
    Get the list of completed replicas of closed blocks at a site

    :pnn: the phedex node name
    :pcli: phedex client instance
    :multi_das_calls: if True (and no prefix is given), issue one das
        call per leading block-name character instead of a single bulk
        call, trading call count for a smaller memory footprint
    :prefix: only fetch blocks whose name starts with this prefix

    returns a dictionnary with <block name>: <number of files>
    """
    # This is not optimal in terms of calls and time but reduces the memory footprint
    blocks_at_pnn = {}
    if prefix:
        logging.summary('Getting subset of blocks at %s beginning with %s', pnn, prefix)
        with monitor.record_timer_block('cms_sync.time_pnn_blocks_split'):
            logging.summary('Getting blocks at %s starting with %s', pnn, prefix)
            some_blocks_at_pnn = pcli.blocks_at_site(pnn=pnn, prefix=prefix)
            blocks_at_pnn.update(some_blocks_at_pnn)
            logging.summary('Got blocks at %s starting with %s', pnn, prefix)
    elif multi_das_calls:
        logging.summary('Getting all blocks at %s. Multiple %s', pnn, multi_das_calls)
        # BUGFIX: string.letters does not exist in Python 3 and is
        # locale-dependent in Python 2; ascii_letters is the portable
        # equivalent (a-zA-Z).
        prefixes = list(string.ascii_letters + string.digits)
        # shuffle so concurrent syncs don't all walk prefixes in order
        random.shuffle(prefixes)
        logging.notice('Getting blocks with multiple das calls. %s', prefixes)
        for item in prefixes:
            with monitor.record_timer_block('cms_sync.time_pnn_blocks_split'):
                logging.summary('Getting blocks at %s starting with %s', pnn, item)
                some_blocks_at_pnn = pcli.blocks_at_site(pnn=pnn, prefix=item)
                blocks_at_pnn.update(some_blocks_at_pnn)
                logging.summary('Got blocks at %s starting with %s', pnn, item)
    else:
        logging.summary('Getting all blocks at %s in one call', pnn)
        with monitor.record_timer_block('cms_sync.time_pnn_blocks_all'):
            blocks_at_pnn = pcli.blocks_at_site(pnn=pnn)
        logging.summary('Got blocks at %s.', pnn)
    return blocks_at_pnn