def block_sync(pnn, rds, pcli, rcli):
    """
    Synchronize one rucio dataset at one rse
    :pnn: pnn.
    :rds: rucio dataset.
    :pcli: phedex client.
    :rcli: rucio client.

    Returns 'aborted' when the node is disabled or rucio cannot be
    reached; otherwise returns the result of the replica update.
    """
    node_conf = _get_config(pnn)

    # Per-block verbosity override, if configured for this node.
    if 'block_verbosity' in node_conf:
        logging.my_lvl(node_conf['block_verbosity'])

    # Guard: node disabled in the configuration.
    if not node_conf['run']:
        return 'aborted'

    # Guard: rucio server unreachable.
    if not _ping(rcli):
        logging.warning('Cannot Ping, aborting.')
        return 'aborted'

    return _replica_update(
        dataset=rds,
        pnn=pnn,
        rse=node_conf['rse'],
        pcli=pcli,
        rcli=rcli,
        dry=node_conf['dry'],
    )
def _load_config(conffile, modif=None, starttime=None):
    """
    Gets the conf file and dumps it to the working copy
    :conffile: file to be loaded
    :modif: dictionary with modifications
    :starttime: reference time used to evaluate run status
               (defaults to now)
    returns the content dictionary
    """
    if not starttime:
        starttime = datetime.now()

    # Fall back to the last successfully loaded copy when the new file
    # does not parse.
    try:
        raw = _open_yaml(conffile, modif)
    except yaml.parser.ParserError:
        logging.warning('Problem parsing config. Using loaded one.')
        raw = _open_yaml(LOADED_CONF)

    # Layered defaults: built-in constants, then the file's 'default'
    # and 'main' sections, then each per-node section.
    pnn_defaults = dict(DEFAULT_PNN_CONF, **raw.pop('default'))
    main_section = dict(DEFAULT_MAIN_CONF, **raw.pop('main'))

    merged = {'main': main_section}
    for pnn, section in raw.items():
        # Each remaining key is a node name; its rse defaults to the
        # node name itself unless the section overrides it.
        merged[pnn] = dict(pnn_defaults, **dict({'rse': pnn}, **section))

    loaded = {}
    for name, section in merged.items():
        loaded[name] = _run_status(section, starttime)

    logging.my_lvl(loaded['main']['verbosity'])

    logging.debug('Loaded conf %s from %s with modif %s',
                  loaded, conffile, modif)

    # Persist the working copy so a broken config can fall back to it.
    with open(LOADED_CONF, 'w') as outfile:
        yaml.dump(loaded, outfile, default_flow_style=False)

    return loaded
def pnn_sync(pnn, pcli):
    """
    Run one synchronization pass for a single PhEDEx node.
    :pnn: phedex node name.
    :pcli: phedex client.
    returns a summary dictionary describing the run
    (configuration, diff, timings, worker counts, final status).
    """
    summary = copy.deepcopy(DEFAULT_PNN_SUMMARY)

    node_conf = _get_config(pnn)
    summary['conf'] = node_conf

    if 'verbosity' in node_conf:
        logging.my_lvl(node_conf['verbosity'])

    rcli = Client(account=SYNC_ACCOUNT_FMT % pnn.lower())

    if _pnn_abort(pnn, summary, rcli):
        return summary

    diff_result = get_node_diff(pnn, pcli, rcli, node_conf)
    summary['timing'].update(diff_result['timing'])
    node_diff = diff_result['return']
    summary['diff'] = node_diff['summary']

    # Safety valve: refuse to wipe every dataset unless explicitly allowed.
    everything_removed = (
        node_diff['summary']['tot'] == node_diff['summary']['to_remove'])
    if everything_removed and not node_conf['allow_clean']:
        logging.warning('All datasets to be removed. Aborting.')
        summary['status'] = 'aborted'
        return summary

    logging.notice("Got diff=%s, timing=%s",
                   summary['diff'], summary['timing'])

    if _pnn_abort(pnn, summary, rcli):
        return summary

    # Pooled workers first.
    workers = get_timing(
        _launch_pnn_workers(node_conf, node_diff, pnn, pcli, rcli),
        summary['timing'])
    summary['workers'] = len(workers)

    logging.notice("Launched %d workers, pool size %d, timing %s",
                   summary['workers'], int(node_conf['pool']),
                   summary['timing']['_launch_pnn_workers'])

    # If the chunk was not filled, top up with single-thread workers.
    remaining = (int(node_conf['chunck']) - summary['workers']
                 + int(node_conf['min_deletions']))

    if remaining > 0:
        workers_st = get_timing(
            _launch_pnn_workers_st(remaining, node_diff, pnn, pcli, rcli),
            summary['timing'])
        summary['workers_st'] = len(workers_st)

        logging.notice("Launched %d single thread workers, timing %s",
                       summary['workers_st'],
                       summary['timing']['_launch_pnn_workers_st'])

        workers = dict(workers, **workers_st)

    _get_pnn_workers(workers, summary)

    summary['status'] = 'finished'

    return summary
    default=None, help='Rucio account. default the sync account')
# Datasets to synchronize; wildcard patterns allowed and the option
# may be repeated.
PARSER.add_argument('--dataset', dest='dataset', action='append',
                    default=[],
                    help='dataset to be updates. Can have wildcard and can be multiple')
PARSER.add_argument('--pool', dest='pool', default=1,
                    help='number of parallel threads. Default 1.')

OPTIONS = PARSER.parse_args()

logging.my_lvl(OPTIONS.verbosity)
# logging.summary('DBP1')

# Make the logging module safe to use from multiprocessing workers.
install_mp_handler()

POOL = multiprocessing.Pool(int(OPTIONS.pool))

PCLI = PhEDEx()
# Node names matching the --pnn selection.
PNNS = PCLI.pnns(select=OPTIONS.pnn)

TIMING = {}

# Matches any token containing a '*' (i.e. a wildcard dataset pattern).
WILDCARD = re.compile(r'\S*[*]\S*')

DATASETS = get_timing(_get_dset_list(PCLI, OPTIONS.dataset), TIMING)
def pnn_sync(pnn, pcli):
    """
    Synchronize the datasets of one PhEDEx node into rucio.
    :pnn: phedex node name.
    :pcli: phedex client.
    returns a summary dictionary describing the run
    (configuration, diff, timings, worker counts, final status).
    """
    monitor.record_counter('cms_sync.site_started')

    summary = copy.deepcopy(DEFAULT_PNN_SUMMARY)
    conf = _get_config(pnn)
    summary['conf'] = conf

    if 'verbosity' in conf:
        logging.my_lvl(conf['verbosity'])

    rcli = Client(account=SYNC_ACCOUNT_FMT % pnn.lower())

    if _pnn_abort(pnn, summary, rcli):
        return summary

    # With conf['multi_das_calls'], split the DAS queries by dataset-name
    # prefix (one pass per letter/digit), shuffled so repeated runs do not
    # always start at the same prefix. Otherwise do a single full pass.
    if conf['multi_das_calls']:
        # BUGFIX: string.letters exists only in python 2; ascii_letters
        # works on both python 2 and 3.
        prefixes = list(string.ascii_letters + string.digits)
        random.shuffle(prefixes)
    else:
        prefixes = [None]

    for prefix in prefixes:
        diff = get_node_diff(pnn, pcli, rcli, conf, prefix=prefix)
        summary['timing'].update(diff['timing'])
        diff = diff['return']
        # NOTE(review): each prefix iteration overwrites summary['diff']
        # and the worker counts; only the last prefix's numbers survive.
        summary['diff'] = diff['summary']

        # Safety valve: refuse to wipe every dataset unless allowed.
        if (diff['summary']['tot'] == diff['summary']['to_remove']) and \
                not conf['allow_clean']:
            logging.warning('All datasets to be removed. Aborting.')
            summary['status'] = 'aborted'
            continue  # return summary

        logging.notice("Got diff=%s, timing=%s",
                       summary['diff'], summary['timing'])

        if _pnn_abort(pnn, summary, rcli):
            return summary

        # Pooled workers first.
        workers = get_timing(
            _launch_pnn_workers(conf, diff, pnn, pcli, rcli),
            summary['timing'])
        summary['workers'] = len(workers)

        logging.notice("Launched %d workers, pool size %d, timing %s",
                       summary['workers'], int(conf['pool']),
                       summary['timing']['_launch_pnn_workers'])

        # If the chunk was not filled, top up with single-thread workers.
        left = int(conf['chunck']) - summary['workers'] + int(
            conf['min_deletions'])

        if left > 0:
            workers_st = get_timing(
                _launch_pnn_workers_st(left, diff, pnn, pcli, rcli),
                summary['timing'])
            summary['workers_st'] = len(workers_st)

            logging.notice("Launched %d single thread workers, timing %s",
                           summary['workers_st'],
                           summary['timing']['_launch_pnn_workers_st'])

            workers = dict(workers, **workers_st)

        _get_pnn_workers(workers, summary)

    monitor.record_counter('cms_sync.site_completed')

    # NOTE(review): this unconditionally clobbers an 'aborted' status set
    # by the allow_clean guard inside the loop — confirm that is intended.
    summary['status'] = 'finished'

    return summary