Example #1
def get_node_diff(pnn, pcli, rcli, conf):
    """
    Get the diff between the rucio and phedex at a node
    :pnn:  node name
    :pcli: phedex client instance
    :rcli: rucio client instance
    :multi_das_calls: perform one DAS call for each dataset starting letter
    :filters: include and exclude filters (by default all datasets are included)

    return the list of datasets to add, remove and update
    as in DEFAULT_DATADIFF_DICT
    """
    timing = {}

    multi_das_calls = conf['multi_das_calls']
    select = conf['select']
    ignore = conf['ignore']

    blocks_at_pnn = get_timing(get_blocks_at_pnn(pnn, pcli, multi_das_calls),
                               timing)

    datasets_at_rse = get_timing(get_datasets_at_rse(rcli), timing)

    diff = compare_data_lists(blocks_at_pnn, datasets_at_rse, pnn)

    _diff_apply_filter(diff, select, ignore)

    diff['timing'].update(timing)

    return diff
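
A minimal usage sketch for get_node_diff (the node name and filter patterns are made-up placeholders, and pcli/rcli are the PhEDEx and rucio client instances created elsewhere; only the conf keys are taken from the function itself):

conf = {
    'multi_das_calls': False,            # True -> one DAS call per starting letter
    'select': [r'/.*/.*/MINIAODSIM'],    # include-only patterns (made-up example)
    'ignore': [],                        # nothing excluded
}

diff = get_node_diff('T2_XX_Example', pcli, rcli, conf)
# diff follows DEFAULT_DATADIFF_DICT: datasets to add, remove and update,
# plus the per-step timing gathered above.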
Example #2
    # The snippet starts mid-way through the parser setup; the option name
    # '--pool' below is an assumption based on the later OPTIONS.pool usage.
    PARSER.add_argument('--pool',
                        default=1,
                        help='number of parallel threads. Default 1.')

    OPTIONS = PARSER.parse_args()

    logging.my_lvl(OPTIONS.verbosity)

    #    logging.summary('DBP1')

    install_mp_handler()
    POOL = multiprocessing.Pool(int(OPTIONS.pool))

    PCLI = PhEDEx()

    PNNS = PCLI.pnns(select=OPTIONS.pnn)

    TIMING = {}

    WILDCARD = re.compile(r'\S*[*]\S*')

    DATASETS = get_timing(_get_dset_list(PCLI, OPTIONS.dataset), TIMING)

    PROCS = get_timing(_launch_workers(PNNS, DATASETS, POOL, OPTIONS, PCLI),
                       TIMING)

    get_timing(_get_workers(POOL, PROCS), TIMING)

    logging.summary(
        'Final Stats: n.pnns: %d, n.datasets: %d, poolsize: %d, timing: %s',
        len(PNNS), len(DATASETS), int(OPTIONS.pool), TIMING)
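
The get_timing helper is not shown in these snippets. A sketch of how it and a companion decorator might work, inferred only from the call sites above (the real implementation may differ):

import time
from functools import wraps

def timing(func):
    """Assumed companion decorator: wrap func so it returns its result
    together with its own wall-clock duration, keyed by function name."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        ret = func(*args, **kwargs)
        return {'return': ret, 'timing': {func.__name__: time.time() - start}}
    return wrapper

def get_timing(wrapped, timing_acc):
    """Merge the per-call timing into the accumulator dict and hand back
    the wrapped function's actual return value."""
    timing_acc.update(wrapped['timing'])
    return wrapped['return']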
Example #3
def pnn_sync(pnn, pcli):
    """
    Synchronize one rucio dataset at one rse
    :pnn:    phedex node name.
    :pcli:   phedex client.
    """

    summary = copy.deepcopy(DEFAULT_PNN_SUMMARY)

    conf = _get_config(pnn)
    summary['conf'] = conf

    if 'verbosity' in conf:
        logging.my_lvl(conf['verbosity'])

    rcli = Client(account=SYNC_ACCOUNT_FMT % pnn.lower())

    if _pnn_abort(pnn, summary, rcli):
        return summary

    diff = get_node_diff(pnn, pcli, rcli, conf)
    summary['timing'].update(diff['timing'])
    diff = diff['return']
    summary['diff'] = diff['summary']

    if (diff['summary']['tot'] == diff['summary']['to_remove']) and \
        not conf['allow_clean']:
        logging.warning('All datasets to be removed. Aborting.')
        summary['status'] = 'aborted'
        return summary

    logging.notice("Got diff=%s, timing=%s", summary['diff'],
                   summary['timing'])

    if _pnn_abort(pnn, summary, rcli):
        return summary

    workers = get_timing(_launch_pnn_workers(conf, diff, pnn, pcli, rcli),
                         summary['timing'])

    summary['workers'] = len(workers)

    logging.notice("Launched %d workers, pool size %d, timing %s",
                   summary['workers'], int(conf['pool']),
                   summary['timing']['_launch_pnn_workers'])

    left = int(conf['chunck']) - summary['workers'] + int(
        conf['min_deletions'])

    if left > 0:
        workers_st = get_timing(
            _launch_pnn_workers_st(left, diff, pnn, pcli, rcli),
            summary['timing'])

        summary['workers_st'] = len(workers_st)

        logging.notice("Launched %d single thread workers, timing %s",
                       summary['workers_st'],
                       summary['timing']['_launch_pnn_workers_st'])

        workers = dict(workers, **workers_st)

    _get_pnn_workers(workers, summary)

    summary['status'] = 'finished'

    return summary
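
DEFAULT_PNN_SUMMARY itself is not included in these examples; a plausible skeleton, inferred only from the keys pnn_sync reads and writes above (the default values are guesses):

DEFAULT_PNN_SUMMARY = {
    'conf': {},        # node configuration, filled from _get_config
    'diff': {},        # summary of the rucio/phedex comparison
    'timing': {},      # per-step durations accumulated via get_timing
    'workers': 0,      # pool workers launched
    'workers_st': 0,   # single-thread workers launched
    'status': 'initialising',   # set to 'aborted' or 'finished' by pnn_sync
}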
Example #4
def pnn_sync(pnn, pcli):
    """
    Synchronize one rucio dataset at one rse
    :pnn:    phedex node name.
    :pcli:   phedex client.
    """
    monitor.record_counter('cms_sync.site_started')
    summary = copy.deepcopy(DEFAULT_PNN_SUMMARY)

    conf = _get_config(pnn)
    summary['conf'] = conf

    if 'verbosity' in conf:
        logging.my_lvl(conf['verbosity'])

    rcli = Client(account=SYNC_ACCOUNT_FMT % pnn.lower())

    if _pnn_abort(pnn, summary, rcli):
        return summary

    # When conf['multi_das_calls'] is set, loop over shuffled dataset-name
    # prefixes so the node diff is built from several smaller DAS calls.
    if conf['multi_das_calls']:
        prefixes = list(string.ascii_letters + string.digits)  # string.letters is Python 2 only
        random.shuffle(prefixes)
    else:
        prefixes = [None]

    for prefix in prefixes:
        diff = get_node_diff(pnn, pcli, rcli, conf, prefix=prefix)
        summary['timing'].update(diff['timing'])
        diff = diff['return']
        summary['diff'] = diff['summary']

        if (diff['summary']['tot']
                == diff['summary']['to_remove']) and not conf['allow_clean']:
            logging.warning('All datasets to be removed. Aborting.')
            summary['status'] = 'aborted'
            continue  # was 'return summary' before the per-prefix loop

        logging.notice("Got diff=%s, timing=%s", summary['diff'],
                       summary['timing'])

        if _pnn_abort(pnn, summary, rcli):
            return summary

        workers = get_timing(_launch_pnn_workers(conf, diff, pnn, pcli, rcli),
                             summary['timing'])

    summary['workers'] = len(workers)

    logging.notice("Launched %d workers, pool size %d, timing %s",
                   summary['workers'], int(conf['pool']),
                   summary['timing']['_launch_pnn_workers'])

    left = int(conf['chunck']) - summary['workers'] + int(
        conf['min_deletions'])

    if left > 0:
        workers_st = get_timing(
            _launch_pnn_workers_st(left, diff, pnn, pcli, rcli),
            summary['timing'])

        summary['workers_st'] = len(workers_st)

        logging.notice("Launched %d single thread workers, timing %s",
                       summary['workers_st'],
                       summary['timing']['_launch_pnn_workers_st'])

        workers = dict(workers, **workers_st)

    _get_pnn_workers(workers, summary)
    monitor.record_counter('cms_sync.site_completed')

    summary['status'] = 'finished'

    return summary
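
This variant differs from Example #3 mainly in the per-prefix loop: with conf['multi_das_calls'] enabled, the node diff is computed once per dataset-name starting character instead of in a single pass. A standalone illustration of that fan-out (Python 3 spelling; names other than the string/random modules are illustrative):

import random
import string

def das_prefixes(multi_das_calls):
    """Return the shuffled list of starting characters to loop over,
    or a single None entry for one all-in-one DAS call."""
    if not multi_das_calls:
        return [None]
    prefixes = list(string.ascii_letters + string.digits)
    random.shuffle(prefixes)          # spread the query order across runs
    return prefixes

for prefix in das_prefixes(True)[:5]:
    print('would call get_node_diff(..., prefix=%r)' % prefix)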