Example #1
0
def get_blocks_at_pnn(pnn, pcli, multi_das_calls=True, prefix=None):
    """
    Get the list of completed replicas of closed blocks at a site.

    :pnn:             the phedex node name
    :pcli:            phedex client instance
    :multi_das_calls: if True (and no prefix is given), issue one DAS call
                      per leading letter/digit instead of a single big call
    :prefix:          restrict to blocks whose name starts with this prefix

    returns a dictionary with <block name>: <number of files>
    """

    # This is not optimal in terms of calls and time but reduces the memory footprint

    blocks_at_pnn = {}
    if prefix:
        logging.summary('Getting subset of blocks at %s beginning with %s' % (pnn, prefix))
        with monitor.record_timer_block('cms_sync.pnn_blocks_split'):
            logging.summary('Getting blocks at %s starting with %s' % (pnn, prefix))
            some_blocks_at_pnn = pcli.blocks_at_site(pnn=pnn, prefix=prefix)
            blocks_at_pnn.update(some_blocks_at_pnn)
            logging.summary('Got blocks at %s starting with %s' % (pnn, prefix))
    elif multi_das_calls:
        # string.ascii_letters replaces the Python-2-only, locale-dependent
        # string.letters; the block namespace is ASCII so this is equivalent.
        prefixes = list(string.ascii_letters + string.digits)
        logging.summary('Getting all blocks at %s. Multiple %s' % (pnn, multi_das_calls))
        logging.notice('Getting blocks with multiple das calls. %s', prefixes)
        for item in prefixes:
            with monitor.record_timer_block('cms_sync.pnn_blocks_split'):
                logging.summary('Getting blocks at %s starting with %s' % (pnn, item))
                some_blocks_at_pnn = pcli.blocks_at_site(pnn=pnn, prefix=item)
                blocks_at_pnn.update(some_blocks_at_pnn)
                logging.summary('Got blocks at %s starting with %s' % (pnn, item))
    else:
        logging.summary('Getting all blocks at %s in one call' % pnn)
        with monitor.record_timer_block('cms_sync.pnn_blocks_all'):
            blocks_at_pnn = pcli.blocks_at_site(pnn=pnn)

    logging.summary('Got blocks at %s.' % pnn)
    return blocks_at_pnn
Example #2
0
def compare_data_lists(blocks, datasets, pnn):
    """
    Compare the list of blocks at pnn and datasets at rse.

    :blocks:   dict of phedex file blocks (<name>: <number of files>)
    :datasets: dict of rucio datasets (<name>: <number of files>)
    :pnn:      phedex node name

    return the list of datasets to add, remove and update
    as in DEFAULT_DATADIFF_DICT
    """
    with monitor.record_timer_block('cms_sync.time_compare_rse_datasets'):
        ret = copy.deepcopy(DEFAULT_DATADIFF_DICT)

        # Set union instead of keys() + keys(): dict views in Python 3 do not
        # support '+', and the union is the portable equivalent.
        dataitems = set(blocks) | set(datasets)

        for dataset in dataitems:
            if dataset not in datasets:
                # at the phedex node but unknown to rucio
                ret['missing'].append(dataset)
                ret['summary']['missing'] += 1

            elif dataset not in blocks:
                # known to rucio but gone from the phedex node
                ret['to_remove'].append(dataset)
                ret['summary']['to_remove'] += 1

            elif blocks[dataset] != datasets[dataset]:
                # present on both sides but file counts disagree
                logging.warning("Dataset %s at pnn %s to update", dataset, pnn)
                ret['to_update'].append(dataset)
                ret['summary']['to_update'] += 1

            ret['summary']['tot'] += 1

    return ret
Example #3
0
def get_node_diff(pnn, pcli, rcli, conf, prefix=None):
    """
    Get the diff between rucio and phedex content at a node.

    :pnn:    node name
    :pcli:   phedex client instance
    :rcli:   rucio client instance
    :conf:   node configuration; 'multi_das_calls' controls per-letter DAS
             calls, 'select'/'ignore' are include/exclude filters
    :prefix: restrict the comparison to names starting with this prefix

    return the list of datasets to add, remove and update
    as in DEFAULT_DATADIFF_DICT (with per-step timings merged in)
    """
    timing = {}
    with monitor.record_timer_block('cms_sync.time_node_diff'):
        # Gather both sides, recording how long each fetch took.
        phedex_blocks = get_timing(
            get_blocks_at_pnn(pnn, pcli, conf['multi_das_calls'], prefix=prefix),
            timing)
        rucio_datasets = get_timing(
            get_datasets_at_rse(rcli, prefix=prefix),
            timing)

        diff = compare_data_lists(phedex_blocks, rucio_datasets, pnn)
        _diff_apply_filter(diff, conf['select'], conf['ignore'])
        diff['timing'].update(timing)

    return diff
Example #4
0
def block_sync(pnn, rds, pcli, rcli):
    """
    Synchronize one rucio dataset at one rse.

    :pnn:    pnn.
    :rds:    rucio dataset.
    :pcli:   phedex client.
    :rcli:   rucio client.

    returns 'aborted' when the node config disables the run or the rucio
    server cannot be pinged; otherwise the result of _replica_update.
    """

    conf = _get_config(pnn)

    if 'block_verbosity' in conf:
        logging.my_lvl(conf['block_verbosity'])

    # Guard clauses: bail out early if syncing is disabled or rucio is down.
    if not conf['run']:
        return 'aborted'

    if not _ping(rcli):
        logging.warning('Cannot Ping, aborting.')
        return 'aborted'

    with monitor.record_timer_block('cms_sync.time_block_sync'):
        # NOTE: the stray monitor=monitor kwarg was dropped —
        # _replica_update(dataset, pnn, rse, pcli, rcli, dry) does not
        # accept it, so passing it raised TypeError.
        ret = _replica_update(
            dataset=rds,
            pnn=pnn,
            rse=conf['rse'],
            pcli=pcli,
            rcli=rcli,
            dry=conf['dry'],
        )

    return ret
Example #5
0
def get_datasets_at_rse(rcli, prefix=None):
    """
    Get the list of rucio datasets at a rse, listing the rules
    belonging to the sync account.

    :rcli:   rucio client (with the sync account)
    :prefix: if given, keep only dataset names starting with this prefix

    returns a dictionary with <dataset name>: <number of files>
    """
    datasets = {}
    with monitor.record_timer_block('cms_sync.rse_datasets'):
        # Only rules with no expiration are permanent replicas we track.
        for rule in rcli.list_account_rules(rcli.__dict__['account']):
            if rule['expires_at'] is not None:
                continue
            if prefix is not None and not rule['name'].startswith(prefix):
                continue
            datasets[rule['name']] = rule['locks_ok_cnt']
    return datasets
Example #6
0
def _replica_update(dataset, pnn, rse, pcli, rcli, dry):
    """
    Update one dataset replica at one rse via CMSRucioDatasetReplica.

    The added/removed replica lists in the result are collapsed to their
    counts before returning, to keep the summary lightweight.
    """
    with monitor.record_timer_block('cms_sync.update_replica'):
        replica = CMSRucioDatasetReplica(
            rds=dataset,
            pnn=pnn,
            rse=rse,
            pcli=pcli,
            rcli=rcli,
        )
        result = replica.update(dry=dry)

        # Replace the (potentially long) name lists with simple counts.
        for action in ('added', 'removed'):
            result['replicas'][action] = len(result['replicas'][action])
    return result
Example #7
0
def _get_dset_list(pcli, datasets):
    """
    Expand a list of phedex data-item patterns into the deduplicated list
    of concrete dataset names, dropping any result that still contains a
    '*' wildcard.
    """
    with monitor.record_timer_block('cms_sync.get_dataset_list'):
        logging.verbose("Getting datasets list for: %s",
                        datasets)

        # An item still containing '*' is an unexpanded pattern — skip it.
        is_wildcard = re.compile(r'\S*[*]\S*').match

        found = set()
        for pattern in datasets:
            for item in pcli.list_data_items(pditem=pattern,
                                             metadata=False,
                                             locality=False):
                if not is_wildcard(item):
                    found.add(item)

        ret = list(found)

    logging.verbose("Got %d datasets", len(ret))

    return ret