Example #1
    def __init__(self, pnn, account, auth_type=None, rsetype=DEFAULT_RSETYPE, suffix=None,
                 dry=False, fts=None, tier=None, lfn2pfn_algorithm=None, country=None,
                 attrs=None, seinfo=None, tfc=None, tfc_exclude=EXCLUDE_TFC, domains=None,
                 space_token=None, add_prefix=None, proto=DEFAULT_PROTOCOL,
                 instance=DEFAULT_PHEDEX_INST, dasgoclient=DEFAULT_DASGOCLIENT,
                 datasvc=DEFAULT_DATASVC_URL):

        attrs = attrs or []

        self.pnn = pnn
        self.rsetype = rsetype
        if suffix is None:
            suffix = DEFAULT_SUFFIXES[rsetype]

        self.suffix = suffix
        self.rsename = pnn + self.suffix

        if tfc and os.path.isdir(tfc):
            self.tfc = tfc + '/' + pnn + '/PhEDEx/storage.xml'
        else:
            self.tfc = tfc

        self.pcli = PhEDEx(instance=instance, dasgoclient=dasgoclient, datasvc=datasvc)
        self.rcli = Client(account=account, auth_type=auth_type)

        self.dry = dry

        self._get_attributes(fts, tier, lfn2pfn_algorithm, country, attrs)

        self._get_settings()

        self._get_protocol(seinfo, add_prefix, tfc_exclude, domains, space_token, proto)
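A minimal usage sketch for the constructor above, assuming it belongs to an RSE-builder class (the name CMSRSE and the argument values are illustrative, not confirmed by this listing):

    # Hypothetical class name and values; only pnn and account are required,
    # everything else falls back to the defaults in the signature.
    rse = CMSRSE(pnn='T2_US_Nebraska', account='sync_account', dry=True)
    print(rse.rsename)  # pnn plus the default suffix for DEFAULT_RSETYPE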
Example #2
    def __init__(self,
                 pnn,
                 account,
                 auth_type=None,
                 rsetype=DEFAULT_RSETYPE,
                 suffix=None,
                 dry=False,
                 fts=None,
                 tier=None,
                 lfn2pfn_algorithm=None,
                 country=None,
                 attrs=None,
                 seinfo=None,
                 tfc=None,
                 tfc_exclude=EXCLUDE_TFC,
                 domains=None,
                 space_token=None,
                 add_prefix=None,
                 proto=DEFAULT_PROTOCOL,
                 instance=DEFAULT_PHEDEX_INST,
                 dasgoclient=DEFAULT_DASGOCLIENT,
                 datasvc=DEFAULT_DATASVC_URL):

        attrs = attrs or []

        self.pnn = pnn
        self.rsetype = rsetype
        if suffix is None:
            suffix = DEFAULT_SUFFIXES[rsetype]

        self.suffix = suffix
        if pnn.endswith('_MSS'):
            raise ValueError(
                'Please import PhEDEx _Buffer pnns rather than _MSS for tape endpoints'
            )
        elif pnn.endswith('_Buffer'):
            self.rsename = pnn.replace('_Buffer', '_Tape') + self.suffix
            self.rucio_rse_type = 'TAPE'
        else:
            self.rsename = pnn + self.suffix
            self.rucio_rse_type = 'DISK'

        if tfc and os.path.isdir(tfc):
            self.tfc = tfc + '/' + pnn + '/PhEDEx/storage.xml'
        else:
            self.tfc = tfc

        self.pcli = PhEDEx(instance=instance,
                           dasgoclient=dasgoclient,
                           datasvc=datasvc)
        self.rcli = Client(account=account, auth_type=auth_type)

        self.dry = dry

        self._get_attributes(fts, tier, lfn2pfn_algorithm, country, attrs)

        self._get_settings()

        self._get_protocol(seinfo, add_prefix, tfc_exclude, domains,
                           space_token, proto)
Example #3
    def __init__(self, options):
        self.options = options

        self.config = load_config(options.config)
        self.last_synced = {}  # load_last_synced()
        self.phedex_svc = PhEDEx()

Example #4
    def _get_pcli(self, pcli):
        if pcli is None:
            pcli = {}

        if isinstance(pcli, dict):
            self.pcli = PhEDEx(**pcli)
        elif isinstance(pcli, PhEDEx):
            # pylint: disable=redefined-variable-type
            self.pcli = pcli
        else:
            raise Exception("wrong type for pcli parameter %s" % type(pcli))
Example #5
    def __init__(self,
                 block_name,
                 pnn,
                 rse=None,
                 lifetime=None,
                 dry_run=False):
        """
        Get the status of replica of pditem at pnn
        considering only closed blocks completely replicated at site.

        :rds:    PhEDEx block name.
        :pnn:    PhEDEx node name.
        :rse:    Rucio RSE. If None (default) inferred by the pnn using DEFAULT_RSE_FMT.
        :scope:  Scope. Default: DEFAULT_SCOPE.
        """

        self.phedex_svc = PhEDEx()
        self.dry_run = dry_run

        self.pnn = pnn
        if rse is None:
            self.rse = list_rses('cms_type=real&pnn=%s' % self.pnn)[0]['rse']
        else:
            self.rse = rse
        rse_details = get_rse(self.rse)
        self.rse_id = rse_details['id']

        self.account = (SYNC_ACCOUNT_FMT % self.rse.lower())[:25]
        self.container = self.phedex_svc.check_data_item(
            pditem=block_name)['pds']
        self.scope = DEFAULT_SCOPE
        self.block_name = block_name
        self.lifetime = lifetime

        self.group, self.custodial, self.is_at_pnn = self.phedex_svc.block_at_pnn_phedex(
            block=self.block_name, pnn=self.pnn)
        self.block_in_phedex = self.phedex_svc.block_exists(
            block=self.block_name)
        self.block_known = self.phedex_svc.block_known(block=self.block_name)

        if self.is_at_pnn:
            self.replicas = self.phedex_svc.fileblock_files_phedex(
                pnn=pnn, pfb=block_name)
        else:
            self.replicas = {}

        self.container_exists = None
        self.block_exists = None
        self.rule_exists = None

        touch(text=self.rse)
Example #6
    def __init__(self,
                 account,
                 auth_type=None,
                 exclude=DEFAULT_EXCLUDE_LINKS,
                 distance=None,
                 phedex_links=False,
                 rselist=None,
                 instance=DEFAULT_PHEDEX_INST,
                 datasvc=DEFAULT_DATASVC_URL):

        if distance is None:
            distance = DEFAULT_DISTANCE_RULES

        self.pcli = PhEDEx(instance=instance, datasvc=datasvc)
        self.rcli = Client(account=account, auth_type=auth_type)

        self._get_rselist(rselist)

        self._get_matrix(distance, phedex_links, exclude)
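As above, a hedged instantiation sketch; the class name LinksMatrix is hypothetical, and only account is required:

    # Builds the RSE list and the distance matrix from PhEDEx links.
    links = LinksMatrix(account='transfer_ops', phedex_links=True)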
Example #7
class SiteSyncer(object):
    def __init__(self, options):
        self.options = options

        self.config = load_config(options.config)
        self.last_synced = {}  # load_last_synced()
        self.phedex_svc = PhEDEx()
        self.patterns = []


    def sync_site(self, site_pair):
        """
        Sync a site defined by a site_pair of (site, prefix).
        Prefix can be None to sync all blocks at the site.
        :return:
        """
        site, prefix = site_pair

        if site.endswith('_Tape'):
            pnn = site.replace('_Tape', '_MSS')
        else:
            pnn = site

        # now = int(time.time())

        # Set 1980 as the last sync date if no data exists
        # site_last_synced = self.last_synced.get(site_pair, 10 * 365 * 24 * 3600)
        # last_week = int(site_last_synced - 7 * 24 * 3600)

        BLOCKS_PER_ACTION = None  # no truncation unless configured below
        if self.config.get('default', None):
            if self.config['default'].get('chunck', 0):
                BLOCKS_PER_ACTION = int(self.config['default']['chunck'])
            if self.config['default'].get('select', None):
                self.patterns = [self.config['default']['select']]

        with monitor.record_timer_block('cms_sync.time_site_sync'):

            r_timer = 'cms_sync.time_rucio_block_list_all'
            p_timer = 'cms_sync.time_phedex_block_list_all'
            if prefix:
                r_timer = 'cms_sync.time_rucio_block_list_partial'
                p_timer = 'cms_sync.time_phedex_block_list_partial'

            # Add touches to keep from getting killed as long as progress is being made
            with monitor.record_timer_block(p_timer):
                touch(text='PQ ' + site)
                phedex_blocks = self.phedex_svc.blocks_at_site(pnn=pnn,
                                                               prefix=prefix,
                                                               since=None)
            with monitor.record_timer_block(r_timer):
                touch(text='RQ ' + site)
                rucio_blocks = self.get_datasets_at_rse(rse=site,
                                                        prefix=prefix)
                touch(text='DQ ' + site)

            n_blocks_in_phedex = len(phedex_blocks)
            n_blocks_in_rucio = len(rucio_blocks)

            # FIXME: This is refusing to delete everything from Rucio. Not clear it's needed
            if not n_blocks_in_phedex and n_blocks_in_rucio:
                logging.warning(
                    "At %s found %s blocks in PhEDEx and %s in Rucio with prefix %s",
                    site, n_blocks_in_phedex, n_blocks_in_rucio, prefix)
                return
            if not n_blocks_in_phedex and not n_blocks_in_rucio:
                logging.info(
                    "At %s:%s, nothing in PhEDEx or Rucio. Quitting." %
                    (site, prefix))
                return

            block_report = compare_site_blocks(phedex=phedex_blocks,
                                               rucio=rucio_blocks,
                                               rse=site,
                                               patterns=self.patterns)

            n_blocks_not_in_rucio = len(block_report['not_rucio'])
            n_blocks_not_in_phedex = len(block_report['not_phedex'])
            n_incomplete_blocks = len(block_report['incomplete'])

            logging.info("At %s:%s In both/PhEDEx only/Rucio only: %s/%s/%s" %
                         (site, prefix, len(block_report['complete']),
                          n_blocks_not_in_rucio, n_blocks_not_in_phedex))
            if len(block_report['complete']
                   ) or n_blocks_not_in_rucio or n_blocks_not_in_phedex:
                logging.info(
                    'At %s:%s %3.0f%% complete', site, prefix,
                    len(block_report['complete']) * 100 /
                    (len(block_report['complete']) + n_blocks_not_in_rucio +
                     n_blocks_not_in_phedex))
            if len(block_report['complete']) or n_blocks_not_in_rucio:
                logging.info(
                    'At %s:%s %3.0f%% completely added', site, prefix,
                    len(block_report['complete']) * 100 /
                    (len(block_report['complete']) + n_blocks_not_in_rucio))
            # Truncate lists if we want to reduce cycle time
            if BLOCKS_PER_ACTION and n_blocks_not_in_rucio > BLOCKS_PER_ACTION:
                block_report['not_rucio'] = set(
                    list(block_report['not_rucio'])[:BLOCKS_PER_ACTION])
                n_blocks_not_in_rucio = len(block_report['not_rucio'])
            if BLOCKS_PER_ACTION and n_blocks_not_in_phedex > BLOCKS_PER_ACTION:
                block_report['not_phedex'] = set(
                    list(block_report['not_phedex'])[:BLOCKS_PER_ACTION])
                n_blocks_not_in_phedex = len(block_report['not_phedex'])

            logging.info('Adding   %6d blocks to   Rucio for %s:%s',
                         n_blocks_not_in_rucio, site, prefix)
            for block in block_report['not_rucio']:
                logging.info('Adding to rucio: %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)
                bs.add_to_rucio()

            logging.info('Removing %6d blocks from Rucio for %s:%s',
                         n_blocks_not_in_phedex, site, prefix)
            for block in block_report['not_phedex']:
                logging.info('Removing from rucio: %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)
                bs.remove_from_rucio()

            for block in block_report['incomplete']:
                logging.warning('Redoing sync for %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)
                bs.add_to_rucio(recover=True)

            logging.info('Finished syncing                      %s:%s' %
                         (site, prefix))

    def chunks_to_sync(self):
        """
        Turn the config into a list of site/prefix pairs which need to be synced
        :return: The site prefix pairs
        """

        to_sync = []

        for site, site_config in self.config.items():
            print('Site %s (%s) is ok %s' %
                  (site, type(site), site not in ['default', 'main']))
            if site not in ['default', 'main']:
                if site_config.get('multi_das_calls', False):
                    for prefix in list(string.ascii_letters + string.digits):
                        if ('T0' in site or 'FNAL' in site) and prefix == 'S':
                            for fnal_prefix in ('Sc', 'Se', 'Si', 'Sp', 'St',
                                                'SI', 'SM', 'ST', 'SU', 'SV'):
                                to_sync.append((site, fnal_prefix))
                        elif 'FNAL' in site and prefix == 'M':
                            for fnal_prefix in ('Ma', 'MC', 'ME', 'Mi', 'Mo',
                                                'MS', 'Mu'):
                                to_sync.append((site, fnal_prefix))
                        elif ('T0' in site
                              or 'FNAL' in site) and prefix == 'D':
                            for fnal_prefix in ('Da', 'Di', 'DM', 'Do', 'DP',
                                                'Ds', 'DS', 'DY'):
                                to_sync.append((site, fnal_prefix))
                        elif ('T0' in site
                              or 'FNAL' in site) and prefix == 'T':
                            for fnal_prefix in ('T1', 'T4', 'T5', 'TH', 'TK',
                                                'TO', 'TA', 'TB', 'TC', 'TG',
                                                'TZ', 'T_', 'TT', 'TW', 'Tk',
                                                'To', 'Ta', 'Tb', 'Te', 'Tp',
                                                'Tr', 'Ts', 'Tt', 'Tw'):
                                to_sync.append((site, fnal_prefix))
                        elif ('T0' in site
                              or 'FNAL' in site) and prefix == 'H':
                            for fnal_prefix in ('H0', 'H1', 'Ha', 'He', 'Hi',
                                                'HJ', 'Hp', 'HP', 'Hs', 'HS',
                                                'HT', 'HV', 'HW', 'HZ'):
                                to_sync.append((site, fnal_prefix))
                        else:
                            to_sync.append((site, prefix))
                else:
                    to_sync.append((site, None))

        # Cut the list (keep in order but choose a random starting point)
        offset = random.randrange(len(to_sync))
        to_sync = to_sync[offset:] + to_sync[:offset]

        return to_sync

    @staticmethod
    def get_datasets_at_rse(rse, prefix=None):
        """

        :param rse: The RSE name
        :param prefix: Character(s) to restrict the dataset search
        :return: a dictionary with <dataset name>: <number of files>
        """

        filters = {'scope': 'cms', 'did_type': DIDType.DATASET}
        if prefix:
            filters['name'] = '/' + prefix + '*'

        account = SYNC_ACCOUNT_FMT % rse.lower()
        rule_filters = {
            'account': account,
            'scope': 'cms',
            'did_type': DIDType.DATASET
        }

        with monitor.record_timer_block('cms_sync.time_rse_datasets'):
            synced_ds = {
                item['name']
                for item in list_replication_rules(filters=rule_filters)
                if item['expires_at'] is None and (
                    prefix is None or item['name'].startswith('/' + prefix))
            }

            all_datasets = [
                dataset['name']
                for dataset in list_datasets_per_rse(rse=rse, filters=filters)
            ]

            logging.info('Getting all datasets at %s with prefix %s' %
                         (rse, prefix))

            datasets = {}

            for dataset in all_datasets:
                if dataset in synced_ds:
                    for ds in list_dataset_replicas(scope='cms',
                                                    name=dataset,
                                                    deep=True):
                        if ds['rse'] == rse:
                            datasets.update({dataset: ds['available_length']})

        return datasets
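A hedged driver sketch tying chunks_to_sync() and sync_site() together; argparse.Namespace stands in for the real options object, which only needs a config attribute here, and the config path is a placeholder:

    from argparse import Namespace

    syncer = SiteSyncer(Namespace(config='sync.cfg'))
    for site_pair in syncer.chunks_to_sync():  # (site, prefix) tuples
        syncer.sync_site(site_pair)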
Example #8
class BlockSyncer(object):
    """
    Class representing the replica at a site of a CMS Dataset (PhEDEx FileBlock)
    """
    def __init__(self,
                 block_name,
                 pnn,
                 rse=None,
                 lifetime=None,
                 dry_run=False):
        """
        Get the status of replica of pditem at pnn
        considering only closed blocks completely replicated at site.

        :rds:    PhEDEx block name.
        :pnn:    PhEDEx node name.
        :rse:    Rucio RSE. If None (default) inferred by the pnn using DEFAULT_RSE_FMT.
        :scope:  Scope. Default: DEFAULT_SCOPE.
        """

        self.phedex_svc = PhEDEx()
        self.dry_run = dry_run

        self.pnn = pnn
        if rse is None:
            self.rse = list_rses('cms_type=real&pnn=%s' % self.pnn)[0]['rse']
        else:
            self.rse = rse
        rse_details = get_rse(self.rse)
        self.rse_id = rse_details['id']

        self.account = SYNC_ACCOUNT_FMT % self.rse.lower()
        self.container = self.phedex_svc.check_data_item(
            pditem=block_name)['pds']
        self.scope = DEFAULT_SCOPE
        self.block_name = block_name
        self.lifetime = lifetime

        self.group, self.custodial = self.phedex_svc.block_at_pnn_phedex(
            block=self.block_name, pnn=self.pnn)
        self.is_at_pnn = bool(self.group)

        if self.is_at_pnn:
            self.replicas = self.phedex_svc.fileblock_files_phedex(
                pnn=pnn, pfb=block_name)
        else:
            self.replicas = {}

        self.container_exists = None
        self.block_exists = None
        self.rule_exists = None

        touch(text=self.rse)

    def add_to_rucio(self, recover=False):
        """"""
        with monitor.record_timer_block('cms_sync.time_add_block'):
            self.register_container()
            block_exists = self.register_block()
            if block_exists:
                self.update_replicas()
                if recover:
                    self.make_replicas_available()
                self.update_rule()
            else:
                logging.critical('Unable to make the block %s',
                                 self.block_name)

    def remove_from_rucio(self):
        """"""
        with monitor.record_timer_block('cms_sync.time_remove_block'):
            self.update_replicas()
            self.update_rule()

    def register_container(self):
        self.container_exists = False
        if self.is_at_pnn and self.dry_run:
            logging.info('Dry Run: Create container %s in scope %s.',
                         self.container, self.scope)
            self.container_exists = True
            return self.container_exists

        try:
            get_did(scope=self.scope, name=self.container)
            monitor.record_counter('cms_sync.container_exists')
            self.container_exists = True
            logging.info('Found container %s', self.container)
        except DataIdentifierNotFound:
            if self.is_at_pnn:
                try:
                    logging.info('Create container %s in scope %s.',
                                 self.container, self.scope)
                    add_did(scope=self.scope,
                            name=self.container,
                            type='CONTAINER',
                            issuer=self.account,
                            lifetime=self.lifetime)
                    monitor.record_counter('cms_sync.container_created')
                    self.container_exists = True
                    logging.info('Created container %s in scope %s.',
                                 self.container, self.scope)
                except DataIdentifierAlreadyExists:
                    logging.warning('Container was created in the meanwhile')
                    monitor.record_counter('cms_sync.container_collision')
                    self.container_exists = True
            else:
                logging.warning('Container was not at PNN')

        return self.container_exists

    def register_block(self):
        """
        Register the dataset (if there is a replica at the pnn) and attach to container
        :dry: Dry run. Default false.
        """

        # FIXME: The logic here could use some improvement as we try to create a block even if it exists already

        try:
            get_did(scope=self.scope, name=self.block_name)
            self.block_exists = True
            monitor.record_counter('cms_sync.dataset_exists')
        except DataIdentifierNotFound:
            self.block_exists = False

        if self.is_at_pnn and self.dry_run:
            logging.info('Dry Run: Create dataset %s in scope %s.',
                         self.block_name, self.scope)
            self.block_exists = True
        elif self.is_at_pnn:
            logging.info('Create block %s in scope %s.', self.block_name,
                         self.scope)
            try:
                if not self.block_exists:
                    add_did(scope=self.scope,
                            name=self.block_name,
                            type='DATASET',
                            issuer=self.account,
                            lifetime=self.lifetime)
                    monitor.record_counter('cms_sync.dataset_created')
            except DataIdentifierAlreadyExists:
                logging.warning('Attempt to add %s:%s failed, already exists.',
                                self.scope, self.block_name)
                monitor.record_counter('cms_sync.dataset_collision')

            try:
                attach_dids(scope=self.scope,
                            name=self.container,
                            attachment={
                                'dids': [{
                                    'scope': self.scope,
                                    'name': self.block_name
                                }]
                            },
                            issuer=self.account)
            except DuplicateContent:
                logging.warning(
                    'Attempt to add %s:%s to %s failed, already exists.',
                    self.scope, self.block_name, self.container)
            except DataIdentifierNotFound:
                logging.error(
                    'Attempt to add %s:%s to %s failed. Container does not exist.',
                    self.scope, self.block_name, self.container)
                return False
            self.block_exists = True
        else:
            logging.warning('Block %s was not at PNN', self.block_name)

        return self.block_exists

    def update_rule(self):
        """
        Adds or removes the rule for the block.
        """

        rules = list_replication_rules(filters={
            'scope': self.scope,
            'name': self.block_name
        })
        # rules = self.rcli.list_did_rules(scope=self.scope, name=self.block_name)
        rse_expression = 'rse=' + self.rse

        remove_rules = [
            rule for rule in rules if rule['account'] == self.account
            and rule['rse_expression'] == rse_expression
        ]

        if not remove_rules and self.is_at_pnn:
            self.rule_exists = False
            if self.dry_run:
                logging.info("Dry run: Adding rule for dataset %s at rse %s.",
                             self.block_name, self.rse)
            else:
                self.add_replication_rule_with_defaults(
                    dids=[{
                        'scope': self.scope,
                        'name': self.block_name
                    }],
                    copies=1,
                    rse_expression=rse_expression,
                    account=self.account)
                monitor.record_counter('cms_sync.rules_added')
            self.rule_exists = True
        elif remove_rules and not self.is_at_pnn:
            self.rule_exists = True
            if self.dry_run:
                logging.info("Removing rules for dataset %s at rse %s.",
                             self.block_name, self.rse)
            else:
                for rule in remove_rules:
                    # delete_replication_rule(rule['id'], purge_replicas=False, issuer=self.account)
                    delete_rule(rule_id=rule['id'],
                                purge_replicas=True,
                                soft=False)
                    monitor.record_counter('cms_sync.rules_removed')
            self.rule_exists = False

    def update_replicas(self):
        """
        Add or removes replicas for the dataset at rse.
        """

        with monitor.record_timer_block('cms_sync.time_update_replica'):
            logging.info('Updating replicas for %s:%s at %s', self.scope,
                         self.block_name, self.rse)
            replicas = list_replicas(dids=[{
                'scope': self.scope,
                'name': self.block_name
            }],
                                     rse_expression='rse=%s' % self.rse)
            try:
                rucio_replicas = {repl['name'] for repl in replicas}
            except TypeError:
                rucio_replicas = set()

            phedex_replicas = set(self.replicas.keys())
            missing = list(phedex_replicas - rucio_replicas)
            to_remove = list(rucio_replicas - phedex_replicas)

            if missing and (len(phedex_replicas) != len(missing)):
                logging.warning(
                    'Recovery: Inconsistency found for %s at %s: %s in PhEDEx and %s missing',
                    self.rse, self.block_name, len(phedex_replicas),
                    len(missing))

            if missing:
                lfns_added = self.add_missing_replicas(missing)
                monitor.record_counter('cms_sync.files_added',
                                       delta=lfns_added)
            if to_remove:
                lfns_removed = self.remove_extra_replicas(to_remove)
                monitor.record_counter('cms_sync.files_removed',
                                       delta=lfns_removed)

        return

    def make_replicas_available(self):
        """
        Marks available replicas for the dataset at rse if they are in PhEDEx
        """

        with monitor.record_timer_block('cms_sync.time_recover_replica'):
            logging.info('Recovering unavailable replicas for %s:%s at %s',
                         self.scope, self.block_name, self.rse)

            replicas = list_replicas(dids=[{
                'scope': self.scope,
                'name': self.block_name
            }],
                                     rse_expression='rse=%s' % self.rse,
                                     all_states=True)

            try:
                unavailable_replicas = {
                    repl['name']
                    for repl in replicas
                    if repl['states'][self.rse] != 'AVAILABLE'
                }
            except TypeError:
                unavailable_replicas = set()

            phedex_replicas = set(self.replicas.keys())
            missing = list(phedex_replicas & unavailable_replicas)

            logging.info(
                'Recovery for %s:%s at %s: PhEDEx has %s, Rucio unavailable %s. Missing: %s ',
                self.scope, self.block_name, self.rse, len(phedex_replicas),
                len(unavailable_replicas), len(missing))

            # Fix up things which are unavailable
            rse_details = get_rse(self.rse)
            rse_id = rse_details['id']
            scope = InternalScope(self.scope)
            state = 'A'

            for name in missing:
                logging.info('Setting available %s:%s at %s', self.scope, name,
                             self.rse)
                core_update_state(rse_id=rse_id,
                                  scope=scope,
                                  name=name,
                                  state=state)

            monitor.record_counter('cms_sync.files_made_available',
                                   delta=len(missing))

        return

    def remove_extra_replicas(self, to_remove):
        """
        :param to_remove: replicas to remove from Rucio
        :return:
        """
        scope = InternalScope(self.scope)
        with monitor.record_timer_block('cms_sync.time_remove_replica'):
            if to_remove and self.dry_run:
                logging.info('Dry run: Removing replicas %s from rse %s.',
                             str(to_remove), self.rse)
            elif to_remove:
                logging.debug('Removing %s replicas from rse %s.',
                              len(to_remove), self.rse)
                for to_remove_chunk in chunks(to_remove, REMOVE_CHUNK_SIZE):
                    replicas = [{
                        'scope': scope,
                        'name': lfn,
                        "rse_id": self.rse_id,
                        "state": "U"
                    } for lfn in to_remove_chunk]
                    # transactional_session here?
                    # while lock is set stuck, judge-repairer might make transfer requests before rule is gone but does it matter?
                    update_replicas_states(
                        replicas=replicas,
                        add_tombstone=False,
                    )

                # delete_replicas(rse=self.rse, issuer=self.account,
                #                     files=[{'scope': self.scope, 'name': lfn} for lfn in to_remove_chunk])
                return len(to_remove)

    def add_missing_replicas(self, missing):
        """
        :param missing: possible missing lfns
        :return:
        """

        with monitor.record_timer_block('cms_sync.time_add_replica'):
            if missing and self.dry_run:
                logging.info('Dry run: Adding replicas %s to rse %s.',
                             str(missing), self.rse)
            elif missing:
                logging.info('Adding %s replicas to rse %s.', len(missing),
                             self.rse)
                replicas_to_add = [self.replicas[lfn] for lfn in missing]
                files = replica_file_list(replicas=replicas_to_add,
                                          scope=self.scope)
                for rucio_file in files:
                    try:
                        update_file = copy.deepcopy(rucio_file)
                        update_file.update({
                            'scope': InternalScope(self.scope),
                            "rse_id": self.rse_id,
                            "state": "A"
                        })
                        update_replicas_states(replicas=[update_file],
                                               add_tombstone=False)
                    except ReplicaNotFound:
                        try:
                            add_replicas(rse=self.rse,
                                         files=[rucio_file],
                                         issuer=self.account,
                                         ignore_availability=True)
                        except RucioException:
                            logging.critical(
                                'Could not add %s to %s. Constraint violated?',
                                rucio_file, self.rse)
                            resurrect([{
                                'scope': rucio_file['scope'],
                                'name': rucio_file['name']
                            }],
                                      issuer=self.account)
                            add_replicas(rse=self.rse,
                                         files=[rucio_file],
                                         issuer=self.account,
                                         ignore_availability=True)
                            logging.critical('Resurrected %s at %s',
                                             rucio_file, self.rse)

                # add_replicas(rse=self.rse, files=files, issuer=self.account)
                lfns = [
                    item['name'] for item in list_files(
                        scope=self.scope, name=self.block_name, long=False)
                ]

                missing_lfns = list(set(missing) - set(lfns))

                if missing_lfns:
                    logging.debug('Attaching %s lfns to %s at %s',
                                  len(missing_lfns), self.block_name, self.rse)
                    dids = [{
                        'scope': self.scope,
                        'name': lfn
                    } for lfn in missing_lfns]
                    try:
                        attach_dids(scope=self.scope,
                                    name=self.block_name,
                                    attachment={'dids': dids},
                                    issuer=self.account)
                    except FileAlreadyExists:
                        logging.warning(
                            'Trying to attach already existing files to %s',
                            self.block_name)
                    except DataIdentifierNotFound:
                        logging.critical(
                            'Could not attach to %s at %s. Constraint violated?',
                            self.block_name, self.rse)
                return len(missing_lfns)

    def add_replication_rule_with_defaults(self, dids, copies, rse_expression,
                                           account):
        """
        Add replication rule requires one to send all the values. Add a list of defaults.
        If true options are required, move them into the parameter list.

        :param dids: List of dids (scope/name dictionary)
        :param copies: Number of copies
        :param rse_expression: RSE expression
        :param account: Account for the rule
        :return: None
        """

        (grouping, weight, lifetime, locked, subscription_id,
         source_replica_expression, notify, purge_replicas,
         ignore_availability, comment, ask_approval, asynchronous, priority,
         split_container) = ('DATASET', None, None, False, None, None, None,
                             False, False, None, False, False, 3, False)

        activity = 'Data Consolidation'
        meta = json.dumps({
            "phedex_group": self.group,
            "phedex_custodial": self.custodial
        })

        add_replication_rule(
            dids=dids,
            copies=copies,
            rse_expression=rse_expression,
            account=account,
            grouping=grouping,
            weight=weight,
            lifetime=lifetime,
            locked=locked,
            subscription_id=subscription_id,
            source_replica_expression=source_replica_expression,
            activity=activity,
            notify=notify,
            purge_replicas=purge_replicas,
            ignore_availability=ignore_availability,
            comment=comment,
            ask_approval=ask_approval,
            asynchronous=asynchronous,
            priority=priority,
            split_container=split_container,
            meta=meta,
            issuer=account)
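A minimal sketch of driving BlockSyncer directly; the block and node names are placeholders, and dry_run=True keeps the Rucio-side actions read-only (the constructor still queries PhEDEx and Rucio):

    bs = BlockSyncer(block_name='/Primary/Processed/TIER#guid',
                     pnn='T2_US_Nebraska', dry_run=True)
    if bs.is_at_pnn:
        bs.add_to_rucio()       # container, dataset, replicas, rule
    else:
        bs.remove_from_rucio()  # replicas and rule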
Example #9
def sync(config, logs):
    """
    Main Sync process
    """

    logging.my_logfile(logs=logs)
    logging.my_fmt(label='main_sync')
    starttime = datetime.now()
    modify = {}
    workers = {}  # this is the array of running pnns
    pnns = None  # this is the array of pnn to be launched
    pool = None

    pcli = PhEDEx()

    install_mp_handler()

    conf = _load_config(config, modify, starttime)

    pnns = []

    size = conf['main']['pool']

    logging.summary('Starting')

    while conf['main']['run']:

        if pool is None:
            logging.notice('Started pool of size %d', size)
            pool = multiprocessing.NDPool(size)

        add = [
            pnn for pnn, sec in conf.items() if pnn != 'main' if sec['run']
            if pnn not in workers if pnn not in pnns
        ]

        pnns += add

        random.shuffle(pnns)

        if not _ping():
            logging.warning('Cannot ping, not launching workers')
        else:
            _launch_workers(pool, workers, pnns, pcli)
            pnns = []

        _poll_workers(workers, pnns)

        conf = _load_config(config, modify, starttime)

        if not conf['main']['run'] or\
            conf['main']['pool'] != size:

            # trigger draining of all workers, close the pool and wait
            # for the task to be over
            conf = _load_config(config, {'default': {'run': False}}, starttime)
            _drain_up(workers, pnns)
            workers = {}
            pool.close()
            pool = None
            size = conf['main']['pool']

        else:
            time.sleep(conf['main']['sleep'])

    logging.summary('Exiting.')

    return config
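Judging from the keys sync() reads, the loaded configuration is shaped roughly like the sketch below (a hedged reconstruction; _load_config may add further fields, and the section name is a placeholder):

    conf = {
        'main': {'run': True, 'pool': 4, 'sleep': 60},
        'T2_US_Nebraska': {'run': True},  # one section per pnn
    }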
Example #10
class CMSRucioDatasetReplica(object):
    """
    Class representing the replica at a site of a CMS Dataset (PhEDEx FileBlock)
    """

    #pylint: disable=too-many-arguments
    def __init__(self,
                 rds,
                 pnn,
                 rse=None,
                 scope=DEFAULT_SCOPE,
                 lifetime=None,
                 pcli=None,
                 rcli=None):
        """
        Get the status of replica of pditem at pnn
        considering only closed blocks completely replicated at site.

        :pnn:    PhEDEx node name.
        :rds:    Rucio Dataset (PhEDEx FileBlock) name.
        :rse:    Rucio RSE. If None (default) inferred by the pnn using DEFAULT_RSE_FMT.
        :scope:  Scope. Default: DEFAULT_SCOPE.
        :pcli:   Reference to a phedex.PhEDEx object or a dict
                 {'instance': <instance>, 'dasgoclient': <path>, 'datasvc': <url>}
                 none of the keys is mandatory. Default is {}.
        :rcli:   Reference to a rucio Client() instance or a dict
                 {'account': ..., ...}; none of the keys is mandatory.
                 Default is {'account': <sync account>}
        """

        self.pnn = pnn

        self._get_pcli(pcli)

        self._get_rcli(rcli)

        if rse is None:
            self.rse = self.rcli.list_rses('cms_type=real&pnn=%s' %
                                           self.pnn)[0]['rse']
        else:
            self.rse = rse

        self.container = self.pcli.check_data_item(pditem=rds)['pds']

        self.dataset = rds

        self.scope = scope

        self.lifetime = lifetime

        self.block_at_pnn()

        if self.is_at_pnn:
            self.replicas = self.pcli.fileblock_files(pnn=pnn, pfb=rds)
        else:
            self.replicas = {}

    def _get_pcli(self, pcli):
        if pcli is None:
            pcli = {}

        if isinstance(pcli, dict):
            self.pcli = PhEDEx(**pcli)
        elif isinstance(pcli, PhEDEx):
            #pylint: disable=redefined-variable-type
            self.pcli = pcli
        else:
            raise Exception("wrong type for pcli parameter %s" %\
                            type(pcli))

    def _get_rcli(self, rcli):
        if rcli is None:
            rcli = {}

        if isinstance(rcli, dict):
            if 'account' not in rcli:
                rcli['account'] = SYNC_ACCOUNT_FMT % self.pnn.lower()
            self.rcli = Client(**rcli)
        elif isinstance(rcli, Client):
            #pylint: disable=redefined-variable-type
            self.rcli = rcli
        else:
            raise Exception("wrong type for rcli parameter %s" %\
                            type(rcli))

    def block_at_pnn(self):
        """
        Verify whether the block is at the pnn (using the PhEDEx datasvc).
        """
        metadata = self.pcli.list_data_items(pditem=self.dataset,
                                             pnn=self.pnn,
                                             locality=True,
                                             metadata=True)
        self.is_at_pnn = bool(len(metadata) == 1 and
                              'block' in metadata[0] and
                              'replica' in metadata[0]['block'][0] and
                              metadata[0]['block'][0]['replica'][0]['complete'] == 'y')
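        # The check above assumes a datasvc-style payload of roughly
        # [{'block': [{'replica': [{'complete': 'y', ...}], ...}], ...}]
        # (a hedged sketch; the exact layout depends on the datasvc version).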

    def register_container(self, dry=False):
        """
        Register container of the dataset
        (only if there is a dataset replica on the pnn)
        :dry: Dry run. Default false.
        """

        try:
            self.rcli.get_did(scope=self.scope, name=self.container)
            return 'exists'
        except DataIdentifierNotFound:
            pass

        if self.is_at_pnn and dry:
            logging.dry('Create container %s in scope %s.', self.container,
                        self.scope)
            return 'created'
        elif self.is_at_pnn:
            logging.verbose('Create container %s in scope %s.', self.container,
                            self.scope)
            try:
                self.rcli.add_container(scope=self.scope,
                                        name=self.container,
                                        lifetime=self.lifetime)

            except DataIdentifierAlreadyExists:
                logging.warning('Container was created in the meanwhile')
                return 'exists'

            return 'created'

        return 'skipped'

    def register_dataset(self, dry=False):
        """
        Register the dataset (if there is a replica at the pnn)
        :dry: Dry run. Default false.
        """

        try:
            self.rcli.get_did(scope=self.scope, name=self.dataset)
            return 'exists'
        except DataIdentifierNotFound:
            pass

        if self.is_at_pnn and dry:
            logging.dry('Create dataset %s in scope %s.', self.dataset,
                        self.scope)
            return 'created'

        elif self.is_at_pnn:
            logging.verbose('Create dataset %s in scope %s.', self.dataset,
                            self.scope)
            self.rcli.add_dataset(scope=self.scope,
                                  name=self.dataset,
                                  lifetime=self.lifetime)
            self.rcli.attach_dids(scope=self.scope,
                                  name=self.container,
                                  dids=[{
                                      'scope': self.scope,
                                      'name': self.dataset
                                  }])
            return 'created'

        return 'skipped'

    def update_replicas(self, dry=False):
        """
        Add or removes replicas for the dataset at rse.
        :dry:  Dry run. Default: False.
        """

        logging.notice('Updating replicas for %s:%s at %s' %
                       (self.scope, self.dataset, self.rse))

        replicas = self.rcli.list_replicas([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                           rse_expression='rse=%s' % self.rse)

        rrepl = [repl['name'] for repl in replicas]

        prepl = list(self.replicas.keys())

        missing = list(set(prepl) - set(rrepl))

        to_remove = list(set(rrepl) - set(prepl))

        if missing and dry:
            logging.dry('Adding replicas %s to rse %s.', str(missing),
                        self.rse)

        elif missing:
            logging.verbose('Adding replicas %s to rse %s.', str(missing),
                            self.rse)

            self.rcli.add_replicas(rse=self.rse,
                                   files=[{
                                       'scope':
                                       self.scope,
                                       'name':
                                       self.replicas[lfn]['name'],
                                       'adler32':
                                       self.replicas[lfn]['checksum'],
                                       'bytes':
                                       self.replicas[lfn]['size'],
                                   } for lfn in missing])

            # missing files that are not in the list of dataset files
            # are to be attached.
            lfns = [
                item['name']
                for item in self.rcli.list_files(scope=self.scope,
                                                 name=self.dataset)
            ]

            missing_lfns = list(set(missing) - set(lfns))
            if missing_lfns:
                logging.verbose('Attaching lfns %s to dataset %s.',
                                str(missing_lfns), self.dataset)

                try:
                    self.rcli.attach_dids(
                        scope=self.scope,
                        name=self.dataset,
                        dids=[{
                            'scope': self.scope,
                            'name': lfn
                        } for lfn in missing_lfns])

                except FileAlreadyExists:
                    logging.warning('Trying to attach already existing files.')

        if to_remove and dry:
            logging.dry('Removing replicas %s from rse %s.', str(to_remove),
                        self.rse)

        elif to_remove:
            logging.verbose('Removing replicas %s from rse %s.',
                            str(to_remove), self.rse)
            for to_remove_chunk in chunks(to_remove, REMOVE_CHUNK_SIZE):
                attempt = 0
                while True:
                    attempt += 1
                    try:
                        self.rcli.delete_replicas(rse=self.rse,
                                                  files=[{
                                                      'scope': self.scope,
                                                      'name': lfn,
                                                  } for lfn in to_remove_chunk
                                                         ])
                        break
                    except DatabaseException:
                        logging.warning(
                            'DatabaseException raised, retrying...')
                        if attempt > 3:
                            raise
                        time.sleep(randint(1, 5))

        return {'added': missing, 'removed': to_remove}

    def update_rule(self, dry=False):
        """
        Adds or removes the rule for the dataset.
        :dry:  Dry run. Default: False.

        returns the action performed: None, added, removed
        """
        rules = self.rcli.list_did_rules(scope=self.scope, name=self.dataset)
        rrule = None
        account = self.rcli.__dict__['account']
        action = None
        rse_exp = 'rse=' + self.rse

        rrule = next((
            rule for rule in rules
            if rule['account'] == account and\
                rule['rse_expression'] == rse_exp
        ), None)

        if rrule is None and self.is_at_pnn:

            if dry:
                logging.dry("Adding rule for dataset %s at rse %s.",
                            self.dataset, self.rse)
            else:
                self.rcli.add_replication_rule(
                    dids=[{
                        'scope': self.scope,
                        'name': self.dataset
                    }],
                    copies=1,
                    rse_expression=rse_exp,
                )
            action = 'added'

        elif rrule is not None and not self.is_at_pnn:
            # removing rule
            if dry:
                logging.dry("Removing rule for dataset %s at rse %s.",
                            self.dataset, self.rse)
            else:
                self.rcli.delete_replication_rule(rrule['id'],
                                                  purge_replicas=False)
            action = 'removed'

        return action

    def update(self, dry=False):
        """
        Synchronize the dataset replica info.
        :dry:  Dry run. Default: False.
        """
        ret = {'at_node': self.is_at_pnn}

        #datasets and containers are only added
        ret['container'] = self.register_container(dry)
        ret['dataset'] = self.register_dataset(dry)

        ret['replicas'] = self.update_replicas(dry)
        ret['rule'] = self.update_rule(dry)

        return ret
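A usage sketch for the class above; the block and node names are placeholders, and dry=True keeps every step read-only:

    replica = CMSRucioDatasetReplica(rds='/Primary/Processed/TIER#guid',
                                     pnn='T2_US_Nebraska')
    summary = replica.update(dry=True)
    # {'at_node': ..., 'container': ..., 'dataset': ..., 'replicas': ..., 'rule': ...}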
Example #11
        help='datasets to be updated. Can contain wildcards and can be multiple')
    PARSER.add_argument('--pool',
                        dest='pool',
                        default=1,
                        help='number of parallel threads. Default 1.')

    OPTIONS = PARSER.parse_args()

    logging.my_lvl(OPTIONS.verbosity)

    #    logging.summary('DBP1')

    install_mp_handler()
    POOL = multiprocessing.Pool(int(OPTIONS.pool))

    PCLI = PhEDEx()

    PNNS = PCLI.pnns(select=OPTIONS.pnn)

    TIMING = {}

    WILDCARD = re.compile(r'\S*[*]\S*')

    DATASETS = get_timing(_get_dset_list(PCLI, OPTIONS.dataset), TIMING)

    PROCS = get_timing(_launch_workers(PNNS, DATASETS, POOL, OPTIONS, PCLI),
                       TIMING)

    get_timing(_get_workers(POOL, PROCS), TIMING)

    logging.summary(
Example #12
class SiteSyncer(object):
    def __init__(self, options):
        self.options = options

        self.config = load_config(options.config)
        self.last_synced = {}  # load_last_synced()
        self.phedex_svc = PhEDEx()
        self.patterns = []


    def sync_site(self, site_pair):
        """
        Sync a site defined by a site_pair of (site, prefix).
        Prefix can be None to sync all blocks at the site.
        :return:
        """
        site, prefix = site_pair

        if site.endswith('_Tape'):
            pnn = site.replace('_Tape', '_MSS')
        else:
            pnn = site

        if site == 'T3_CH_CERN_CTA_CastorTest':
            pnn = 'T0_CH_CERN_MSS'

        # now = int(time.time())

        # Set 1980 as the last sync date if no data exists
        # site_last_synced = self.last_synced.get(site_pair, 10 * 365 * 24 * 3600)
        # last_week = int(site_last_synced - 7 * 24 * 3600)

        BLOCKS_PER_ACTION = None  # no truncation unless configured below
        if self.config.get('default', None):
            if self.config['default'].get('chunck', 0):
                BLOCKS_PER_ACTION = int(self.config['default']['chunck'])
            if self.config['default'].get('select', None):
                self.patterns = [self.config['default']['select']]

        with monitor.record_timer_block('cms_sync.time_site_sync'):

            r_timer = 'cms_sync.time_rucio_block_list_all'
            p_timer = 'cms_sync.time_phedex_block_list_all'
            if prefix:
                r_timer = 'cms_sync.time_rucio_block_list_partial'
                p_timer = 'cms_sync.time_phedex_block_list_partial'

            # Add touches to keep from getting killed as long as progress is being made
            with monitor.record_timer_block(p_timer):
                touch(text='PQ ' + site)
                phedex_blocks = self.phedex_svc.blocks_at_site(pnn=pnn,
                                                               prefix=prefix,
                                                               since=None)
            with monitor.record_timer_block(r_timer):
                touch(text='RQ ' + site)
                rucio_blocks = self.get_datasets_at_rse(rse=site,
                                                        prefix=prefix)
                touch(text='DQ ' + site)

            n_blocks_in_phedex = len(phedex_blocks)
            n_blocks_in_rucio = len(rucio_blocks)

            # FIXME: This is refusing to delete everything from Rucio. Not clear it's needed
            if not n_blocks_in_phedex and n_blocks_in_rucio:
                logging.warning(
                    "At %s found %s blocks in PhEDEx and %s in Rucio with prefix %s",
                    site, n_blocks_in_phedex, n_blocks_in_rucio, prefix)
                return
            if not n_blocks_in_phedex and not n_blocks_in_rucio:
                logging.info(
                    "At %s:%s, nothing in PhEDEx or Rucio. Quitting." %
                    (site, prefix))
                return

            block_report = compare_site_blocks(phedex=phedex_blocks,
                                               rucio=rucio_blocks,
                                               rse=site,
                                               patterns=self.patterns)

            n_blocks_not_in_rucio = len(block_report['not_rucio'])
            n_blocks_not_in_phedex = len(block_report['not_phedex'])
            n_incomplete_blocks = len(block_report['incomplete'])

            logging.info("At %s:%s In both/PhEDEx only/Rucio only: %s/%s/%s" %
                         (site, prefix, len(block_report['complete']),
                          n_blocks_not_in_rucio, n_blocks_not_in_phedex))
            if len(block_report['complete']
                   ) or n_blocks_not_in_rucio or n_blocks_not_in_phedex:
                logging.info(
                    'At %s:%s %3.0f%% complete', site, prefix,
                    len(block_report['complete']) * 100 /
                    (len(block_report['complete']) + n_blocks_not_in_rucio +
                     n_blocks_not_in_phedex))
            if len(block_report['complete']) or n_blocks_not_in_rucio:
                logging.info(
                    'At %s:%s %3.0f%% completely added', site, prefix,
                    len(block_report['complete']) * 100 /
                    (len(block_report['complete']) + n_blocks_not_in_rucio))
            # Truncate lists if we want to reduce cycle time
            if BLOCKS_PER_ACTION and n_blocks_not_in_rucio > BLOCKS_PER_ACTION:
                block_report['not_rucio'] = set(
                    list(block_report['not_rucio'])[:BLOCKS_PER_ACTION])
                n_blocks_not_in_rucio = len(block_report['not_rucio'])
            if BLOCKS_PER_ACTION and n_blocks_not_in_phedex > BLOCKS_PER_ACTION:
                block_report['not_phedex'] = set(
                    list(block_report['not_phedex'])[:BLOCKS_PER_ACTION])
                n_blocks_not_in_phedex = len(block_report['not_phedex'])

            logging.info('Adding   %6d blocks to   Rucio for %s:%s',
                         n_blocks_not_in_rucio, site, prefix)
            for block in block_report['not_rucio']:
                logging.info('Adding to rucio: %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)
                bs.add_to_rucio()

            logging.info('Removing %6d blocks from Rucio for %s:%s',
                         n_blocks_not_in_phedex, site, prefix)
            for block in block_report['not_phedex']:
                logging.info('Removing from rucio: %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)
                bs.remove_from_rucio()

            for block in block_report['incomplete']:
                logging.warning('Redoing sync for %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)
                bs.add_to_rucio(recover=True)

            logging.info('Finished syncing                      %s:%s' %
                         (site, prefix))

    def chunks_to_sync(self):
        """
        Turn the config into a list of site/prefix pairs which need to be synced
        :return: The site prefix pairs
        """

        to_sync = []

        for site, site_config in self.config.items():
            print('Site %s (%s) is ok %s' %
                  (site, type(site), site not in ['default', 'main']))
            if site not in ['default', 'main']:
                if site_config.get('multi_das_calls', False):
                    # At the largest sites some first letters cover too
                    # many datasets for a single DAS call, so split them
                    # into two-character prefixes.
                    split_prefixes = {
                        'S': (('CERN', 'FNAL', '_Tape'),
                              ('Sc', 'Se', 'Si', 'Sp', 'St', 'SI', 'SM',
                               'ST', 'SU', 'SV', 'SS', 'Su', 'SP', 'SL')),
                        'M': (('T0', 'FNAL', '_Tape'),
                              ('Ma', 'MC', 'ME', 'Mi', 'Mo', 'MS', 'Mu')),
                        'D': (('T0', 'FNAL', '_Tape'),
                              ('D0', 'Da', 'Di', 'DM', 'Do', 'Dp', 'DP',
                               'Ds', 'DS', 'DY')),
                        'T': (('T0', 'FNAL', '_Tape'),
                              ('T1', 'T4', 'T5', 'TH', 'TK', 'TO', 'TA',
                               'TB', 'TC', 'TG', 'TZ', 'T_', 'TS', 'TT',
                               'TW', 'Tk', 'To', 'Ta', 'Tb', 'Te', 'Tp',
                               'Tr', 'Ts', 'Tt', 'Tw', 'Ty')),
                        'H': (('CERN', 'FNAL'),
                              ('H0', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6',
                               'Ha', 'HA', 'Hc', 'He', 'HE', 'HF', 'Hi',
                               'HI', 'HJ', 'HL', 'Hp', 'HP', 'Hs', 'HS',
                               'HT', 'HV', 'HW', 'Hy', 'HZ')),
                        'C': (('T0', 'FNAL', '_Tape', '_CTA'),
                              ('Ca', 'CE', 'CG', 'Ch', 'CI', 'CM', 'Co',
                               'CS')),
                        'Z': (('CERN', 'FNAL'),
                              ('Z0', 'Z1', 'Z2', 'Z3', 'Z4', 'Z5', 'ZA',
                               'Zb', 'ZB', 'Zc', 'ZC', 'Ze', 'ZE', 'ZG',
                               'ZH', 'ZJ', 'ZL', 'Zm', 'ZM', 'Zn', 'ZN',
                               'Zp', 'ZP', 'ZR', 'Zt', 'ZT', 'ZU', 'ZV',
                               'ZZ')),
                        'G': (('CERN', 'FNAL'),
                              ('G_', 'G1', 'Ga', 'Ge', 'GF', 'GG', 'Gj',
                               'GJ', 'Gl', 'GM', 'Gr', 'Gs', 'GV')),
                    }
                    for prefix in string.ascii_letters + string.digits:
                        site_tokens, sub_prefixes = split_prefixes.get(
                            prefix, ((), ()))
                        if any(token in site for token in site_tokens):
                            to_sync.extend(
                                (site, sub) for sub in sub_prefixes)
                        else:
                            to_sync.append((site, prefix))
                else:
                    to_sync.append((site, None))

        # Cut the list (keep in order but choose a random starting point)
        offset = random.randrange(len(to_sync))
        to_sync = to_sync[offset:] + to_sync[:offset]

        # Debug override: uncomment to restrict the sync to a fixed set
        # of site/prefix pairs instead of the computed list.
        # to_sync = [
        #     ('T0_CH_CERN_Tape', 'DQ'),
        #     ('T0_CH_CERN_Tape', 'TAC'),
        # ]

        return to_sync

    @staticmethod
    def get_datasets_at_rse(rse, prefix=None):
        """

        :param rse: The RSE name
        :param prefix: Character(s) to restrict the dataset search
        :return: a dictionary with <dataset name>: <number of files>
        """

        filters = {'scope': 'cms', 'did_type': DIDType.DATASET}
        if prefix:
            filters['name'] = '/' + prefix + '*'

        account = SYNC_ACCOUNT_FMT % rse.lower()
        rule_filters = {
            'account': account,
            'scope': 'cms',
            'did_type': DIDType.DATASET
        }

        with monitor.record_timer_block('cms_sync.time_rse_datasets'):
            synced_ds = {
                item['name']
                for item in list_replication_rules(filters=rule_filters)
                if item['expires_at'] is None and (
                    prefix is None or item['name'].startswith('/' + prefix))
            }

            all_datasets = [
                dataset['name']
                for dataset in list_datasets_per_rse(rse=rse, filters=filters)
            ]

            logging.info('Getting all datasets at %s with prefix %s', rse,
                         prefix)

            datasets = {}

            for dataset in all_datasets:
                if dataset in synced_ds:
                    for ds in list_dataset_replicas(scope='cms',
                                                    name=dataset,
                                                    deep=True):
                        if ds['rse'] == rse:
                            datasets.update({dataset: ds['available_length']})

        return datasets
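
A minimal driver sketch for the syncer above (assumptions: the enclosing class is named SiteSyncer as in the variant shown later in Example #16, and `options` is an argparse-style object whose `config` attribute points at the sync configuration):

def run_sync(options):
    # Walk the site/prefix chunks and sync each one. chunks_to_sync()
    # already rotates the list to a random starting point, so repeated
    # runs do not always begin at the same site.
    syncer = SiteSyncer(options)
    for site_pair in syncer.chunks_to_sync():
        syncer.sync_site(site_pair)
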
Example #13
0
class CMSRSE(object):
    """
    Wraps the definition of a CMS RSE, gathering the information
    from PhEDEx and translating it into the definition of a Rucio RSE
    for the different expected types: real, test, temp.
    """
    def __init__(self,
                 pnn,
                 account,
                 auth_type=None,
                 rsetype=DEFAULT_RSETYPE,
                 suffix=None,
                 dry=False,
                 fts=None,
                 tier=None,
                 lfn2pfn_algorithm=None,
                 country=None,
                 attrs=None,
                 seinfo=None,
                 tfc=None,
                 tfc_exclude=EXCLUDE_TFC,
                 domains=None,
                 space_token=None,
                 add_prefix=None,
                 proto=DEFAULT_PROTOCOL,
                 instance=DEFAULT_PHEDEX_INST,
                 dasgoclient=DEFAULT_DASGOCLIENT,
                 datasvc=DEFAULT_DATASVC_URL):

        attrs = attrs or []

        self.pnn = pnn
        self.rsetype = rsetype
        if suffix is None:
            suffix = DEFAULT_SUFFIXES[rsetype]

        self.suffix = suffix
        if pnn.endswith('_MSS'):
            raise ValueError(
                'Please import PhEDEx _Buffer pnns rather than _MSS for tape endpoints'
            )
        elif pnn.endswith('_Buffer'):
            self.rsename = pnn.replace('_Buffer', '_Tape') + self.suffix
            self.rucio_rse_type = 'TAPE'
        else:
            self.rsename = pnn + self.suffix
            self.rucio_rse_type = 'DISK'

        if tfc and os.path.isdir(tfc):
            self.tfc = tfc + '/' + pnn + '/PhEDEx/storage.xml'
        else:
            self.tfc = tfc

        self.pcli = PhEDEx(instance=instance,
                           dasgoclient=dasgoclient,
                           datasvc=datasvc)
        self.rcli = Client(account=account, auth_type=auth_type)

        self.dry = dry

        self._get_attributes(fts, tier, lfn2pfn_algorithm, country, attrs)

        self._get_settings()

        self._get_protocol(seinfo, add_prefix, tfc_exclude, domains,
                           space_token, proto)

    def _get_attributes(self, fts, tier, lfn2pfn_algorithm, country, xattrs):
        """
        Gets the expected RSE attributes according to the
        given cmsrse parameters and to the info from phedex
        :fts:               fts server. If None the server defined for
                            the pnn is taken.
        :tier:              tier. If None it is taken from pnn
        :lfn2pfn_algorithm: algorithm for lfn2pfn. If None the default
                            rsetype to lfn2pfn mapping is used
        :country:           country code. If None it is taken from pnn
        :xattrs:            extra attributes
        """

        attrs = {}
        attrs['fts'] = fts or self.pcli.fts(self.pnn)[0]

        pnn_match = PNN_MATCH.match(self.pnn)

        attrs['tier'] = tier or pnn_match.group(1)

        attrs['country'] = country or pnn_match.group(2)

        attrs['lfn2pfn_algorithm'] = lfn2pfn_algorithm or LFN2PFN_BYTYPE[
            self.rsetype]

        attrs[self.rsename] = 'True'

        attrs['pnn'] = self.pnn

        attrs['cms_type'] = self.rsetype

        for (key, value) in xattrs:
            attrs[key] = value

        self.attrs = attrs
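        # For illustration only (hypothetical values, not from this
        # document), the resulting dict looks roughly like:
        #   {'fts': 'https://fts.example.org:8446', 'tier': '2',
        #    'country': 'XX', 'lfn2pfn_algorithm': 'cmstfc',
        #    'T2_XX_Example_Test': 'True', 'pnn': 'T2_XX_Example',
        #    'cms_type': 'test'}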

    def _set_attributes(self):
        try:
            rattrs = self.rcli.list_rse_attributes(rse=self.rsename)
        except RSENotFound:
            rattrs = {}

        changed = False

        for (key, value) in self.attrs.items():
            if key not in rattrs or rattrs[key] != value:
                # Hack: there is no way to set an attribute to 1; the
                # server systematically reinterprets it as True.
                if key in rattrs and rattrs[key] is True and \
                        (str(value) == '1' or str(value) == 'True'):
                    continue

                if key not in rattrs:
                    rattrs[key] = 'None'
                logging.debug(
                    'setting attribute %s from value %s to value %s for rse %s',
                    key, rattrs[key], value, self.rsename)
                changed = True
                if self.dry:
                    logging.info(
                        'setting attribute %s to value %s for rse %s. Dry run, skipping',
                        key, value, self.rsename)
                else:
                    self.rcli.add_rse_attribute(rse=self.rsename,
                                                key=key,
                                                value=value)

        return changed

    def _get_settings(self):
        """
        Get expected settings for the RSE
        (so far only deterministic vs non-deterministic)
        """
        self.settings = {}
        if self.attrs['lfn2pfn_algorithm'] == 'hash':
            self.settings['deterministic'] = False
        else:
            self.settings['deterministic'] = True
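        # A deterministic RSE lets Rucio derive the PFN from scope and
        # LFN alone via the lfn2pfn algorithm; hash-based placement
        # breaks that assumption, so those RSEs are non-deterministic.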

    def _check_lfn2pfn(self):
        """
        Checks that lfn2pfn works properly
        """
        for lfn in SE_PROBES_BYTYPE[self.rsetype]:

            # this is what rucio does
            pfn = self.proto['scheme'] + '://' + self.proto['hostname'] + \
                  ':' + str(self.proto['port'])

            if 'web_service_path' in self.proto['extended_attributes']:
                pfn = pfn + self.proto['extended_attributes'][
                    'web_service_path']

            pfn = pfn + '/' + cmstfc('cms', lfn, None, None, self.proto)

            # this should match dataservice pfn, modulo some normalization
            # (e.g.: adding the port number)
            pfn_datasvc = []

            wo_port = self.pcli.lfn2pfn(
                pnn=self.pnn,
                lfn=lfn,
                tfc=self.tfc,
                protocol=self.proto['extended_attributes']['tfc_proto'])
            wo_port = re.sub('/+', '/', wo_port)
            w_port = wo_port.replace(
                self.proto['hostname'],
                self.proto['hostname'] + ':' + str(self.proto['port']))

            # Get rid of ALL multiple slashes, including separating protocol from host (valid for comparison only)
            pfn_datasvc.append(wo_port)
            pfn_datasvc.append(w_port)
            pfn = re.sub('/+', '/', pfn)

            if pfn not in pfn_datasvc:
                raise Exception(
                    "rucio and datasvc lfn2pfn mismatch, rucio: %s ; datasvc: %s"
                    % (pfn, pfn_datasvc))

            logging.debug("checking lfn2pfn ok %s", pfn)

    def _get_protocol(self, seinfo, add_prefix, exclude, domains, token,
                      proto):
        """
        Get the informations about the RSE protocol from creator argument or
        from phedex
        :seinfo:      informations about the SE (in the form of the seinfo method of PhEDEx class).
                      If None the info is gathered from PhEDEx using the seinfo method.
        :add_prefix:  path to be added to the prefix in seinfo. if none
                      SE_ADD_PREFIX_BYTYPE is used.
        :tfc:         dictionnary with tfc rules. If None the info is gathered from PhEDEx using
                      the PhEDEx.tfc method,
        :exclude:     rules to be excluded from tfc (in case it is gathered from PhEDEx).
        :domains:     domains dictionnary. If none the DOMAINS_BYTYPE constant is used.
        :token:       space token. default None
        :proto:       protocol to be considered. default DEFAULT_PROTOCOL.
        """

        seinfo = seinfo or self.pcli.seinfo(
            pnn=self.pnn,
            probes=SE_PROBES_BYTYPE[self.rsetype],
            protocol=proto,
            tfc=self.tfc)

        if self.tfc is not None and self.tfc[0] == '/':
            pnn_arg = self.tfc
            self.tfc = None
        else:
            pnn_arg = self.pnn

        self.tfc = self.tfc or self.pcli.tfc(pnn=pnn_arg,
                                             dump=False,
                                             exclude=exclude,
                                             normalize=seinfo,
                                             proto=proto)

        domains = domains or DOMAINS_BYTYPE[self.rsetype]

        self.proto = {
            'scheme': seinfo['protocol'],
            'hostname': seinfo['hostname'],
            'port': seinfo['port'],
            'extended_attributes': {},
            'domains': domains
        }

        if 'webpath' in seinfo:
            self.proto['extended_attributes']['web_service_path'] = seinfo[
                'webpath']

        if self.attrs['lfn2pfn_algorithm'] == 'cmstfc':
            self.proto['prefix'] = '/'
            self.proto['extended_attributes']['tfc_proto'] = proto
            self.proto['extended_attributes']['tfc'] = self.tfc
            self._check_lfn2pfn()
        else:
            if self.rsetype == "temp":
                if 'webpath' in seinfo:
                    self.proto['prefix'] = seinfo['prefix']
                else:
                    self.proto['prefix'] = '/' + seinfo['prefix']
            else:
                self.proto['prefix'] = seinfo['prefix']

        if add_prefix is None:
            add_prefix = SE_ADD_PREFIX_BYTYPE[self.rsetype]

        self.proto['prefix'] += add_prefix

        if token:
            self.proto['extended_attributes']['space_token'] = token

        if self.proto['extended_attributes'] == {}:
            self.proto['extended_attributes'] = None

        self.proto['impl'] = 'rucio.rse.protocols.gfalv2.Default'
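        # Illustrative shape of the resulting definition (placeholder
        # values, not from this document):
        #   {'scheme': 'srm', 'hostname': 'se.example.org', 'port': 8443,
        #    'prefix': '/store/', 'domains': {...},
        #    'extended_attributes': {'web_service_path': '...',
        #                            'tfc_proto': 'srmv2', 'tfc': [...]},
        #    'impl': 'rucio.rse.protocols.gfalv2.Default'}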

    def _set_protocol(self):
        try:
            rprotos = self.rcli.get_protocols(rse=self.rsename)
        except (RSEProtocolNotSupported, RSENotFound):
            rprotos = []

        rproto = {}

        for item in rprotos:
            if item['scheme'] == self.proto['scheme']:
                rproto = item
                break

        update = False
        if self.proto != rproto:
            logging.debug(
                "protocol definition not as expected: rucio=%s, expected=%s",
                str(rproto), str(self.proto))
            update = True

        if update:
            if self.dry:
                logging.info('Modifying protocol to %s. Dry run, skipping',
                             str(self.proto))
                return update

            try:
                self.rcli.delete_protocols(rse=self.rsename,
                                           scheme=self.proto['scheme'])
            except RSEProtocolNotSupported:
                logging.debug("Cannot remove protocol (scheme, rse) = (%s,%s)",
                              self.proto['scheme'], self.rsename)
            # Re-register the protocol with the expected definition
            self.rcli.add_protocol(rse=self.rsename, params=self.proto)

        return update

    def _create_rse(self):

        create = False

        try:
            rse = self.rcli.get_rse(self.rsename)
        except RSENotFound:
            create = True

        if not create and rse['deterministic'] != self.settings[
                'deterministic']:
            raise Exception(
                "The rse %s was created with the wrong deterministic setting!"
                % self.rsename)

        if create:
            if self.dry:
                logging.info(
                    'creating rse %s with deterministic %s and type %s. Dry run, skipping',
                    self.rsename, self.settings['deterministic'],
                    self.rucio_rse_type)
            else:
                self.rcli.add_rse(self.rsename,
                                  deterministic=self.settings['deterministic'],
                                  rse_type=self.rucio_rse_type)
                logging.debug('created rse %s', self.rsename)

        return create

    def update(self):
        """
        Creates, if needed, and updates the RSE according
        to CMS rules and PhEDEx data.
        """

        create_res = self._create_rse()

        attrs_res = self._set_attributes()
        proto_res = self._set_protocol()

        return create_res or attrs_res or proto_res
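
A hedged usage sketch for CMSRSE (the pnn and account below are placeholders, not values from this document; dry=True keeps the run read-only):

rse = CMSRSE(pnn='T2_XX_Example', account='sync_account', dry=True)
if rse.update():
    logging.info('RSE %s would be created or modified', rse.rsename)
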
Example #14
0
    OPTIONS = PARSER.parse_args()

    if OPTIONS.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    if OPTIONS.domains:
        OPTIONS.domains = json.loads(OPTIONS.domains.replace("'", '"'))

    if OPTIONS.seinfo is not None:
        OPTIONS.seinfo = json.loads(OPTIONS.seinfo.replace("'", '"'))

    if 'all' in OPTIONS.pnn:
        OPTIONS.pnn = PhEDEx(instance=OPTIONS.instance).pnns(
            select=OPTIONS.select, exclude=OPTIONS.exclude)

    CHANGED = []
    TOT = []

    for node_name in OPTIONS.pnn:
        for rse_type in OPTIONS.type:
            logging.info('Starting pnn %s and type %s', node_name, rse_type)
            RSE = CMSRSE(pnn=node_name,
                         rsetype=rse_type,
                         account=OPTIONS.account,
                         dry=OPTIONS.dry,
                         suffix=OPTIONS.suffix,
                         fts=OPTIONS.fts,
                         tier=OPTIONS.tier,
                         lfn2pfn_algorithm=OPTIONS.lfn2pfn,
Example #15
0
class LinksMatrix(object):
    """
    CMS RSE distances according to a set of rules
    """
    def __init__(self,
                 account,
                 auth_type=None,
                 exclude=DEFAULT_EXCLUDE_LINKS,
                 distance=None,
                 phedex_links=False,
                 rselist=None,
                 instance=DEFAULT_PHEDEX_INST,
                 datasvc=DEFAULT_DATASVC_URL):

        if distance is None:
            distance = DEFAULT_DISTANCE_RULES

        self.pcli = PhEDEx(instance=instance, datasvc=datasvc)
        self.rcli = Client(account=account, auth_type=auth_type)

        self._get_rselist(rselist)

        self._get_matrix(distance, phedex_links, exclude)

    def _get_rselist(self, rselist=None):

        self.rselist = []

        if rselist is None:
            rselist = [rse['rse'] for rse in self.rcli.list_rses()]

        for rse in rselist:
            attrs = self.rcli.list_rse_attributes(rse=rse)

            try:
                self.rselist.append({
                    'rse': rse,
                    'pnn': attrs['pnn'],
                    'type': attrs['cms_type'],
                    'country': attrs['country'],
                    'region': attrs.get('region', None)
                })
            except KeyError:
                logging.warning('No expected attributes for RSE %s. Skipping',
                                rse)

    def _get_matrix(self, distance, phedex_links, exclude):

        if phedex_links:
            matrix = self.pcli.links()
        else:
            matrix = {}

        self.links = {}

        for src in self.rselist:
            for dest in self.rselist:

                src_rse = src['rse']
                dest_rse = dest['rse']
                src_pnn = src['pnn']
                dest_pnn = dest['pnn']

                link = -1

                # Within site or in defined region, don't consult PhEDEx
                if dest_pnn == src_pnn:
                    link = distance['site']
                elif src['region'] and dest['region'] and src[
                        'region'] == dest['region']:
                    if src['country'] == dest['country']:
                        link = distance['region&country']
                    else:
                        link = distance['region']
                # Otherwise fall back to the PhEDEx link info if it exists
                elif src_pnn in matrix and dest_pnn in matrix[src_pnn]:
                    link = distance['site'] - matrix[src_pnn][dest_pnn]
                else:
                    if src['country'] == dest['country']:
                        link = distance['country']
                    else:
                        link = distance['other']

                if src_rse not in self.links:
                    self.links[src_rse] = {}

                self.links[src_rse][dest_rse] = link

        self._filter_matrix(exclude)

    def _filter_matrix(self, exclude):

        for src in self.rselist:
            for dest in self.rselist:

                if src['rse'] == dest['rse']:
                    continue

                for rule in exclude:
                    matched = True

                    for item in rule['src']:
                        if not re.match(rule['src'][item], src[item]):
                            matched = False

                    for item in rule['dest']:
                        if not re.match(rule['dest'][item], dest[item]):
                            matched = False

                    if matched:
                        self.links[src['rse']][dest['rse']] = -1
                        break

    def update(self,
               overwrite=False,
               disable=True,
               dry=False,
               srcselect=r'\S+',
               dstselect=r'\S+'):
        """
        Updates distances according to what is expected
        :overwrite:   overwrite distance of the links that already exist
        :disable:     unset ranking and distance for the links that
                      should be disabled
        :dry:         dry run
        :srcselect:   regexp selecting the source RSEs to consider
        :dstselect:   regexp selecting the destination RSEs to consider
        """

        count = {'checked': [], 'created': [], 'updated': [], 'disabled': []}

        src_regex = re.compile(srcselect)
        dst_regex = re.compile(dstselect)

        for src in self.rselist:
            srse = src['rse']
            logging.info("Setting links from %s to %s other RSEs.", srse,
                         len(self.rselist))
            for dest in self.rselist:
                drse = dest['rse']

                if srse == drse or not src_regex.match(
                        srse) or not dst_regex.match(drse):
                    continue

                count['checked'].append([srse, drse])

                # TODO: double-check that src and dest are not reversed
                link = self.rcli.get_distance(srse, drse)

                if srse in self.links and drse in self.links[
                        srse] and self.links[srse][drse] >= 0:
                    if not link:
                        pars = {
                            'distance': 1,
                            'ranking': self.links[srse][drse]
                        }

                        if dry:
                            logging.info(
                                "adding link from %s to %s with %s. Dry Run",
                                srse, drse, str(pars))
                        else:
                            self.rcli.add_distance(srse, drse, pars)

                        count['created'].append([srse, drse])

                    elif link and overwrite:
                        if dry:
                            logging.info(
                                "setting distance %s for link from %s to %s. Dry run.",
                                self.links[srse][drse], srse, drse)
                        else:
                            self.rcli.update_distance(
                                srse, drse, {
                                    'ranking': self.links[srse][drse],
                                    'distance': 1
                                })

                        count['updated'].append([srse, drse])

                elif link and disable:
                    if dry:
                        logging.info("disabling link from %s to %s. Dry run",
                                     srse, drse)
                    else:
                        self.rcli.update_distance(srse, drse, {
                            'ranking': None,
                            'distance': None,
                        })

                    count['disabled'].append([srse, drse])

        return count
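
A usage sketch for LinksMatrix (the account name is a placeholder): build the distance matrix from the RSE attributes registered in Rucio, then apply it with a dry run first:

matrix = LinksMatrix(account='transfer_ops')
counts = matrix.update(overwrite=True, dry=True)
logging.info('links checked/created/updated/disabled: %s',
             {key: len(val) for key, val in counts.items()})
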
Example #16
0
class SiteSyncer(object):
    def __init__(self, options):
        self.options = options

        self.config = load_config(options.config)
        self.last_synced = {}  # load_last_synced()
        self.phedex_svc = PhEDEx()

    def sync_site(self, site_pair):
        """
        Sync a site defined by a site_pair of (site, prefix).
        Prefix can be None to sync all blocks in the site.
        """
        site, prefix = site_pair
        # now = int(time.time())

        # Set 1980 as the last sync date if no data exists
        # site_last_synced = self.last_synced.get(site_pair, 10 * 365 * 24 * 3600)
        # last_week = int(site_last_synced - 7 * 24 * 3600)

        # Fall back to the module-level default unless the config
        # overrides it (the config key is historically spelled 'chunck').
        blocks_per_action = BLOCKS_PER_ACTION
        if self.config.get('default', None):
            if self.config['default'].get('chunck', 0):
                blocks_per_action = int(self.config['default']['chunck'])

        with monitor.record_timer_block('cms_sync.time_site_sync'):

            r_timer = 'cms_sync.time_rucio_block_list_all'
            p_timer = 'cms_sync.time_phedex_block_list_all'
            if prefix:
                r_timer = 'cms_sync.time_rucio_block_list_partial'
                p_timer = 'cms_sync.time_phedex_block_list_partial'

            with monitor.record_timer_block(p_timer):
                phedex_blocks = self.phedex_svc.blocks_at_site(pnn=site,
                                                               prefix=prefix,
                                                               since=None)
            with monitor.record_timer_block(r_timer):
                rucio_blocks = self.get_datasets_at_rse(rse=site,
                                                        prefix=prefix)

            n_blocks_in_phedex = len(phedex_blocks)
            n_blocks_in_rucio = len(rucio_blocks)

            # FIXME: This is refusing to delete everything from Rucio. Not clear it's needed
            if not n_blocks_in_phedex and n_blocks_in_rucio:
                logging.warning(
                    "At %s found %s blocks in PhEDEx and %s in Rucio with prefix %s",
                    site, n_blocks_in_phedex, n_blocks_in_rucio, prefix)
                return
            if not n_blocks_in_phedex and not n_blocks_in_rucio:
                logging.info(
                    "At %s:%s, nothing in PhEDEx or Rucio. Quitting.",
                    site, prefix)
                return

            block_report = compare_site_blocks(phedex=phedex_blocks,
                                               rucio=rucio_blocks,
                                               rse=site)

            n_blocks_not_in_rucio = len(block_report['not_rucio'])
            n_blocks_not_in_phedex = len(block_report['not_phedex'])
            n_incomplete_blocks = len(block_report['incomplete'])

            logging.info("At %s: In both/PhEDEx only/Rucio only: %s/%s/%s" %
                         (site, len(block_report['complete']),
                          n_blocks_not_in_rucio, n_blocks_not_in_phedex))

            # Truncate lists if we want to reduce cycle time
            if blocks_per_action and n_blocks_not_in_rucio > blocks_per_action:
                block_report['not_rucio'] = set(
                    list(block_report['not_rucio'])[:blocks_per_action])
                n_blocks_not_in_rucio = len(block_report['not_rucio'])
            if blocks_per_action and n_blocks_not_in_phedex > blocks_per_action:
                block_report['not_phedex'] = set(
                    list(block_report['not_phedex'])[:blocks_per_action])
                n_blocks_not_in_phedex = len(block_report['not_phedex'])

            logging.info('Adding   %6d blocks to   Rucio for %s:%s',
                         n_blocks_not_in_rucio, site, prefix)
            for block in block_report['not_rucio']:
                bs = BlockSyncer(block_name=block, pnn=site, rse=site)
                bs.add_to_rucio()

            logging.info('Removing %6d blocks from Rucio for %s:%s',
                         n_blocks_not_in_phedex, site, prefix)
            for block in block_report['not_phedex']:
                bs = BlockSyncer(block_name=block, pnn=site, rse=site)
                bs.remove_from_rucio()

            for block in block_report['incomplete']:
                logging.warning('Redoing sync for %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=site, rse=site)
                bs.add_to_rucio(recover=True)

            logging.info('Finished syncing                      %s:%s',
                         site, prefix)

        # FIXME: Resurrect code to check for size differences

        # self.last_synced[site_pair] = now
        # save_last_synced(self.last_synced)

    def chunks_to_sync(self):
        """
        Turn the config into a list of site/prefix pairs which need to be synced
        :return: The site prefix pairs
        """

        to_sync = []

        for site, site_config in self.config.items():
            if site not in ['default', 'main']:
                if site_config.get('multi_das_calls', False):
                    for prefix in list(string.ascii_letters + string.digits):
                        to_sync.append((site, prefix))
                else:
                    to_sync.append((site, None))
        random.shuffle(to_sync)
        return to_sync

    @staticmethod
    def get_datasets_at_rse(rse, prefix=None):
        """

        :param rse: The RSE name
        :param prefix: Character(s) to restrict the dataset search
        :return: a dictionary with <dataset name>: <number of files>
        """

        filters = {'scope': 'cms', 'did_type': DIDType.DATASET}
        if prefix:
            filters['name'] = '/' + prefix + '*'

        with monitor.record_timer_block('cms_sync.time_rse_datasets'):
            all_datasets = [
                dataset['name']
                for dataset in list_datasets_per_rse(rse=rse, filters=filters)
            ]

            datasets = {}

            for dataset in all_datasets:
                for ds in list_dataset_replicas(scope='cms',
                                                name=dataset,
                                                deep=True):
                    if ds['rse'] == rse:
                        datasets[dataset] = ds['available_length']

            # datasets = {dataset['name']: dataset['available_length']
            #             for dataset in list_datasets_per_rse(rse=rse, filters=filters, deep=True)}

        return datasets
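
Since get_datasets_at_rse is a static method, it can be exercised without constructing a full SiteSyncer; a hedged example (the RSE name is a placeholder):

file_counts = SiteSyncer.get_datasets_at_rse('T2_XX_Example', prefix='Z')
for name, n_files in sorted(file_counts.items()):
    print('%6d files  %s' % (n_files, name))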