def __init__(self, pnn, account, auth_type=None,
             rsetype=DEFAULT_RSETYPE, suffix=None, dry=False, fts=None,
             tier=None, lfn2pfn_algorithm=None, country=None, attrs=None,
             seinfo=None, tfc=None, tfc_exclude=EXCLUDE_TFC, domains=None,
             space_token=None, add_prefix=None, proto=DEFAULT_PROTOCOL,
             instance=DEFAULT_PHEDEX_INST, dasgoclient=DEFAULT_DASGOCLIENT,
             datasvc=DEFAULT_DATASVC_URL):

    attrs = attrs or []

    self.pnn = pnn
    self.rsetype = rsetype

    if suffix is None:
        suffix = DEFAULT_SUFFIXES[rsetype]
    self.suffix = suffix

    self.rsename = pnn + self.suffix

    if tfc and os.path.isdir(tfc):
        self.tfc = tfc + '/' + pnn + '/PhEDEx/storage.xml'
    else:
        self.tfc = tfc

    self.pcli = PhEDEx(instance=instance, dasgoclient=dasgoclient,
                       datasvc=datasvc)
    self.rcli = Client(account=account, auth_type=auth_type)

    self.dry = dry

    self._get_attributes(fts, tier, lfn2pfn_algorithm, country, attrs)
    self._get_settings()
    self._get_protocol(seinfo, add_prefix, tfc_exclude, domains,
                       space_token, proto)
def _get_pcli(self, pcli):
    if pcli is None:
        pcli = {}

    if isinstance(pcli, dict):
        self.pcli = PhEDEx(**pcli)
    elif isinstance(pcli, PhEDEx):  # pylint: disable=redefined-variable-type
        self.pcli = pcli
    else:
        raise Exception("wrong type for pcli parameter %s" % type(pcli))
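# Note (illustrative, not part of the original source): the pcli argument is
# duck-typed. Both of the following forms are accepted by _get_pcli above;
# the instance name 'prod' is a placeholder.
#
#     self._get_pcli({'instance': 'prod'})   # builds a new PhEDEx(instance='prod')
#     self._get_pcli(PhEDEx())               # reuses an existing PhEDEx client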
def __init__(self, block_name, pnn, rse=None, lifetime=None, dry_run=False):
    """
    Get the status of the replica of block_name at pnn, considering only
    closed blocks completely replicated at the site.

    :block_name: PhEDEx block name.
    :pnn:        PhEDEx node name.
    :rse:        Rucio RSE. If None (default), inferred from the pnn.
    :lifetime:   lifetime of the DIDs created, if any.
    :dry_run:    do not apply any change.
    """
    self.phedex_svc = PhEDEx()
    self.dry_run = dry_run

    self.pnn = pnn
    if rse is None:
        self.rse = list_rses('cms_type=real&pnn=%s' % self.pnn)[0]['rse']
    else:
        self.rse = rse
    rse_details = get_rse(self.rse)
    self.rse_id = rse_details['id']

    # Truncate the derived account name to 25 characters.
    self.account = (SYNC_ACCOUNT_FMT % self.rse.lower())[:25]
    self.container = self.phedex_svc.check_data_item(pditem=block_name)['pds']
    self.scope = DEFAULT_SCOPE
    self.block_name = block_name
    self.lifetime = lifetime

    self.group, self.custodial, self.is_at_pnn = self.phedex_svc.block_at_pnn_phedex(
        block=self.block_name, pnn=self.pnn)
    self.block_in_phedex = self.phedex_svc.block_exists(block=self.block_name)
    self.block_known = self.phedex_svc.block_known(block=self.block_name)

    if self.is_at_pnn:
        self.replicas = self.phedex_svc.fileblock_files_phedex(pnn=pnn, pfb=block_name)
    else:
        self.replicas = {}

    self.container_exists = None
    self.block_exists = None
    self.rule_exists = None

    touch(text=self.rse)
class SiteSyncer(object): def __init__(self, options): self.options = options self.config = load_config(options.config) self.last_synced = {} # load_last_synced() self.phedex_svc = PhEDEx() self.patterns = [] return def sync_site(self, site_pair): """ Sync a site defined by a site_pair of (site, prefix). Prefix can be None to sync all blocks in the site :return: """ site, prefix = site_pair if site.endswith('_Tape'): pnn = site.replace('_Tape', '_MSS') else: pnn = site # now = int(time.time()) # Set 1980 as the last sync date if no data exists # site_last_synced = self.last_synced.get(site_pair, 10 * 365 * 24 * 3600) # last_week = int(site_last_synced - 7 * 24 * 3600) if self.config.get('default', None): if self.config['default'].get('chunck', 0): BLOCKS_PER_ACTION = int(self.config['default']['chunck']) if self.config['default'].get('select', None): self.patterns = [self.config['default']['select']] with monitor.record_timer_block('cms_sync.time_site_sync'): r_timer = 'cms_sync.time_rucio_block_list_all' p_timer = 'cms_sync.time_phedex_block_list_all' if prefix: r_timer = 'cms_sync.time_rucio_block_list_partial' p_timer = 'cms_sync.time_phedex_block_list_partial' # Add touches to keep from getting killed as long as progress is being made with monitor.record_timer_block(p_timer): touch(text='PQ ' + site) phedex_blocks = self.phedex_svc.blocks_at_site(pnn=pnn, prefix=prefix, since=None) with monitor.record_timer_block(r_timer): touch(text='RQ ' + site) rucio_blocks = self.get_datasets_at_rse(rse=site, prefix=prefix) touch(text='DQ ' + site) n_blocks_in_phedex = len(phedex_blocks) n_blocks_in_rucio = len(rucio_blocks) # FIXME: This is refusing to delete everything from Rucio. Not clear it's needed if not n_blocks_in_phedex and n_blocks_in_rucio: logging.warning( "At %s found %s blocks in PhEDEx and %s in Rucio with prefix %s", site, n_blocks_in_phedex, n_blocks_in_rucio, prefix) return if not n_blocks_in_phedex and not n_blocks_in_rucio: logging.info( "At %s:%s, nothing in PhEDEx or Rucio. Quitting." 
% (site, prefix)) return block_report = compare_site_blocks(phedex=phedex_blocks, rucio=rucio_blocks, rse=site, patterns=self.patterns) n_blocks_not_in_rucio = len(block_report['not_rucio']) n_blocks_not_in_phedex = len(block_report['not_phedex']) n_incomplete_blocks = len(block_report['incomplete']) logging.info("At %s:%s In both/PhEDEx only/Rucio only: %s/%s/%s" % (site, prefix, len(block_report['complete']), n_blocks_not_in_rucio, n_blocks_not_in_phedex)) if len(block_report['complete'] ) or n_blocks_not_in_rucio or n_blocks_not_in_phedex: logging.info( 'At %s:%s %3.0f%% complete', site, prefix, len(block_report['complete']) * 100 / (len(block_report['complete']) + n_blocks_not_in_rucio + n_blocks_not_in_phedex)) if len(block_report['complete']) or n_blocks_not_in_rucio: logging.info( 'At %s:%s %3.0f%% completely added', site, prefix, len(block_report['complete']) * 100 / (len(block_report['complete']) + n_blocks_not_in_rucio)) # Truncate lists if we want to reduce cycle time if BLOCKS_PER_ACTION and n_blocks_not_in_rucio > BLOCKS_PER_ACTION: block_report['not_rucio'] = set( list(block_report['not_rucio'])[:BLOCKS_PER_ACTION]) n_blocks_not_in_rucio = len(block_report['not_rucio']) if BLOCKS_PER_ACTION and n_blocks_not_in_phedex > BLOCKS_PER_ACTION: block_report['not_phedex'] = set( list(block_report['not_phedex'])[:BLOCKS_PER_ACTION]) n_blocks_not_in_phedex = len(block_report['not_phedex']) logging.info('Adding %6d blocks to Rucio for %s:%s', n_blocks_not_in_rucio, site, prefix) for block in block_report['not_rucio']: logging.info('Adding to rucio: %s at %s', block, site) bs = BlockSyncer(block_name=block, pnn=pnn, rse=site) bs.add_to_rucio() logging.info('Removing %6d blocks from Rucio for %s:%s', n_blocks_not_in_phedex, site, prefix) for block in block_report['not_phedex']: logging.info('Removing from rucio: %s at %s', block, site) bs = BlockSyncer(block_name=block, pnn=pnn, rse=site) bs.remove_from_rucio() for block in block_report['incomplete']: logging.warn('Redoing sync for %s at %s', block, site) bs = BlockSyncer(block_name=block, pnn=pnn, rse=site) bs.add_to_rucio(recover=True) logging.info('Finished syncing %s:%s' % (site, prefix)) def chunks_to_sync(self): """ Turn the config into a list of site/prefix pairs which need to be synced :return: The site prefix pairs """ to_sync = [] for site, site_config in self.config.items(): print('Site %s (%s)is ok %s' % (site, type(site), site not in ['default', 'main'])) if site not in ['default', 'main']: if site_config.get('multi_das_calls', False): for prefix in list(string.letters + string.digits): if ('T0' in site or 'FNAL' in site) and prefix == 'S': for fnal_prefix in ('Sc', 'Se', 'Si', 'Sp', 'St', 'SI', 'SM', 'ST', 'SU', 'SV'): to_sync.append((site, fnal_prefix)) elif 'FNAL' in site and prefix == 'M': for fnal_prefix in ('Ma', 'MC', 'ME', 'Mi', 'Mo', 'MS', 'Mu'): to_sync.append((site, fnal_prefix)) elif ('T0' in site or 'FNAL' in site) and prefix == 'D': for fnal_prefix in ('Da', 'Di', 'DM', 'Do', 'DP', 'Ds', 'DS', 'DY'): to_sync.append((site, fnal_prefix)) elif ('T0' in site or 'FNAL' in site) and prefix == 'T': for fnal_prefix in ('T1', 'T4', 'T5', 'TH', 'TK', 'TO', 'TA', 'TB', 'TC', 'TG', 'TZ', 'T_', 'TT', 'TW', 'Tk', 'To', 'Ta', 'Tb', 'Te', 'Tp', 'Tr', 'Ts', 'Tt', 'Tw'): to_sync.append((site, fnal_prefix)) elif ('T0' in site or 'FNAL' in site) and prefix == 'H': for fnal_prefix in ('H0', 'H1', 'Ha', 'He', 'Hi', 'HJ', 'Hp', 'HP', 'Hs', 'HS', 'HT', 'HV', 'HW', 'HZ'): to_sync.append((site, fnal_prefix)) else: to_sync.append((site, 
prefix)) else: to_sync.append((site, None)) # Cut the list (keep in order but choose a random starting point) offset = random.randrange(len(to_sync)) to_sync = to_sync[offset:] + to_sync[:offset] return to_sync @staticmethod def get_datasets_at_rse(rse, prefix=None): """ :param rse: The RSE name :param prefix: Character(s) to restrict the dataset search :return: a dictionary with <dataset name>: <number of files> """ filters = {'scope': 'cms', 'did_type': DIDType.DATASET} if prefix: filters['name'] = '/' + prefix + '*' account = SYNC_ACCOUNT_FMT % rse.lower() rule_filters = { 'account': account, 'scope': 'cms', 'did_type': DIDType.DATASET } with monitor.record_timer_block('cms_sync.time_rse_datasets'): synced_ds = { item['name'] for item in list_replication_rules(filters=rule_filters) if item['expires_at'] is None and ( prefix is None or item['name'].startswith('/' + prefix)) } all_datasets = [ dataset['name'] for dataset in list_datasets_per_rse(rse=rse, filters=filters) ] logging.info('Getting all datasets at %s with prefix %s' % (rse, prefix)) datasets = {} for dataset in all_datasets: if dataset in synced_ds: for ds in list_dataset_replicas(scope='cms', name=dataset, deep=True): if ds['rse'] == rse: datasets.update({dataset: ds['available_length']}) return datasets
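# Illustrative driver sketch (not part of the original source): how SiteSyncer is
# typically exercised. `options` only needs a `.config` attribute pointing at a
# configuration file readable by load_config; everything else is taken from the class above.
def _example_site_sync(options):
    syncer = SiteSyncer(options)
    for site_prefix in syncer.chunks_to_sync():
        syncer.sync_site(site_prefix)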
class BlockSyncer(object):
    """
    Class representing the replica at a site of a CMS dataset (PhEDEx FileBlock)
    """

    def __init__(self, block_name, pnn, rse=None, lifetime=None, dry_run=False):
        """
        Get the status of the replica of block_name at pnn, considering only
        closed blocks completely replicated at the site.

        :block_name: PhEDEx block name.
        :pnn:        PhEDEx node name.
        :rse:        Rucio RSE. If None (default), inferred from the pnn.
        :lifetime:   lifetime of the DIDs created, if any.
        :dry_run:    do not apply any change.
        """
        self.phedex_svc = PhEDEx()
        self.dry_run = dry_run

        self.pnn = pnn
        if rse is None:
            self.rse = list_rses('cms_type=real&pnn=%s' % self.pnn)[0]['rse']
        else:
            self.rse = rse
        rse_details = get_rse(self.rse)
        self.rse_id = rse_details['id']

        self.account = SYNC_ACCOUNT_FMT % self.rse.lower()
        self.container = self.phedex_svc.check_data_item(pditem=block_name)['pds']
        self.scope = DEFAULT_SCOPE
        self.block_name = block_name
        self.lifetime = lifetime

        self.group, self.custodial = self.phedex_svc.block_at_pnn_phedex(
            block=self.block_name, pnn=self.pnn)
        self.is_at_pnn = bool(self.group)

        if self.is_at_pnn:
            self.replicas = self.phedex_svc.fileblock_files_phedex(pnn=pnn, pfb=block_name)
        else:
            self.replicas = {}

        self.container_exists = None
        self.block_exists = None
        self.rule_exists = None

        touch(text=self.rse)

    def add_to_rucio(self, recover=False):
        """
        Register the container, block, replicas, and rule for this block in Rucio.
        """
        with monitor.record_timer_block('cms_sync.time_add_block'):
            self.register_container()
            block_exists = self.register_block()
            if block_exists:
                self.update_replicas()
                if recover:
                    self.make_replicas_available()
                self.update_rule()
            else:
                logging.critical('Unable to make the block %s', self.block_name)

    def remove_from_rucio(self):
        """
        Remove the block's replicas and rule from Rucio.
        """
        with monitor.record_timer_block('cms_sync.time_remove_block'):
            self.update_replicas()
            self.update_rule()

    def register_container(self):
        self.container_exists = False

        if self.is_at_pnn and self.dry_run:
            logging.info('Dry Run: Create container %s in scope %s.', self.container, self.scope)
            self.container_exists = True
            return self.container_exists

        try:
            get_did(scope=self.scope, name=self.container)
            monitor.record_counter('cms_sync.container_exists')
            self.container_exists = True
            logging.info('Found container %s', self.container)
        except DataIdentifierNotFound:
            if self.is_at_pnn:
                try:
                    logging.info('Create container %s in scope %s.', self.container, self.scope)
                    add_did(scope=self.scope, name=self.container, type='CONTAINER',
                            issuer=self.account, lifetime=self.lifetime)
                    monitor.record_counter('cms_sync.container_created')
                    self.container_exists = True
                    logging.info('Created container %s in scope %s.', self.container, self.scope)
                except DataIdentifierAlreadyExists:
                    logging.warning('Container was created in the meanwhile')
                    monitor.record_counter('cms_sync.container_collision')
                    self.container_exists = True
            else:
                logging.warning('Container was not at PNN')

        return self.container_exists

    def register_block(self):
        """
        Register the dataset (if there is a replica at the pnn) and attach it to the container
        """
        # FIXME: The logic here could use some improvement as we try to create a block even if it exists already
        try:
            get_did(scope=self.scope, name=self.block_name)
            self.block_exists = True
            monitor.record_counter('cms_sync.dataset_exists')
        except DataIdentifierNotFound:
            self.block_exists = False

        if self.is_at_pnn and self.dry_run:
            logging.info('Dry Run: Create dataset %s in scope %s.', self.block_name, self.scope)
            self.block_exists = True
        elif self.is_at_pnn:
            logging.info('Create block %s in scope %s.', self.block_name, self.scope)
            try:
                if not self.block_exists:
                    add_did(scope=self.scope, name=self.block_name, type='DATASET',
                            issuer=self.account, lifetime=self.lifetime)
                    monitor.record_counter('cms_sync.dataset_created')
            except DataIdentifierAlreadyExists:
                logging.warning('Attempt to add %s:%s failed, already exists.',
                                self.scope, self.block_name)
                monitor.record_counter('cms_sync.dataset_collision')

            try:
                attach_dids(scope=self.scope, name=self.container,
                            attachment={'dids': [{'scope': self.scope, 'name': self.block_name}]},
                            issuer=self.account)
            except DuplicateContent:
                logging.warning('Attempt to add %s:%s to %s failed, already exists.',
                                self.scope, self.block_name, self.container)
            except DataIdentifierNotFound:
                logging.error('Attempt to add %s:%s to %s failed. Container does not exist.',
                              self.scope, self.block_name, self.container)
                return False
            self.block_exists = True
        else:
            logging.warning('Block %s was not at PNN', self.block_name)

        return self.block_exists

    def update_rule(self):
        """
        Adds or removes the rule for the block.
        """
        rules = list_replication_rules(filters={'scope': self.scope, 'name': self.block_name})
        # rules = self.rcli.list_did_rules(scope=self.scope, name=self.block_name)
        rse_expression = 'rse=' + self.rse

        remove_rules = [rule for rule in rules
                        if rule['account'] == self.account
                        and rule['rse_expression'] == rse_expression]

        if not remove_rules and self.is_at_pnn:
            self.rule_exists = False
            if self.dry_run:
                logging.info("Dry run: Adding rule for dataset %s at rse %s.",
                             self.block_name, self.rse)
            else:
                self.add_replication_rule_with_defaults(
                    dids=[{'scope': self.scope, 'name': self.block_name}],
                    copies=1, rse_expression=rse_expression, account=self.account)
                monitor.record_counter('cms_sync.rules_added')
                self.rule_exists = True
        elif remove_rules and not self.is_at_pnn:
            self.rule_exists = True
            if self.dry_run:
                logging.info("Removing rules for dataset %s at rse %s.",
                             self.block_name, self.rse)
            else:
                for rule in remove_rules:
                    # delete_replication_rule(rule['id'], purge_replicas=False, issuer=self.account)
                    delete_rule(rule_id=rule['id'], purge_replicas=True, soft=False)
                monitor.record_counter('cms_sync.rules_removed')
                self.rule_exists = False

    def update_replicas(self):
        """
        Add or removes replicas for the dataset at rse.
        """
        with monitor.record_timer_block('cms_sync.time_update_replica'):
            logging.info('Updating replicas for %s:%s at %s', self.scope, self.block_name, self.rse)

            replicas = list_replicas(dids=[{'scope': self.scope, 'name': self.block_name}],
                                     rse_expression='rse=%s' % self.rse)
            try:
                rucio_replicas = {repl['name'] for repl in replicas}
            except TypeError:
                rucio_replicas = set()

            phedex_replicas = set(self.replicas.keys())
            missing = list(phedex_replicas - rucio_replicas)
            to_remove = list(rucio_replicas - phedex_replicas)

            if missing and (len(phedex_replicas) != len(missing)):
                logging.warning('Recovery: Inconsistency found for %s at %s: %s in PhEDEx and %s missing',
                                self.rse, self.block_name, len(phedex_replicas), len(missing))

            if missing:
                lfns_added = self.add_missing_replicas(missing)
                monitor.record_counter('cms_sync.files_added', delta=lfns_added)

            if to_remove:
                lfns_removed = self.remove_extra_replicas(to_remove)
                monitor.record_counter('cms_sync.files_removed', delta=lfns_removed)

        return

    def make_replicas_available(self):
        """
        Marks replicas for the dataset at rse as available if they are in PhEDEx
        """
        with monitor.record_timer_block('cms_sync.time_recover_replica'):
            logging.info('Recovering unavailable replicas for %s:%s at %s',
                         self.scope, self.block_name, self.rse)

            replicas = list_replicas(dids=[{'scope': self.scope, 'name': self.block_name}],
                                     rse_expression='rse=%s' % self.rse, all_states=True)
            try:
                unavailable_replicas = {repl['name'] for repl in replicas
                                        if repl['states'][self.rse] != 'AVAILABLE'}
            except TypeError:
                unavailable_replicas = set()

            phedex_replicas = set(self.replicas.keys())
            missing = list(phedex_replicas & unavailable_replicas)

            logging.info('Recovery for %s:%s at %s: PhEDEx has %s, Rucio unavailable %s. Missing: %s ',
                         self.scope, self.block_name, self.rse,
                         len(phedex_replicas), len(unavailable_replicas), len(missing))

            # Fix up things which are unavailable
            rse_details = get_rse(self.rse)
            rse_id = rse_details['id']
            scope = InternalScope(self.scope)
            state = 'A'

            for name in missing:
                logging.info('Setting available %s:%s at %s', self.scope, name, self.rse)
                core_update_state(rse_id=rse_id, scope=scope, name=name, state=state)

            monitor.record_counter('cms_sync.files_made_available', delta=len(missing))

        return

    def remove_extra_replicas(self, to_remove):
        """
        :param to_remove: replicas to remove from Rucio
        :return:
        """
        scope = InternalScope(self.scope)
        with monitor.record_timer_block('cms_sync.time_remove_replica'):
            if to_remove and self.dry_run:
                logging.info('Dry run: Removing replicas %s from rse %s.', str(to_remove), self.rse)
            elif to_remove:
                logging.debug('Removing %s replicas from rse %s.', len(to_remove), self.rse)
                for to_remove_chunk in chunks(to_remove, REMOVE_CHUNK_SIZE):
                    replicas = [{'scope': scope, 'name': lfn, "rse_id": self.rse_id, "state": "U"}
                                for lfn in to_remove_chunk]
                    # transactional_session here?
                    # while lock is set stuck, judge-repairer might make transfer requests
                    # before rule is gone, but does it matter?
                    update_replicas_states(replicas=replicas, add_tombstone=False)

                # delete_replicas(rse=self.rse, issuer=self.account,
                #                 files=[{'scope': self.scope, 'name': lfn} for lfn in to_remove_chunk])

            return len(to_remove)

    def add_missing_replicas(self, missing):
        """
        :param missing: possible missing lfns
        :return:
        """
        with monitor.record_timer_block('cms_sync.time_add_replica'):
            if missing and self.dry_run:
                logging.info('Dry run: Adding replicas %s to rse %s.', str(missing), self.rse)
            elif missing:
                logging.info('Adding %s replicas to rse %s.', len(missing), self.rse)
                replicas_to_add = [self.replicas[lfn] for lfn in missing]
                files = replica_file_list(replicas=replicas_to_add, scope=self.scope)
                for rucio_file in files:
                    try:
                        update_file = copy.deepcopy(rucio_file)
                        update_file.update({'scope': InternalScope(self.scope),
                                            "rse_id": self.rse_id, "state": "A"})
                        update_replicas_states(replicas=[update_file], add_tombstone=False)
                    except ReplicaNotFound:
                        try:
                            add_replicas(rse=self.rse, files=[rucio_file],
                                         issuer=self.account, ignore_availability=True)
                        except RucioException:
                            logging.critical('Could not add %s to %s. Constraint violated?',
                                             rucio_file, self.rse)
                            resurrect([{'scope': rucio_file['scope'], 'name': rucio_file['name']}],
                                      issuer=self.account)
                            add_replicas(rse=self.rse, files=[rucio_file],
                                         issuer=self.account, ignore_availability=True)
                            logging.critical('Resurrected %s at %s', rucio_file, self.rse)

                # add_replicas(rse=self.rse, files=files, issuer=self.account)

                lfns = [item['name'] for item in
                        list_files(scope=self.scope, name=self.block_name, long=False)]

                missing_lfns = list(set(missing) - set(lfns))
                if missing_lfns:
                    logging.debug('Attaching %s lfns to %s at %s',
                                  len(missing_lfns), self.block_name, self.rse)
                    dids = [{'scope': self.scope, 'name': lfn} for lfn in missing_lfns]
                    try:
                        attach_dids(scope=self.scope, name=self.block_name,
                                    attachment={'dids': dids}, issuer=self.account)
                    except FileAlreadyExists:
                        logging.warning('Trying to attach already existing files to %s', self.block_name)
                    except DataIdentifierNotFound:
                        logging.critical('Could not attach to %s at %s. Constraint violated?',
                                         self.block_name, self.rse)
                return len(missing_lfns)

    def add_replication_rule_with_defaults(self, dids, copies, rse_expression, account):
        """
        Add replication rule requires one to send all the values. Add a list of defaults.
        If true options are required, move them into the parameter list.

        :param dids: List of dids (scope/name dictionary)
        :param copies: Number of copies
        :param rse_expression: RSE expression
        :param account: Account for the rule
        :return: None
        """
        (grouping, weight, lifetime, locked, subscription_id, source_replica_expression,
         notify, purge_replicas, ignore_availability, comment, ask_approval, asynchronous,
         priority, split_container) = ('DATASET', None, None, False, None, None, None,
                                       False, False, None, False, False, 3, False)

        activity = 'Data Consolidation'
        meta = json.dumps({"phedex_group": self.group, "phedex_custodial": self.custodial})

        add_replication_rule(dids=dids, copies=copies, rse_expression=rse_expression,
                             account=account, grouping=grouping, weight=weight,
                             lifetime=lifetime, locked=locked, subscription_id=subscription_id,
                             source_replica_expression=source_replica_expression,
                             activity=activity, notify=notify, purge_replicas=purge_replicas,
                             ignore_availability=ignore_availability, comment=comment,
                             ask_approval=ask_approval, asynchronous=asynchronous,
                             priority=priority, split_container=split_container,
                             meta=meta, issuer=account)
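# Illustrative usage sketch (not part of the original source): synchronize a single
# block by hand. The block and node names below are placeholders; dry_run=True keeps
# the example from modifying Rucio.
def _example_block_sync():
    block = '/Primary/Processed-v1/AOD#0000'  # hypothetical block name
    syncer = BlockSyncer(block_name=block, pnn='T2_XX_Example',
                         rse='T2_XX_Example', dry_run=True)
    if syncer.is_at_pnn:
        syncer.add_to_rucio()
    else:
        syncer.remove_from_rucio()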
def sync(config, logs):
    """
    Main Sync process
    """

    logging.my_logfile(logs=logs)
    logging.my_fmt(label='main_sync')

    starttime = datetime.now()
    modify = {}
    workers = {}   # this is the array of running pnns
    pnns = None    # this is the array of pnn to be launched
    pool = None

    pcli = PhEDEx()

    install_mp_handler()

    conf = _load_config(config, modify, starttime)

    pnns = []
    size = conf['main']['pool']

    logging.summary('Starting')

    while conf['main']['run']:

        if pool is None:
            logging.notice('Started pool of size %d', size)
            pool = multiprocessing.NDPool(size)

        add = [pnn for pnn, sec in conf.items()
               if pnn != 'main' if sec['run']
               if pnn not in workers if pnn not in pnns]

        pnns += add

        random.shuffle(pnns)

        if not _ping():
            logging.warning('Cannot ping, not launching workers')
        else:
            _launch_workers(pool, workers, pnns, pcli)
            pnns = []

        _poll_workers(workers, pnns)

        conf = _load_config(config, modify, starttime)

        if not conf['main']['run'] or conf['main']['pool'] != size:
            # trigger draining of all workers, close the pool and wait
            # for the task to be over
            conf = _load_config(config, {'default': {'run': False}}, starttime)
            _drain_up(workers, pnns)
            workers = {}
            pool.close()
            pool = None
            size = conf['main']['pool']
        else:
            time.sleep(conf['main']['sleep'])

    logging.summary('Exiting.')

    return config
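# Sketch of the configuration shape the sync() loop above relies on (inferred from
# the keys it reads; the real file format parsed by _load_config may differ):
#
#     {'main': {'run': True, 'pool': 4, 'sleep': 60},
#      'T2_XX_Example': {'run': True}}   # one section per pnn to be synced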
class CMSRucioDatasetReplica(object): """ Class repeesenting the replica at a site af a CMS Dataset (PhEDEx FileBlock) """ #pylint: disable=too-many-arguments def __init__(self, rds, pnn, rse=None, scope=DEFAULT_SCOPE, lifetime=None, pcli=None, rcli=None): """ Get the status of replica of pditem at pnn considering only closed blocks completely replicated at site. :pnn: PhEDEx node name. :rds: Rucio Dataset (PhEDEx FileBlock) name. :rse: Rucio RSE. If None (default) inferred by the pnn using DEFAULT_RSE_FMT. :scope: Scope. Default: DEFAULT_SCOPE. :pcli: Reference to a phedex.PhEDEx object or a dict {'instance': <instance>, 'dasgoclient': <path>, 'datasvc': <url>} none of the keys is mandatory. Default is {}. :rcli: Reference to a rucio Client() instance or a dict {'accont': ..., ... } none of the keys is mandatory. Default is {'account': <sync account>} """ self.pnn = pnn self._get_pcli(pcli) self._get_rcli(rcli) if rse is None: self.rse = self.rcli.list_rses('cms_type=real&pnn=%s' % self.pnn)[0]['rse'] else: self.rse = rse self.container = self.pcli.check_data_item(pditem=rds)['pds'] self.dataset = rds self.scope = scope self.lifetime = lifetime self.block_at_pnn() if self.is_at_pnn: self.replicas = self.pcli.fileblock_files(pnn=pnn, pfb=rds) else: self.replicas = {} def _get_pcli(self, pcli): if pcli is None: pcli = {} if isinstance(pcli, dict): self.pcli = PhEDEx(**pcli) elif isinstance(pcli, PhEDEx): #pylint: disable=redefined-variable-type self.pcli = pcli else: raise Exception("wrong type for pcli parameter %s" %\ type(pcli)) def _get_rcli(self, rcli): if rcli is None: rcli = {} if isinstance(rcli, dict): if 'account' not in rcli: rcli['account'] = SYNC_ACCOUNT_FMT % self.pnn.lower() self.rcli = Client(**rcli) elif isinstance(rcli, Client): #pylint: disable=redefined-variable-type self.rcli = rcli else: raise Exception("wrong type for rcli parameter %s" %\ type(rcli)) def block_at_pnn(self): """ Verify if the block is at pnn (using phedex datasvn) """ metadata = self.pcli.list_data_items(pditem=self.dataset, pnn=self.pnn, locality=True, metadata=True) self.is_at_pnn = bool(len(metadata) == 1 and\ 'block' in metadata[0] and\ 'replica' in metadata[0]['block'][0] and\ metadata[0]['block'][0]['replica'][0]['complete'] == 'y') def register_container(self, dry=False): """ Register container of the dataset (only if there is a dataset replica on the pnn) :dry: Dry run. Default false. """ try: self.rcli.get_did(scope=self.scope, name=self.container) return 'exists' except DataIdentifierNotFound: pass if self.is_at_pnn and dry: logging.dry('Create container %s in scope %s.', self.container, self.scope) return 'created' elif self.is_at_pnn: logging.verbose('Create container %s in scope %s.', self.container, self.scope) try: self.rcli.add_container(scope=self.scope, name=self.container, lifetime=self.lifetime) except DataIdentifierAlreadyExists: logging.warning('Container was created in the meanwhile') return 'exists' return 'created' return 'skipped' def register_dataset(self, dry=False): """ Register the dataset (if there is a replica at the pnn) :dry: Dry run. Default false. 
""" try: self.rcli.get_did(scope=self.scope, name=self.dataset) return 'exists' except DataIdentifierNotFound: pass if self.is_at_pnn and dry: logging.dry('Create dataset %s in scope %s.', self.dataset, self.scope) return 'created' elif self.is_at_pnn: logging.verbose('Create dataset %s in scope %s.', self.dataset, self.scope) self.rcli.add_dataset(scope=self.scope, name=self.dataset, lifetime=self.lifetime) self.rcli.attach_dids(scope=self.scope, name=self.container, dids=[{ 'scope': self.scope, 'name': self.dataset }]) return 'created' return 'skipped' def update_replicas(self, dry=False): """ Add or removes replicas for the dataset at rse. :dry: Drydrun. default false """ logging.notice('Updating replicas for %s:%s at %s' % (self.scope, self.dataset, self.rse)) replicas = self.rcli.list_replicas([{ 'scope': self.scope, 'name': self.dataset }], rse_expression='rse=%s' % self.rse) rrepl = [repl['name'] for repl in replicas] prepl = [repl for repl in self.replicas.keys()] missing = list(set(prepl) - set(rrepl)) to_remove = list(set(rrepl) - set(prepl)) if missing and dry: logging.dry('Adding replicas %s to rse %s.', str(missing), self.rse) elif missing: logging.verbose('Adding replicas %s to rse %s.', str(missing), self.rse) self.rcli.add_replicas(rse=self.rse, files=[{ 'scope': self.scope, 'name': self.replicas[lfn]['name'], 'adler32': self.replicas[lfn]['checksum'], 'bytes': self.replicas[lfn]['size'], } for lfn in missing]) # missing files that are not in the list of dataset files # are to be attached. lfns = [ item['name'] for item in self.rcli.list_files(scope=self.scope, name=self.dataset) ] missing_lfns = list(set(missing) - set(lfns)) if missing_lfns: logging.verbose('Attaching lfns %s to dataset %s.', str(missing_lfns), self.dataset) try: self.rcli.attach_dids( scope=self.scope, name=self.dataset, dids=[{ 'scope': self.scope, 'name': lfn } for lfn in list(set(missing) - set(lfns))]) except FileAlreadyExists: logging.warning('Trying to attach already existing files.') if to_remove and dry: logging.dry('Removing replicas %s from rse %s.', str(to_remove), self.rse) elif to_remove: logging.verbose('Removing replicas %s from rse %s.', str(to_remove), self.rse) for to_remove_chunk in chunks(to_remove, REMOVE_CHUNK_SIZE): attempt = 0 while True: attempt += 1 try: self.rcli.delete_replicas(rse=self.rse, files=[{ 'scope': self.scope, 'name': lfn, } for lfn in to_remove_chunk ]) break except DatabaseException: logging.warning( 'DatabaseException raised, retrying...') if attempt > 3: raise time.sleep(randint(1, 5)) return {'added': missing, 'removed': to_remove} def update_rule(self, dry=False): """ Adds or removes the rule for the dataset. :dry: Drydrun. 
default false returns the action performed: None, added, removed """ rules = self.rcli.list_did_rules(scope=self.scope, name=self.dataset) rrule = None account = self.rcli.__dict__['account'] action = None rse_exp = 'rse=' + self.rse rrule = next(( rule for rule in rules if rule['account'] == account and\ rule['rse_expression'] == rse_exp ), None) if rrule is None and self.is_at_pnn: if dry: logging.dry("Adding rule for dataset %s at rse %s.", self.dataset, self.rse) else: self.rcli.add_replication_rule( dids=[{ 'scope': self.scope, 'name': self.dataset }], copies=1, rse_expression=rse_exp, ) action = 'added' elif rrule is not None and not self.is_at_pnn: # removing rule if dry: logging.dry("Removing rule for dataset %s at rse %s.", self.dataset, self.rse) else: self.rcli.delete_replication_rule(rrule['id'], purge_replicas=False) action = 'removed' return action def update(self, dry=False): """ syncronize the dataset replica info. :dry: Drydrun. default false """ ret = {'at_node': self.is_at_pnn} #datasets and containers are only added ret['container'] = self.register_container(dry) ret['dataset'] = self.register_dataset(dry) ret['replicas'] = self.update_replicas(dry) ret['rule'] = self.update_rule(dry) return ret
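# Illustrative usage sketch (not part of the original source): reconcile one PhEDEx
# file block with Rucio using the class above. The block and node names are placeholders.
def _example_dataset_replica_update():
    replica = CMSRucioDatasetReplica(rds='/Primary/Processed-v1/AOD#0000',
                                     pnn='T2_XX_Example')
    summary = replica.update(dry=True)
    logging.notice('block at node: %s, rule action: %s',
                   summary['at_node'], summary['rule'])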
                    help='dataset to be updated. Can have wildcard and can be multiple')
PARSER.add_argument('--pool', dest='pool', default=1,
                    help='number of parallel threads. Default 1.')

OPTIONS = PARSER.parse_args()

logging.my_lvl(OPTIONS.verbosity)

# logging.summary('DBP1')

install_mp_handler()

POOL = multiprocessing.Pool(int(OPTIONS.pool))

PCLI = PhEDEx()
PNNS = PCLI.pnns(select=OPTIONS.pnn)

TIMING = {}

WILDCARD = re.compile(r'\S*[*]\S*')

DATASETS = get_timing(_get_dset_list(PCLI, OPTIONS.dataset), TIMING)

PROCS = get_timing(_launch_workers(PNNS, DATASETS, POOL, OPTIONS, PCLI), TIMING)

get_timing(_get_workers(POOL, PROCS), TIMING)

logging.summary(
class SiteSyncer(object): def __init__(self, options): self.options = options self.config = load_config(options.config) self.last_synced = {} # load_last_synced() self.phedex_svc = PhEDEx() self.patterns = [] return def sync_site(self, site_pair): """ Sync a site defined by a site_pair of (site, prefix). Prefix can be None to sync all blocks in the site :return: """ site, prefix = site_pair if site.endswith('_Tape'): pnn = site.replace('_Tape', '_MSS') else: pnn = site if site == 'T3_CH_CERN_CTA_CastorTest': pnn = 'T0_CH_CERN_MSS' # now = int(time.time()) # Set 1980 as the last sync date if no data exists # site_last_synced = self.last_synced.get(site_pair, 10 * 365 * 24 * 3600) # last_week = int(site_last_synced - 7 * 24 * 3600) if self.config.get('default', None): if self.config['default'].get('chunck', 0): BLOCKS_PER_ACTION = int(self.config['default']['chunck']) if self.config['default'].get('select', None): self.patterns = [self.config['default']['select']] with monitor.record_timer_block('cms_sync.time_site_sync'): r_timer = 'cms_sync.time_rucio_block_list_all' p_timer = 'cms_sync.time_phedex_block_list_all' if prefix: r_timer = 'cms_sync.time_rucio_block_list_partial' p_timer = 'cms_sync.time_phedex_block_list_partial' # Add touches to keep from getting killed as long as progress is being made with monitor.record_timer_block(p_timer): touch(text='PQ ' + site) phedex_blocks = self.phedex_svc.blocks_at_site(pnn=pnn, prefix=prefix, since=None) with monitor.record_timer_block(r_timer): touch(text='RQ ' + site) rucio_blocks = self.get_datasets_at_rse(rse=site, prefix=prefix) touch(text='DQ ' + site) n_blocks_in_phedex = len(phedex_blocks) n_blocks_in_rucio = len(rucio_blocks) # FIXME: This is refusing to delete everything from Rucio. Not clear it's needed if not n_blocks_in_phedex and n_blocks_in_rucio: logging.warning( "At %s found %s blocks in PhEDEx and %s in Rucio with prefix %s", site, n_blocks_in_phedex, n_blocks_in_rucio, prefix) return if not n_blocks_in_phedex and not n_blocks_in_rucio: logging.info( "At %s:%s, nothing in PhEDEx or Rucio. Quitting." 
% (site, prefix)) return block_report = compare_site_blocks(phedex=phedex_blocks, rucio=rucio_blocks, rse=site, patterns=self.patterns) n_blocks_not_in_rucio = len(block_report['not_rucio']) n_blocks_not_in_phedex = len(block_report['not_phedex']) n_incomplete_blocks = len(block_report['incomplete']) logging.info("At %s:%s In both/PhEDEx only/Rucio only: %s/%s/%s" % (site, prefix, len(block_report['complete']), n_blocks_not_in_rucio, n_blocks_not_in_phedex)) if len(block_report['complete'] ) or n_blocks_not_in_rucio or n_blocks_not_in_phedex: logging.info( 'At %s:%s %3.0f%% complete', site, prefix, len(block_report['complete']) * 100 / (len(block_report['complete']) + n_blocks_not_in_rucio + n_blocks_not_in_phedex)) if len(block_report['complete']) or n_blocks_not_in_rucio: logging.info( 'At %s:%s %3.0f%% completely added', site, prefix, len(block_report['complete']) * 100 / (len(block_report['complete']) + n_blocks_not_in_rucio)) # Truncate lists if we want to reduce cycle time if BLOCKS_PER_ACTION and n_blocks_not_in_rucio > BLOCKS_PER_ACTION: block_report['not_rucio'] = set( list(block_report['not_rucio'])[:BLOCKS_PER_ACTION]) n_blocks_not_in_rucio = len(block_report['not_rucio']) if BLOCKS_PER_ACTION and n_blocks_not_in_phedex > BLOCKS_PER_ACTION: block_report['not_phedex'] = set( list(block_report['not_phedex'])[:BLOCKS_PER_ACTION]) n_blocks_not_in_phedex = len(block_report['not_phedex']) logging.info('Adding %6d blocks to Rucio for %s:%s', n_blocks_not_in_rucio, site, prefix) for block in block_report['not_rucio']: logging.info('Adding to rucio: %s at %s', block, site) bs = BlockSyncer(block_name=block, pnn=pnn, rse=site) bs.add_to_rucio() logging.info('Removing %6d blocks from Rucio for %s:%s', n_blocks_not_in_phedex, site, prefix) for block in block_report['not_phedex']: logging.info('Removing from rucio: %s at %s', block, site) bs = BlockSyncer(block_name=block, pnn=pnn, rse=site) bs.remove_from_rucio() for block in block_report['incomplete']: logging.warn('Redoing sync for %s at %s', block, site) bs = BlockSyncer(block_name=block, pnn=pnn, rse=site) bs.add_to_rucio(recover=True) logging.info('Finished syncing %s:%s' % (site, prefix)) def chunks_to_sync(self): """ Turn the config into a list of site/prefix pairs which need to be synced :return: The site prefix pairs """ to_sync = [] for site, site_config in self.config.items(): print('Site %s (%s)is ok %s' % (site, type(site), site not in ['default', 'main'])) if site not in ['default', 'main']: if site_config.get('multi_das_calls', False): for prefix in list(string.ascii_letters + string.digits): if (('CERN' in site) or ('FNAL' in site) or ('_Tape' in site)) and prefix == 'S': for fnal_prefix in ('Sc', 'Se', 'Si', 'Sp', 'St', 'SI', 'SM', 'ST', 'SU', 'SV', 'SS', 'Su', 'SP', 'SL'): to_sync.append((site, fnal_prefix)) elif (('T0' in site) or ('FNAL' in site) or ('_Tape' in site)) and prefix == 'M': for fnal_prefix in ('Ma', 'MC', 'ME', 'Mi', 'Mo', 'MS', 'Mu'): to_sync.append((site, fnal_prefix)) elif (('T0' in site) or ('FNAL' in site) or ('_Tape' in site)) and prefix == 'D': for fnal_prefix in ('D0', 'Da', 'Di', 'DM', 'Do', 'Dp', 'DP', 'Ds', 'DS', 'DY'): to_sync.append((site, fnal_prefix)) elif (('T0' in site) or ('FNAL' in site) or ('_Tape' in site)) and prefix == 'T': for fnal_prefix in ('T1', 'T4', 'T5', 'TH', 'TK', 'TO', 'TA', 'TB', 'TC', 'TG', 'TZ', 'T_', 'TS', 'TT', 'TW', 'Tk', 'To', 'Ta', 'Tb', 'Te', 'Tp', 'Tr', 'Ts', 'Tt', 'Tw', 'Ty'): to_sync.append((site, fnal_prefix)) elif (('CERN' in site) or ('FNAL' in site)) and 
prefix == 'H': for fnal_prefix in ('H0', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'Ha', 'HA', 'Hc', 'He', 'HE', 'HF', 'Hi', 'HI', 'HJ', 'HL', 'Hp', 'HP', 'Hs', 'HS', 'HT', 'HV', 'HW', 'Hy', 'HZ'): to_sync.append((site, fnal_prefix)) elif (('T0' in site) or ('FNAL' in site) or ('_Tape' in site) or ('_CTA' in site)) and prefix == 'C': for fnal_prefix in ('Ca', 'CE', 'CG', 'Ch', 'CI', 'CM', 'Co', 'CS'): to_sync.append((site, fnal_prefix)) elif (('CERN' in site) or ('FNAL' in site)) and prefix == 'Z': for fnal_prefix in ('Z0', 'Z1', 'Z2', 'Z3', 'Z4', 'Z5', 'ZA', 'Zb', 'ZB', 'Zc', 'ZC', 'Ze', 'ZE', 'ZG', 'ZH', 'ZJ', 'ZL', 'Zm', 'ZM', 'Zn', 'ZN', 'Zp', 'ZP', 'ZR', 'Zt', 'ZT', 'ZU', 'ZV', 'ZZ'): to_sync.append((site, fnal_prefix)) elif (('CERN' in site) or ('FNAL' in site)) and prefix == 'G': for fnal_prefix in ('G_', 'G1', 'Ga', 'Ge', 'GF', 'GG', 'Gj', 'GJ', 'Gl', 'GM', 'Gr', 'Gs', 'GV'): to_sync.append((site, fnal_prefix)) else: to_sync.append((site, prefix)) else: to_sync.append((site, None)) # Cut the list (keep in order but choose a random starting point) offset = random.randrange(len(to_sync)) to_sync = to_sync[offset:] + to_sync[:offset] to_sync = [ # # ('T1_US_FNAL_Tape', 'ST_s-channel_4f_leptonDecays_TuneCP5_13TeV-amcatnlo-pythia8/RunIISummer19UL18RECO-106X_upgrade2018_realistic_v11_L1v1-v1'), ('T0_CH_CERN_Tape', 'DQ'), ('T0_CH_CERN_Tape', 'TAC'), # # ('T1_US_FNAL_Tape', 'VBFH_HToSSTo4Tau_MH-125_TuneCUETP8M1_13TeV-powheg-pythia8/RunIISummer16DR80Premix-PUMoriond17_rp_80X_mcRun2_asymptotic_2016_TrancheIV_v6-v2'), # # ('T1_US_FNAL_Tape', 'ZeroBias1/Commissioning2018-26Apr2018-v1'), ] return to_sync @staticmethod def get_datasets_at_rse(rse, prefix=None): """ :param rse: The RSE name :param prefix: Character(s) to restrict the dataset search :return: a dictionary with <dataset name>: <number of files> """ filters = {'scope': 'cms', 'did_type': DIDType.DATASET} if prefix: filters['name'] = '/' + prefix + '*' account = SYNC_ACCOUNT_FMT % rse.lower() rule_filters = { 'account': account, 'scope': 'cms', 'did_type': DIDType.DATASET } with monitor.record_timer_block('cms_sync.time_rse_datasets'): synced_ds = { item['name'] for item in list_replication_rules(filters=rule_filters) if item['expires_at'] is None and ( prefix is None or item['name'].startswith('/' + prefix)) } all_datasets = [ dataset['name'] for dataset in list_datasets_per_rse(rse=rse, filters=filters) ] logging.info('Getting all datasets at %s with prefix %s' % (rse, prefix)) datasets = {} for dataset in all_datasets: if dataset in synced_ds: for ds in list_dataset_replicas(scope='cms', name=dataset, deep=True): if ds['rse'] == rse: datasets.update({dataset: ds['available_length']}) return datasets
class CMSRSE(object):
    """
    Wrapping the definition of a CMS RSE. Gathering the information
    from PhEDEx and translating it into the definition of a Rucio RSE
    for the different expected types: real, test, temp.
    """

    def __init__(self, pnn, account, auth_type=None,
                 rsetype=DEFAULT_RSETYPE, suffix=None, dry=False, fts=None,
                 tier=None, lfn2pfn_algorithm=None, country=None, attrs=None,
                 seinfo=None, tfc=None, tfc_exclude=EXCLUDE_TFC, domains=None,
                 space_token=None, add_prefix=None, proto=DEFAULT_PROTOCOL,
                 instance=DEFAULT_PHEDEX_INST, dasgoclient=DEFAULT_DASGOCLIENT,
                 datasvc=DEFAULT_DATASVC_URL):

        attrs = attrs or []

        self.pnn = pnn
        self.rsetype = rsetype

        if suffix is None:
            suffix = DEFAULT_SUFFIXES[rsetype]
        self.suffix = suffix

        if pnn.endswith('_MSS'):
            raise ValueError('Please import PhEDEx _Buffer pnns rather than _MSS for tape endpoints')
        elif pnn.endswith('_Buffer'):
            self.rsename = pnn.replace('_Buffer', '_Tape') + self.suffix
            self.rucio_rse_type = 'TAPE'
        else:
            self.rsename = pnn + self.suffix
            self.rucio_rse_type = 'DISK'

        if tfc and os.path.isdir(tfc):
            self.tfc = tfc + '/' + pnn + '/PhEDEx/storage.xml'
        else:
            self.tfc = tfc

        self.pcli = PhEDEx(instance=instance, dasgoclient=dasgoclient,
                           datasvc=datasvc)
        self.rcli = Client(account=account, auth_type=auth_type)

        self.dry = dry

        self._get_attributes(fts, tier, lfn2pfn_algorithm, country, attrs)
        self._get_settings()
        self._get_protocol(seinfo, add_prefix, tfc_exclude, domains,
                           space_token, proto)

    def _get_attributes(self, fts, tier, lfn2pfn_algorithm, country, xattrs):
        """
        Gets the expected RSE attributes according to the given cmsrse
        parameters and to the info from PhEDEx.

        :fts:               fts server. If None, the server defined for the pnn is taken.
        :tier:              tier. If None, it is taken from the pnn.
        :lfn2pfn_algorithm: algorithm for lfn2pfn. If None, the default
                            rsetype-to-lfn2pfn mapping is used.
        :country:           country code. If None, it is taken from the pnn.
        :xattrs:            extra attributes.
        """
        attrs = {}

        attrs['fts'] = fts or self.pcli.fts(self.pnn)[0]

        pnn_match = PNN_MATCH.match(self.pnn)
        attrs['tier'] = tier or pnn_match.group(1)
        attrs['country'] = country or pnn_match.group(2)

        attrs['lfn2pfn_algorithm'] = lfn2pfn_algorithm or LFN2PFN_BYTYPE[self.rsetype]

        attrs[self.rsename] = 'True'
        attrs['pnn'] = self.pnn
        attrs['cms_type'] = self.rsetype

        for (key, value) in xattrs:
            attrs[key] = value

        self.attrs = attrs

    def _set_attributes(self):
        try:
            rattrs = self.rcli.list_rse_attributes(rse=self.rsename)
        except RSENotFound:
            rattrs = {}

        changed = False

        for (key, value) in self.attrs.items():
            if key not in rattrs or rattrs[key] != value:
                # Hack. I can find no way to define an attribute to 1
                # (systematically reinterpreted as True)
                if key in rattrs and rattrs[key] is True and \
                        (str(value) == '1' or str(value) == 'True'):
                    continue

                if key not in rattrs:
                    rattrs[key] = 'None'

                logging.debug('setting attribute %s from value %s to value %s for rse %s',
                              key, rattrs[key], value, self.rsename)
                changed = True

                if self.dry:
                    logging.info('setting attribute %s to value %s for rse %s. Dry run, skipping',
                                 key, value, self.rsename)
                else:
                    self.rcli.add_rse_attribute(rse=self.rsename, key=key, value=value)

        return changed

    def _get_settings(self):
        """
        Get expected settings for the RSE
        (so far only deterministic vs non-deterministic)
        """
        self.settings = {}
        if self.attrs['lfn2pfn_algorithm'] == 'hash':
            self.settings['deterministic'] = False
        else:
            self.settings['deterministic'] = True

    def _check_lfn2pfn(self):
        """
        Checks that lfn2pfn works properly
        """
        for lfn in SE_PROBES_BYTYPE[self.rsetype]:
            # this is what rucio does
            pfn = self.proto['scheme'] + '://' + self.proto['hostname'] + \
                ':' + str(self.proto['port'])

            if 'web_service_path' in self.proto['extended_attributes']:
                pfn = pfn + self.proto['extended_attributes']['web_service_path']

            pfn = pfn + '/' + cmstfc('cms', lfn, None, None, self.proto)

            # this should match dataservice pfn, modulo some normalization
            # (e.g.: adding the port number)
            pfn_datasvc = []
            wo_port = self.pcli.lfn2pfn(pnn=self.pnn, lfn=lfn, tfc=self.tfc,
                                        protocol=self.proto['extended_attributes']['tfc_proto'])
            wo_port = re.sub('/+', '/', wo_port)
            w_port = wo_port.replace(self.proto['hostname'],
                                     self.proto['hostname'] + ':' + str(self.proto['port']))

            # Get rid of ALL multiple slashes, including the one separating
            # protocol from host (valid for comparison only)
            pfn_datasvc.append(wo_port)
            pfn_datasvc.append(w_port)
            pfn = re.sub('/+', '/', pfn)

            if pfn not in pfn_datasvc:
                raise Exception("rucio and datasvc lfn2pfn mismatch, rucio: %s ; datasvc: %s"
                                % (pfn, pfn_datasvc))

            logging.debug("checking lfn2pfn ok %s", pfn)

    def _get_protocol(self, seinfo, add_prefix, exclude, domains, token, proto):
        """
        Get the information about the RSE protocol from the creator argument or from PhEDEx.

        :seinfo:     information about the SE (in the form of the seinfo method of the
                     PhEDEx class). If None, the info is gathered from PhEDEx using the
                     seinfo method.
        :add_prefix: path to be added to the prefix in seinfo. If None,
                     SE_ADD_PREFIX_BYTYPE is used.
        :tfc:        dictionary with tfc rules. If None, the info is gathered from
                     PhEDEx using the PhEDEx.tfc method.
        :exclude:    rules to be excluded from the tfc (in case it is gathered from PhEDEx).
        :domains:    domains dictionary. If None, the DOMAINS_BYTYPE constant is used.
        :token:      space token. Default None.
        :proto:      protocol to be considered. Default DEFAULT_PROTOCOL.
        """
        seinfo = seinfo or self.pcli.seinfo(pnn=self.pnn,
                                            probes=SE_PROBES_BYTYPE[self.rsetype],
                                            protocol=proto, tfc=self.tfc)

        if self.tfc is not None and self.tfc[0] == '/':
            pnn_arg = self.tfc
            self.tfc = None
        else:
            pnn_arg = self.pnn

        self.tfc = self.tfc or self.pcli.tfc(pnn=pnn_arg, dump=False, exclude=exclude,
                                             normalize=seinfo, proto=proto)

        domains = domains or DOMAINS_BYTYPE[self.rsetype]

        self.proto = {
            'scheme': seinfo['protocol'],
            'hostname': seinfo['hostname'],
            'port': seinfo['port'],
            'extended_attributes': {},
            'domains': domains
        }

        if 'webpath' in seinfo:
            self.proto['extended_attributes']['web_service_path'] = seinfo['webpath']

        if self.attrs['lfn2pfn_algorithm'] == 'cmstfc':
            self.proto['prefix'] = '/'
            self.proto['extended_attributes']['tfc_proto'] = proto
            self.proto['extended_attributes']['tfc'] = self.tfc
            self._check_lfn2pfn()
        else:
            if self.rsetype == "temp":
                if 'webpath' in seinfo:
                    self.proto['prefix'] = seinfo['prefix']
                else:
                    self.proto['prefix'] = '/' + seinfo['prefix']
            else:
                self.proto['prefix'] = seinfo['prefix']

            if add_prefix is None:
                add_prefix = SE_ADD_PREFIX_BYTYPE[self.rsetype]

            self.proto['prefix'] += add_prefix

        if token:
            self.proto['extended_attributes']['space_token'] = token

        if self.proto['extended_attributes'] == {}:
            self.proto['extended_attributes'] = None

        self.proto['impl'] = 'rucio.rse.protocols.gfalv2.Default'

    def _set_protocol(self):
        try:
            rprotos = self.rcli.get_protocols(rse=self.rsename)
        except (RSEProtocolNotSupported, RSENotFound):
            rprotos = []

        rproto = {}
        for item in rprotos:
            if item['scheme'] == self.proto['scheme']:
                rproto = item
                break

        update = False
        if self.proto != rproto:
            logging.debug("protocol definition not as expected: rucio=%s, expected=%s",
                          str(rproto), str(self.proto))
            update = True

        if update:
            if self.dry:
                logging.info('Modifying protocol to %s. Dry run, skipping', str(self.proto))
                return update

            try:
                self.rcli.delete_protocols(rse=self.rsename, scheme=self.proto['scheme'])
            except RSEProtocolNotSupported:
                logging.debug("Cannot remove protocol (scheme, rse) = (%s,%s)",
                              self.proto['scheme'], self.rsename)

            if (self.proto['scheme'] == 'srm' and 'extended_attribute' in self.proto
                    and 'web_service_path' in self.proto['extended_attributes']):
                self.rcli.add_protocol(rse=self.rsename, params=self.proto)

        return update

    def _create_rse(self):
        create = False

        try:
            rse = self.rcli.get_rse(self.rsename)
        except RSENotFound:
            create = True

        if not create and rse['deterministic'] != self.settings['deterministic']:
            raise Exception("The rse %s was created with the wrong deterministic setting!"
                            % self.rsename)

        if create:
            if self.dry:
                logging.info('creating rse %s with deterministic %s and type %s. Dry run, skipping',
                             self.rsename, self.settings['deterministic'], self.rucio_rse_type)
            else:
                self.rcli.add_rse(self.rsename, deterministic=self.settings['deterministic'],
                                  rse_type=self.rucio_rse_type)
                logging.debug('created rse %s', self.rsename)

        return create

    def update(self):
        """
        Creates, if needed, and updates the RSE according
        to CMS rules and PhEDEx data.
        """
        create_res = self._create_rse()
        attrs_res = self._set_attributes()
        proto_res = self._set_protocol()

        return create_res or attrs_res or proto_res
OPTIONS = PARSER.parse_args()

if OPTIONS.debug:
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.INFO)

if OPTIONS.domains:
    OPTIONS.domains = json.loads(OPTIONS.domains.replace("'", '"'))

if OPTIONS.seinfo is not None:
    OPTIONS.seinfo = json.loads(OPTIONS.seinfo.replace("'", '"'))

if 'all' in OPTIONS.pnn:
    OPTIONS.pnn = PhEDEx(instance=OPTIONS.instance).pnns(select=OPTIONS.select,
                                                         exclude=OPTIONS.exclude)

CHANGED = []
TOT = []

for node_name in OPTIONS.pnn:
    for rse_type in OPTIONS.type:
        logging.info('Starting pnn %s and type %s', node_name, rse_type)
        RSE = CMSRSE(pnn=node_name, rsetype=rse_type,
                     account=OPTIONS.account, dry=OPTIONS.dry,
                     suffix=OPTIONS.suffix, fts=OPTIONS.fts, tier=OPTIONS.tier,
                     lfn2pfn_algorithm=OPTIONS.lfn2pfn,
class LinksMatrix(object):
    """
    CMS RSE distances according to a set of rules
    """

    def __init__(self, account, auth_type=None, exclude=DEFAULT_EXCLUDE_LINKS,
                 distance=None, phedex_links=False, rselist=None,
                 instance=DEFAULT_PHEDEX_INST, datasvc=DEFAULT_DATASVC_URL):

        if distance is None:
            distance = DEFAULT_DISTANCE_RULES

        self.pcli = PhEDEx(instance=instance, datasvc=datasvc)
        self.rcli = Client(account=account, auth_type=auth_type)

        self._get_rselist(rselist)

        self._get_matrix(distance, phedex_links, exclude)

    def _get_rselist(self, rselist=None):

        self.rselist = []

        if rselist is None:
            rselist = [rse['rse'] for rse in self.rcli.list_rses()]

        for rse in rselist:
            attrs = self.rcli.list_rse_attributes(rse=rse)

            try:
                self.rselist.append({
                    'rse': rse,
                    'pnn': attrs['pnn'],
                    'type': attrs['cms_type'],
                    'country': attrs['country'],
                    'region': attrs.get('region', None)
                })
            except KeyError:
                logging.warning('No expected attributes for RSE %s. Skipping', rse)

    def _get_matrix(self, distance, phedex_links, exclude):

        if phedex_links:
            matrix = self.pcli.links()
        else:
            matrix = {}

        self.links = {}

        for src in self.rselist:
            for dest in self.rselist:
                src_rse = src['rse']
                dest_rse = dest['rse']
                src_pnn = src['pnn']
                dest_pnn = dest['pnn']
                link = -1

                # Within site or in defined region, don't consult PhEDEx
                if dest_pnn == src_pnn:
                    link = distance['site']
                elif src['region'] and dest['region'] and src['region'] == dest['region']:
                    if src['country'] == dest['country']:
                        link = distance['region&country']
                    else:
                        link = distance['region']
                elif src_pnn in matrix and dest_pnn in matrix[src_pnn]:
                    # If no information, use PhEDEx info if it exists
                    link = distance['site'] - matrix[src_pnn][dest_pnn]
                else:
                    if src['country'] == dest['country']:
                        link = distance['country']
                    else:
                        link = distance['other']

                if src_rse not in self.links:
                    self.links[src_rse] = {}

                self.links[src_rse][dest_rse] = link

        self._filter_matrix(exclude)

    def _filter_matrix(self, exclude):

        for src in self.rselist:
            for dest in self.rselist:
                if src['rse'] == dest['rse']:
                    continue

                for rule in exclude:
                    matched = True

                    for item in rule['src']:
                        if not re.match(rule['src'][item], src[item]):
                            matched = False

                    for item in rule['dest']:
                        if not re.match(rule['dest'][item], dest[item]):
                            matched = False

                    if matched:
                        self.links[src['rse']][dest['rse']] = -1
                        break

    def update(self, overwrite=False, disable=True, dry=False,
               srcselect=r'\S+', dstselect=r'\S+'):
        """
        Updates distances according to what is expected

        :overwrite: overwrite distance of the links that already exist
        :disable:   set ranking to 0 for the links that should be disabled
        :dry:       dry run
        """
        count = {'checked': [], 'created': [], 'updated': [], 'disabled': []}

        src_regex = re.compile(srcselect)
        dst_regex = re.compile(dstselect)

        for src in self.rselist:
            srse = src['rse']
            logging.info("Setting links from %s to %s other RSEs.", srse, len(self.rselist))

            for dest in self.rselist:
                drse = dest['rse']

                if srse == drse or not src_regex.match(srse) or not dst_regex.match(drse):
                    continue

                count['checked'].append([srse, drse])

                # Todo.. doublecheck I'm not reversing things
                link = self.rcli.get_distance(srse, drse)

                if srse in self.links and drse in self.links[srse] and self.links[srse][drse] >= 0:
                    if not link:
                        pars = {'distance': 1, 'ranking': self.links[srse][drse]}

                        if dry:
                            logging.info("adding link from %s to %s with %s. Dry Run",
                                         srse, drse, str(pars))
                        else:
                            self.rcli.add_distance(srse, drse, pars)

                        count['created'].append([srse, drse])

                    elif link and overwrite:
                        if dry:
                            logging.info("setting distance %s for link from %s to %s. Dry run.",
                                         self.links[srse][drse], srse, drse)
                        else:
                            self.rcli.update_distance(srse, drse, {
                                'ranking': self.links[srse][drse],
                                'distance': 1
                            })

                        count['updated'].append([srse, drse])

                elif link and disable:
                    if dry:
                        logging.info("disabling link from %s to %s. Dry run", srse, drse)
                    else:
                        self.rcli.update_distance(srse, drse, {
                            'ranking': None,
                            'distance': None,
                        })

                    count['disabled'].append([srse, drse])

        return count
class SiteSyncer(object): def __init__(self, options): self.options = options self.config = load_config(options.config) self.last_synced = {} # load_last_synced() self.phedex_svc = PhEDEx() pass def sync_site(self, site_pair): """ Sync a site defined by a site_pair of (site, prefix). Prefix can be None to sync all blocks in the site :return: """ site, prefix = site_pair # now = int(time.time()) # Set 1980 as the last sync date if no data exists # site_last_synced = self.last_synced.get(site_pair, 10 * 365 * 24 * 3600) # last_week = int(site_last_synced - 7 * 24 * 3600) if self.config.get('default', None): if self.config['default'].get('chunck', 0): BLOCKS_PER_ACTION = int(self.config['default']['chunck']) with monitor.record_timer_block('cms_sync.time_site_sync'): r_timer = 'cms_sync.time_rucio_block_list_all' p_timer = 'cms_sync.time_phedex_block_list_all' if prefix: r_timer = 'cms_sync.time_rucio_block_list_partial' p_timer = 'cms_sync.time_phedex_block_list_partial' with monitor.record_timer_block(p_timer): phedex_blocks = self.phedex_svc.blocks_at_site(pnn=site, prefix=prefix, since=None) with monitor.record_timer_block(r_timer): rucio_blocks = self.get_datasets_at_rse(rse=site, prefix=prefix) n_blocks_in_phedex = len(phedex_blocks) n_blocks_in_rucio = len(rucio_blocks) # FIXME: This is refusing to delete everything from Rucio. Not clear it's needed if not n_blocks_in_phedex and n_blocks_in_rucio: logging.warning( "At %s found %s blocks in PhEDEx and %s in Rucio with prefix %s", site, n_blocks_in_phedex, n_blocks_in_rucio, prefix) return if not n_blocks_in_phedex and not n_blocks_in_rucio: logging.info( "At %s:%s, nothing in PhEDEx or Rucio. Quitting." % (site, prefix)) return block_report = compare_site_blocks(phedex=phedex_blocks, rucio=rucio_blocks, rse=site) n_blocks_not_in_rucio = len(block_report['not_rucio']) n_blocks_not_in_phedex = len(block_report['not_phedex']) n_incomplete_blocks = len(block_report['incomplete']) logging.info("At %s: In both/PhEDEx only/Rucio only: %s/%s/%s" % (site, len(block_report['complete']), n_blocks_not_in_rucio, n_blocks_not_in_phedex)) # Truncate lists if we want to reduce cycle time if BLOCKS_PER_ACTION and n_blocks_not_in_rucio > BLOCKS_PER_ACTION: block_report['not_rucio'] = set( list(block_report['not_rucio'])[:BLOCKS_PER_ACTION]) n_blocks_not_in_rucio = len(block_report['not_rucio']) if BLOCKS_PER_ACTION and n_blocks_not_in_phedex > BLOCKS_PER_ACTION: block_report['not_phedex'] = set( list(block_report['not_phedex'])[:BLOCKS_PER_ACTION]) n_blocks_not_in_phedex = len(block_report['not_phedex']) logging.info('Adding %6d blocks to Rucio for %s:%s', n_blocks_not_in_rucio, site, prefix) for block in block_report['not_rucio']: bs = BlockSyncer(block_name=block, pnn=site, rse=site) bs.add_to_rucio() logging.info('Removing %6d blocks from Rucio for %s:%s', n_blocks_not_in_phedex, site, prefix) for block in block_report['not_phedex']: bs = BlockSyncer(block_name=block, pnn=site, rse=site) bs.remove_from_rucio() for block in block_report['incomplete']: logging.warn('Redoing sync for %s at %s', block, site) bs = BlockSyncer(block_name=block, pnn=site, rse=site) bs.add_to_rucio(recover=True) logging.info('Finished syncing %s:%s' % (site, prefix)) # FIXME: Resurrect code to check for size differences # self.last_synced[site_pair] = now # save_last_synced(self.last_synced) def chunks_to_sync(self): """ Turn the config into a list of site/prefix pairs which need to be synced :return: The site prefix pairs """ to_sync = [] for site, site_config in 
self.config.items(): if site not in ['default', 'main']: if site_config.get('multi_das_calls', False): for prefix in list(string.letters + string.digits): to_sync.append((site, prefix)) else: to_sync.append((site, None)) random.shuffle(to_sync) return to_sync @staticmethod def get_datasets_at_rse(rse, prefix=None): """ :param rse: The RSE name :param prefix: Character(s) to restrict the dataset search :return: a dictionary with <dataset name>: <number of files> """ filters = {'scope': 'cms', 'did_type': DIDType.DATASET} if prefix: filters['name'] = '/' + prefix + '*' with monitor.record_timer_block('cms_sync.time_rse_datasets'): all_datasets = [ dataset['name'] for dataset in list_datasets_per_rse(rse=rse, filters=filters) ] for dataset in all_datasets: datasets = { dataset: ds['available_length'] for ds in list_dataset_replicas( scope='cms', name=dataset, deep=True) if ds['rse'] == rse } # datasets = {dataset['name']: dataset['available_length'] # for dataset in list_datasets_per_rse(rse=rse, filters=filters, deep=True)} return datasets