Example #1
class SiteSyncer(object):
    def __init__(self, options):
        """Keep the run options and set up the per-instance sync state.

        :param options: object whose ``config`` attribute is handed to
            ``load_config``; it is also stored as ``self.options``
        """
        self.options = options
        # The configuration is loaded before the PhEDEx client is created.
        self.config = load_config(self.options.config)
        # Starts empty: restoring saved state via load_last_synced() is disabled.
        self.last_synced = {}
        self.phedex_svc = PhEDEx()
        # Starts empty; sync_site replaces it when the config has a 'select'.
        self.patterns = []


    def sync_site(self, site_pair):
        # NOTE(review): this listing is not valid Python as shown. Several
        # lines appear to have been lost (docstring quotes, `else:` lines, the
        # heads or tails of multi-line calls). The comments below describe only
        # what is visible and mark every guess as a guess.
        # The next line reads like docstring text whose quotes are missing.
        Sync a site defined by a site_pair of (site, prefix). Prefix can be None to sync all blocks in the site
        site, prefix = site_pair

        # Derive the PhEDEx node name (pnn) from the site name.
        # NOTE(review): as written, `pnn = site` overwrites the '_MSS' name and
        # pnn is never set for sites that do not end in '_Tape'; presumably an
        # `else:` was lost between the two assignments -- TODO confirm.
        if site.endswith('_Tape'):
            pnn = site.replace('_Tape', '_MSS')
            pnn = site

        # Hard-coded pnn override for one specific site name.
        if site == 'T3_CH_CERN_CTA_CastorTest':
            pnn = 'T0_CH_CERN_MSS'

        # now = int(time.time())

        # Set 1980 as the last sync date if no data exists
        # site_last_synced = self.last_synced.get(site_pair, 10 * 365 * 24 * 3600)
        # last_week = int(site_last_synced - 7 * 24 * 3600)

        # Optional overrides from the 'default' config section. 'chunck' is the
        # key exactly as spelled in the config.
        # NOTE(review): no `global BLOCKS_PER_ACTION` is visible, so this
        # assignment makes the name local to the method; when 'chunck' is not
        # configured the later `if BLOCKS_PER_ACTION and ...` checks would then
        # raise UnboundLocalError -- confirm whether a `global` line was lost.
        if self.config.get('default', None):
            if self.config['default'].get('chunck', 0):
                BLOCKS_PER_ACTION = int(self.config['default']['chunck'])
            if self.config['default'].get('select', None):
                self.patterns = [self.config['default']['select']]

        with monitor.record_timer_block('cms_sync.time_site_sync'):

            # Timer names: the "_all" variants are used unless a prefix is given.
            r_timer = 'cms_sync.time_rucio_block_list_all'
            p_timer = 'cms_sync.time_phedex_block_list_all'
            if prefix:
                r_timer = 'cms_sync.time_rucio_block_list_partial'
                p_timer = 'cms_sync.time_phedex_block_list_partial'

            # Add touches to keep from getting killed as long as progress is being made
            with monitor.record_timer_block(p_timer):
                touch(text='PQ ' + site)
                # NOTE(review): the remaining arguments and the closing
                # parenthesis of this call are missing.
                phedex_blocks = self.phedex_svc.blocks_at_site(pnn=pnn,
            with monitor.record_timer_block(r_timer):
                touch(text='RQ ' + site)
                # NOTE(review): this call is cut off in the same way.
                rucio_blocks = self.get_datasets_at_rse(rse=site,
                touch(text='DQ ' + site)

            n_blocks_in_phedex = len(phedex_blocks)
            n_blocks_in_rucio = len(rucio_blocks)

            # FIXME: This is refusing to delete everything from Rucio. Not clear it's needed
            # NOTE(review): both `if` bodies below start in the middle of a
            # call; presumably each was a logging call followed by an early
            # `return` (the second message says "Quitting.") -- TODO confirm.
            if not n_blocks_in_phedex and n_blocks_in_rucio:
                    "At %s found %s blocks in PhEDEx and %s in Rucio with prefix %s",
                    site, n_blocks_in_phedex, n_blocks_in_rucio, prefix)
            if not n_blocks_in_phedex and not n_blocks_in_rucio:
                    "At %s:%s, nothing in PhEDEx or Rucio. Quitting." %
                    (site, prefix))

            # NOTE(review): the remaining arguments of this call are missing.
            # The result is indexed below with the keys 'not_rucio',
            # 'not_phedex', 'incomplete' and 'complete'.
            block_report = compare_site_blocks(phedex=phedex_blocks,

            n_blocks_not_in_rucio = len(block_report['not_rucio'])
            n_blocks_not_in_phedex = len(block_report['not_phedex'])
            # Not read again in the visible part of this method.
            n_incomplete_blocks = len(block_report['incomplete'])

            logging.info("At %s:%s In both/PhEDEx only/Rucio only: %s/%s/%s" %
                         (site, prefix, len(block_report['complete']),
                          n_blocks_not_in_rucio, n_blocks_not_in_phedex))
            # Percentage reports; the guards keep the divisors non-zero.
            # NOTE(review): the call heads (and the end of the first divisor)
            # are missing from both statements below.
            if len(block_report['complete']
                   ) or n_blocks_not_in_rucio or n_blocks_not_in_phedex:
                    'At %s:%s %3.0f%% complete', site, prefix,
                    len(block_report['complete']) * 100 /
                    (len(block_report['complete']) + n_blocks_not_in_rucio +
            if len(block_report['complete']) or n_blocks_not_in_rucio:
                    'At %s:%s %3.0f%% completely added', site, prefix,
                    len(block_report['complete']) * 100 /
                    (len(block_report['complete']) + n_blocks_not_in_rucio))
            # Truncate lists if we want to reduce cycle time
            # NOTE(review): the argument of each `set(` call is missing;
            # presumably it keeps at most BLOCKS_PER_ACTION entries.
            if BLOCKS_PER_ACTION and n_blocks_not_in_rucio > BLOCKS_PER_ACTION:
                block_report['not_rucio'] = set(
                n_blocks_not_in_rucio = len(block_report['not_rucio'])
            if BLOCKS_PER_ACTION and n_blocks_not_in_phedex > BLOCKS_PER_ACTION:
                block_report['not_phedex'] = set(
                n_blocks_not_in_phedex = len(block_report['not_phedex'])

            # NOTE(review): in the three loops below a BlockSyncer is built but
            # no method is called on `bs` in the visible lines; presumably the
            # add/remove call that follows each construction was lost.
            logging.info('Adding   %6d blocks to   Rucio for %s:%s',
                         n_blocks_not_in_rucio, site, prefix)
            for block in block_report['not_rucio']:
                logging.info('Adding to rucio: %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)

            logging.info('Removing %6d blocks from Rucio for %s:%s',
                         n_blocks_not_in_phedex, site, prefix)
            for block in block_report['not_phedex']:
                logging.info('Removing from rucio: %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)

            for block in block_report['incomplete']:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warn('Redoing sync for %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)

            logging.info('Finished syncing                      %s:%s' %
                         (site, prefix))

    def chunks_to_sync(self):
        # NOTE(review): this listing is not valid Python as shown; lines appear
        # to have been lost. The next two lines read like docstring text whose
        # quotes are missing.
        Turn the config into a list of site/prefix pairs which need to be synced
        :return: The site prefix pairs

        to_sync = []

        # Every config section except 'default' and 'main' is treated as a site.
        for site, site_config in self.config.items():
            print('Site %s (%s)is ok %s' %
                  (site, type(site), site not in ['default', 'main']))
            if site not in ['default', 'main']:
                if site_config.get('multi_das_calls', False):
                    # One pass per ASCII letter and digit. For sites whose name
                    # contains the listed markers, some letters are replaced
                    # by a list of two-character prefixes.
                    for prefix in list(string.ascii_letters + string.digits):
                        # This branch tests 'CERN'; the 'M', 'D', 'T' and 'C'
                        # branches test 'T0' instead.
                        if (('CERN' in site) or ('FNAL' in site) or
                            ('_Tape' in site)) and prefix == 'S':
                            for fnal_prefix in ('Sc', 'Se', 'Si', 'Sp', 'St',
                                                'SI', 'SM', 'ST', 'SU', 'SV',
                                                'SS', 'Su', 'SP', 'SL'):
                                to_sync.append((site, fnal_prefix))
                        elif (('T0' in site) or ('FNAL' in site) or
                              ('_Tape' in site)) and prefix == 'M':
                            for fnal_prefix in ('Ma', 'MC', 'ME', 'Mi', 'Mo',
                                                'MS', 'Mu'):
                                to_sync.append((site, fnal_prefix))
                        elif (('T0' in site) or ('FNAL' in site) or
                              ('_Tape' in site)) and prefix == 'D':
                            for fnal_prefix in ('D0', 'Da', 'Di', 'DM', 'Do',
                                                'Dp', 'DP', 'Ds', 'DS', 'DY'):
                                to_sync.append((site, fnal_prefix))
                        elif (('T0' in site) or ('FNAL' in site) or
                              ('_Tape' in site)) and prefix == 'T':
                            # NOTE(review): this tuple is never closed; its
                            # last line(s) appear to be missing, so the full
                            # prefix list is unknown.
                            for fnal_prefix in ('T1', 'T4', 'T5', 'TH', 'TK',
                                                'TO', 'TA', 'TB', 'TC', 'TG',
                                                'TZ', 'T_', 'TS', 'TT', 'TW',
                                                'Tk', 'To', 'Ta', 'Tb', 'Te',
                                                'Tp', 'Tr', 'Ts', 'Tt', 'Tw',
                                to_sync.append((site, fnal_prefix))
                        elif (('CERN' in site) or
                              ('FNAL' in site)) and prefix == 'H':
                            # NOTE(review): this tuple is cut off as well.
                            for fnal_prefix in ('H0', 'H1', 'H2', 'H3', 'H4',
                                                'H5', 'H6', 'Ha', 'HA', 'Hc',
                                                'He', 'HE', 'HF', 'Hi', 'HI',
                                                'HJ', 'HL', 'Hp', 'HP', 'Hs',
                                                'HS', 'HT', 'HV', 'HW', 'Hy',
                                to_sync.append((site, fnal_prefix))
                        elif (('T0' in site) or ('FNAL' in site) or
                              ('_Tape' in site) or
                              ('_CTA' in site)) and prefix == 'C':
                            for fnal_prefix in ('Ca', 'CE', 'CG', 'Ch', 'CI',
                                                'CM', 'Co', 'CS'):
                                to_sync.append((site, fnal_prefix))
                        elif (('CERN' in site) or
                              ('FNAL' in site)) and prefix == 'Z':
                            for fnal_prefix in ('Z0', 'Z1', 'Z2', 'Z3', 'Z4',
                                                'Z5', 'ZA', 'Zb', 'ZB', 'Zc',
                                                'ZC', 'Ze', 'ZE', 'ZG', 'ZH',
                                                'ZJ', 'ZL', 'Zm', 'ZM', 'Zn',
                                                'ZN', 'Zp', 'ZP', 'ZR', 'Zt',
                                                'ZT', 'ZU', 'ZV', 'ZZ'):
                                to_sync.append((site, fnal_prefix))
                        elif (('CERN' in site) or
                              ('FNAL' in site)) and prefix == 'G':
                            for fnal_prefix in ('G_', 'G1', 'Ga', 'Ge', 'GF',
                                                'GG', 'Gj', 'GJ', 'Gl', 'GM',
                                                'Gr', 'Gs', 'GV'):
                                to_sync.append((site, fnal_prefix))
                            # NOTE(review): the indentation of the next two
                            # appends suggests each was the body of an `else:`
                            # that was lost (single-letter fallback, and the
                            # whole-site pair for sites without
                            # 'multi_das_calls') -- TODO confirm.
                            to_sync.append((site, prefix))
                    to_sync.append((site, None))

        # Cut the list (keep in order but choose a random starting point)
        # random.randrange raises ValueError when to_sync is empty.
        offset = random.randrange(len(to_sync))
        to_sync = to_sync[offset:] + to_sync[:offset]

        # NOTE(review): this assignment discards the list computed above and
        # replaces it with hard-coded pairs; it looks like a debugging
        # override. Its closing bracket is missing from the listing.
        to_sync = [
            #     # ('T1_US_FNAL_Tape', 'ST_s-channel_4f_leptonDecays_TuneCP5_13TeV-amcatnlo-pythia8/RunIISummer19UL18RECO-106X_upgrade2018_realistic_v11_L1v1-v1'),
            ('T0_CH_CERN_Tape', 'DQ'),
            ('T0_CH_CERN_Tape', 'TAC'),
            #     # ('T1_US_FNAL_Tape', 'VBFH_HToSSTo4Tau_MH-125_TuneCUETP8M1_13TeV-powheg-pythia8/RunIISummer16DR80Premix-PUMoriond17_rp_80X_mcRun2_asymptotic_2016_TrancheIV_v6-v2'),
            #     # ('T1_US_FNAL_Tape', 'ZeroBias1/Commissioning2018-26Apr2018-v1'),

        return to_sync

    # NOTE(review): there is no `self` parameter, yet sync_site calls this as
    # self.get_datasets_at_rse(rse=site, ...); presumably a @staticmethod
    # decorator line was lost -- TODO confirm.
    def get_datasets_at_rse(rse, prefix=None):

        # NOTE(review): the next three lines read like docstring text whose
        # quotes (and summary line) are missing.
        :param rse: The RSE name
        :param prefix: Character(s) to restrict the dataset search
        :return: a dictionary with <dataset name>: <number of files>

        # Filter for the per-RSE dataset listing; with a prefix, names are
        # restricted to the pattern '/<prefix>*'.
        filters = {'scope': 'cms', 'did_type': DIDType.DATASET}
        if prefix:
            filters['name'] = '/' + prefix + '*'

        # Account name built from the lower-cased RSE name; used to select
        # replication rules below.
        account = SYNC_ACCOUNT_FMT % rse.lower()
        # NOTE(review): the closing brace of this dict is missing.
        rule_filters = {
            'account': account,
            'scope': 'cms',
            'did_type': DIDType.DATASET

        with monitor.record_timer_block('cms_sync.time_rse_datasets'):
            # Rules with no expiry whose name starts with '/<prefix>' (all
            # names when prefix is None).
            # NOTE(review): the element expression and closing brace of this
            # comprehension are missing; presumably it collects item['name'],
            # since membership is tested against dataset names below.
            synced_ds = {
                for item in list_replication_rules(filters=rule_filters)
                if item['expires_at'] is None and (
                    prefix is None or item['name'].startswith('/' + prefix))

            # NOTE(review): the element expression and closing bracket are
            # missing here as well.
            all_datasets = [
                for dataset in list_datasets_per_rse(rse=rse, filters=filters)

            logging.info('Getting all datasets at %s with prefix %s' %
                         (rse, prefix))

            datasets = {}

            # Only datasets that also appear in synced_ds are reported; the
            # value stored is the 'available_length' of the replica at this RSE.
            for dataset in all_datasets:
                if dataset in synced_ds:
                    # NOTE(review): the remaining arguments of this call are
                    # missing.
                    for ds in list_dataset_replicas(scope='cms',
                        if ds['rse'] == rse:
                            datasets.update({dataset: ds['available_length']})

        return datasets
Example #2
class SiteSyncer(object):
    def __init__(self, options):
        """Keep the run options and set up the per-instance sync state.

        :param options: object whose ``config`` attribute is handed to
            ``load_config``; it is also stored as ``self.options``
        """
        self.options = options
        # The configuration is loaded before the PhEDEx client is created.
        self.config = load_config(self.options.config)
        # Starts empty: restoring saved state via load_last_synced() is disabled.
        self.last_synced = {}
        self.phedex_svc = PhEDEx()
        # Starts empty; sync_site replaces it when the config has a 'select'.
        self.patterns = []


    def sync_site(self, site_pair):
        # NOTE(review): this listing is not valid Python as shown. Several
        # lines appear to have been lost (docstring quotes, `else:` lines, the
        # heads or tails of multi-line calls). The comments below describe only
        # what is visible and mark every guess as a guess.
        # The next line reads like docstring text whose quotes are missing.
        Sync a site defined by a site_pair of (site, prefix). Prefix can be None to sync all blocks in the site
        site, prefix = site_pair

        # Derive the PhEDEx node name (pnn) from the site name.
        # NOTE(review): as written, `pnn = site` overwrites the '_MSS' name and
        # pnn is never set for sites that do not end in '_Tape'; presumably an
        # `else:` was lost between the two assignments -- TODO confirm.
        if site.endswith('_Tape'):
            pnn = site.replace('_Tape', '_MSS')
            pnn = site

        # now = int(time.time())

        # Set 1980 as the last sync date if no data exists
        # site_last_synced = self.last_synced.get(site_pair, 10 * 365 * 24 * 3600)
        # last_week = int(site_last_synced - 7 * 24 * 3600)

        # Optional overrides from the 'default' config section. 'chunck' is the
        # key exactly as spelled in the config.
        # NOTE(review): no `global BLOCKS_PER_ACTION` is visible, so this
        # assignment makes the name local to the method; when 'chunck' is not
        # configured the later `if BLOCKS_PER_ACTION and ...` checks would then
        # raise UnboundLocalError -- confirm whether a `global` line was lost.
        if self.config.get('default', None):
            if self.config['default'].get('chunck', 0):
                BLOCKS_PER_ACTION = int(self.config['default']['chunck'])
            if self.config['default'].get('select', None):
                self.patterns = [self.config['default']['select']]

        with monitor.record_timer_block('cms_sync.time_site_sync'):

            # Timer names: the "_all" variants are used unless a prefix is given.
            r_timer = 'cms_sync.time_rucio_block_list_all'
            p_timer = 'cms_sync.time_phedex_block_list_all'
            if prefix:
                r_timer = 'cms_sync.time_rucio_block_list_partial'
                p_timer = 'cms_sync.time_phedex_block_list_partial'

            # Add touches to keep from getting killed as long as progress is being made
            with monitor.record_timer_block(p_timer):
                touch(text='PQ ' + site)
                # NOTE(review): the remaining arguments and the closing
                # parenthesis of this call are missing.
                phedex_blocks = self.phedex_svc.blocks_at_site(pnn=pnn,
            with monitor.record_timer_block(r_timer):
                touch(text='RQ ' + site)
                # NOTE(review): this call is cut off in the same way.
                rucio_blocks = self.get_datasets_at_rse(rse=site,
                touch(text='DQ ' + site)

            n_blocks_in_phedex = len(phedex_blocks)
            n_blocks_in_rucio = len(rucio_blocks)

            # FIXME: This is refusing to delete everything from Rucio. Not clear it's needed
            # NOTE(review): both `if` bodies below start in the middle of a
            # call; presumably each was a logging call followed by an early
            # `return` (the second message says "Quitting.") -- TODO confirm.
            if not n_blocks_in_phedex and n_blocks_in_rucio:
                    "At %s found %s blocks in PhEDEx and %s in Rucio with prefix %s",
                    site, n_blocks_in_phedex, n_blocks_in_rucio, prefix)
            if not n_blocks_in_phedex and not n_blocks_in_rucio:
                    "At %s:%s, nothing in PhEDEx or Rucio. Quitting." %
                    (site, prefix))

            # NOTE(review): the remaining arguments of this call are missing.
            # The result is indexed below with the keys 'not_rucio',
            # 'not_phedex', 'incomplete' and 'complete'.
            block_report = compare_site_blocks(phedex=phedex_blocks,

            n_blocks_not_in_rucio = len(block_report['not_rucio'])
            n_blocks_not_in_phedex = len(block_report['not_phedex'])
            # Not read again in the visible part of this method.
            n_incomplete_blocks = len(block_report['incomplete'])

            logging.info("At %s:%s In both/PhEDEx only/Rucio only: %s/%s/%s" %
                         (site, prefix, len(block_report['complete']),
                          n_blocks_not_in_rucio, n_blocks_not_in_phedex))
            # Percentage reports; the guards keep the divisors non-zero.
            # NOTE(review): the call heads (and the end of the first divisor)
            # are missing from both statements below.
            if len(block_report['complete']
                   ) or n_blocks_not_in_rucio or n_blocks_not_in_phedex:
                    'At %s:%s %3.0f%% complete', site, prefix,
                    len(block_report['complete']) * 100 /
                    (len(block_report['complete']) + n_blocks_not_in_rucio +
            if len(block_report['complete']) or n_blocks_not_in_rucio:
                    'At %s:%s %3.0f%% completely added', site, prefix,
                    len(block_report['complete']) * 100 /
                    (len(block_report['complete']) + n_blocks_not_in_rucio))
            # Truncate lists if we want to reduce cycle time
            # NOTE(review): the argument of each `set(` call is missing;
            # presumably it keeps at most BLOCKS_PER_ACTION entries.
            if BLOCKS_PER_ACTION and n_blocks_not_in_rucio > BLOCKS_PER_ACTION:
                block_report['not_rucio'] = set(
                n_blocks_not_in_rucio = len(block_report['not_rucio'])
            if BLOCKS_PER_ACTION and n_blocks_not_in_phedex > BLOCKS_PER_ACTION:
                block_report['not_phedex'] = set(
                n_blocks_not_in_phedex = len(block_report['not_phedex'])

            # NOTE(review): in the three loops below a BlockSyncer is built but
            # no method is called on `bs` in the visible lines; presumably the
            # add/remove call that follows each construction was lost.
            logging.info('Adding   %6d blocks to   Rucio for %s:%s',
                         n_blocks_not_in_rucio, site, prefix)
            for block in block_report['not_rucio']:
                logging.info('Adding to rucio: %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)

            logging.info('Removing %6d blocks from Rucio for %s:%s',
                         n_blocks_not_in_phedex, site, prefix)
            for block in block_report['not_phedex']:
                logging.info('Removing from rucio: %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)

            for block in block_report['incomplete']:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warn('Redoing sync for %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=pnn, rse=site)

            logging.info('Finished syncing                      %s:%s' %
                         (site, prefix))

    def chunks_to_sync(self):
        Turn the config into a list of site/prefix pairs which need to be synced
        :return: The site prefix pairs

        to_sync = []

        for site, site_config in self.config.items():
            print('Site %s (%s)is ok %s' %
                  (site, type(site), site not in ['default', 'main']))
            if site not in ['default', 'main']:
                if site_config.get('multi_das_calls', False):
                    for prefix in list(string.letters + string.digits):
                        if ('T0' in site or 'FNAL' in site) and prefix == 'S':
                            for fnal_prefix in ('Sc', 'Se', 'Si', 'Sp', 'St',
                                                'SI', 'SM', 'ST', 'SU', 'SV'):
                                to_sync.append((site, fnal_prefix))
                        elif 'FNAL' in site and prefix == 'M':
                            for fnal_prefix in ('Ma', 'MC', 'ME', 'Mi', 'Mo',
                                                'MS', 'Mu'):
                                to_sync.append((site, fnal_prefix))
                        elif ('T0' in site
                              or 'FNAL' in site) and prefix == 'D':
                            for fnal_prefix in ('Da', 'Di', 'DM', 'Do', 'DP',
                                                'Ds', 'DS', 'DY'):
                                to_sync.append((site, fnal_prefix))
                        elif ('T0' in site
                              or 'FNAL' in site) and prefix == 'T':
                            for fnal_prefix in ('T1', 'T4', 'T5', 'TH', 'TK',
                                                'TO', 'TA', 'TB', 'TC', 'TG',
                                                'TZ', 'T_', 'TT', 'TW', 'Tk',
                                                'To', 'Ta', 'Tb', 'Te', 'Tp',
                                                'Tr', 'Ts', 'Tt', 'Tw'):
                                to_sync.append((site, fnal_prefix))
                        elif ('T0' in site
                              or 'FNAL' in site) and prefix == 'H':
                            for fnal_prefix in ('H0', 'H1', 'Ha', 'He', 'Hi',
                                                'HJ', 'Hp', 'HP', 'Hs', 'HS',
                                                'HT', 'HV', 'HW', 'HZ'):
                                to_sync.append((site, fnal_prefix))
                            to_sync.append((site, prefix))
                    to_sync.append((site, None))

        # Cut the list (keep in order but choose a random starting point)
        offset = random.randrange(len(to_sync))
        to_sync = to_sync[offset:] + to_sync[:offset]

        return to_sync

    # NOTE(review): there is no `self` parameter, yet sync_site calls this as
    # self.get_datasets_at_rse(rse=site, ...); presumably a @staticmethod
    # decorator line was lost -- TODO confirm.
    def get_datasets_at_rse(rse, prefix=None):

        # NOTE(review): the next three lines read like docstring text whose
        # quotes (and summary line) are missing.
        :param rse: The RSE name
        :param prefix: Character(s) to restrict the dataset search
        :return: a dictionary with <dataset name>: <number of files>

        # Filter for the per-RSE dataset listing; with a prefix, names are
        # restricted to the pattern '/<prefix>*'.
        filters = {'scope': 'cms', 'did_type': DIDType.DATASET}
        if prefix:
            filters['name'] = '/' + prefix + '*'

        # Account name built from the lower-cased RSE name; used to select
        # replication rules below.
        account = SYNC_ACCOUNT_FMT % rse.lower()
        # NOTE(review): the closing brace of this dict is missing.
        rule_filters = {
            'account': account,
            'scope': 'cms',
            'did_type': DIDType.DATASET

        with monitor.record_timer_block('cms_sync.time_rse_datasets'):
            # Rules with no expiry whose name starts with '/<prefix>' (all
            # names when prefix is None).
            # NOTE(review): the element expression and closing brace of this
            # comprehension are missing; presumably it collects item['name'],
            # since membership is tested against dataset names below.
            synced_ds = {
                for item in list_replication_rules(filters=rule_filters)
                if item['expires_at'] is None and (
                    prefix is None or item['name'].startswith('/' + prefix))

            # NOTE(review): the element expression and closing bracket are
            # missing here as well.
            all_datasets = [
                for dataset in list_datasets_per_rse(rse=rse, filters=filters)

            logging.info('Getting all datasets at %s with prefix %s' %
                         (rse, prefix))

            datasets = {}

            # Only datasets that also appear in synced_ds are reported; the
            # value stored is the 'available_length' of the replica at this RSE.
            for dataset in all_datasets:
                if dataset in synced_ds:
                    # NOTE(review): the remaining arguments of this call are
                    # missing.
                    for ds in list_dataset_replicas(scope='cms',
                        if ds['rse'] == rse:
                            datasets.update({dataset: ds['available_length']})

        return datasets
class SiteSyncer(object):
    def __init__(self, options):
        """Keep the run options and set up the per-instance sync state.

        :param options: object whose ``config`` attribute is handed to
            ``load_config``; it is also stored as ``self.options``
        """
        self.options = options
        # The configuration is loaded before the PhEDEx client is created.
        self.config = load_config(self.options.config)
        # Starts empty: restoring saved state via load_last_synced() is disabled.
        self.last_synced = {}
        self.phedex_svc = PhEDEx()


    def sync_site(self, site_pair):
        # NOTE(review): this listing is not valid Python as shown. Several
        # lines appear to have been lost (docstring quotes, the heads or tails
        # of multi-line calls). The comments below describe only what is
        # visible and mark every guess as a guess.
        # The next line reads like docstring text whose quotes are missing.
        Sync a site defined by a site_pair of (site, prefix). Prefix can be None to sync all blocks in the site
        site, prefix = site_pair
        # now = int(time.time())

        # Set 1980 as the last sync date if no data exists
        # site_last_synced = self.last_synced.get(site_pair, 10 * 365 * 24 * 3600)
        # last_week = int(site_last_synced - 7 * 24 * 3600)

        # Optional override from the 'default' config section. 'chunck' is the
        # key exactly as spelled in the config.
        # NOTE(review): no `global BLOCKS_PER_ACTION` is visible, so this
        # assignment makes the name local to the method; when 'chunck' is not
        # configured the later `if BLOCKS_PER_ACTION and ...` checks would then
        # raise UnboundLocalError -- confirm whether a `global` line was lost.
        if self.config.get('default', None):
            if self.config['default'].get('chunck', 0):
                BLOCKS_PER_ACTION = int(self.config['default']['chunck'])

        with monitor.record_timer_block('cms_sync.time_site_sync'):

            # Timer names: the "_all" variants are used unless a prefix is given.
            r_timer = 'cms_sync.time_rucio_block_list_all'
            p_timer = 'cms_sync.time_phedex_block_list_all'
            if prefix:
                r_timer = 'cms_sync.time_rucio_block_list_partial'
                p_timer = 'cms_sync.time_phedex_block_list_partial'

            # The site name is used directly as the PhEDEx node name here.
            # NOTE(review): the remaining arguments and closing parentheses of
            # the next two calls are missing.
            with monitor.record_timer_block(p_timer):
                phedex_blocks = self.phedex_svc.blocks_at_site(pnn=site,
            with monitor.record_timer_block(r_timer):
                rucio_blocks = self.get_datasets_at_rse(rse=site,

            n_blocks_in_phedex = len(phedex_blocks)
            n_blocks_in_rucio = len(rucio_blocks)

            # FIXME: This is refusing to delete everything from Rucio. Not clear it's needed
            # NOTE(review): both `if` bodies below start in the middle of a
            # call; presumably each was a logging call followed by an early
            # `return` (the second message says "Quitting.") -- TODO confirm.
            if not n_blocks_in_phedex and n_blocks_in_rucio:
                    "At %s found %s blocks in PhEDEx and %s in Rucio with prefix %s",
                    site, n_blocks_in_phedex, n_blocks_in_rucio, prefix)
            if not n_blocks_in_phedex and not n_blocks_in_rucio:
                    "At %s:%s, nothing in PhEDEx or Rucio. Quitting." %
                    (site, prefix))

            # NOTE(review): the remaining arguments of this call are missing.
            # The result is indexed below with the keys 'not_rucio',
            # 'not_phedex', 'incomplete' and 'complete'.
            block_report = compare_site_blocks(phedex=phedex_blocks,

            n_blocks_not_in_rucio = len(block_report['not_rucio'])
            n_blocks_not_in_phedex = len(block_report['not_phedex'])
            # Not read again in the visible part of this method.
            n_incomplete_blocks = len(block_report['incomplete'])

            logging.info("At %s: In both/PhEDEx only/Rucio only: %s/%s/%s" %
                         (site, len(block_report['complete']),
                          n_blocks_not_in_rucio, n_blocks_not_in_phedex))

            # Truncate lists if we want to reduce cycle time
            # NOTE(review): the argument of each `set(` call is missing;
            # presumably it keeps at most BLOCKS_PER_ACTION entries.
            if BLOCKS_PER_ACTION and n_blocks_not_in_rucio > BLOCKS_PER_ACTION:
                block_report['not_rucio'] = set(
                n_blocks_not_in_rucio = len(block_report['not_rucio'])
            if BLOCKS_PER_ACTION and n_blocks_not_in_phedex > BLOCKS_PER_ACTION:
                block_report['not_phedex'] = set(
                n_blocks_not_in_phedex = len(block_report['not_phedex'])

            # NOTE(review): in the three loops below a BlockSyncer is built but
            # no method is called on `bs` in the visible lines; presumably the
            # add/remove call that follows each construction was lost.
            logging.info('Adding   %6d blocks to   Rucio for %s:%s',
                         n_blocks_not_in_rucio, site, prefix)
            for block in block_report['not_rucio']:
                bs = BlockSyncer(block_name=block, pnn=site, rse=site)

            logging.info('Removing %6d blocks from Rucio for %s:%s',
                         n_blocks_not_in_phedex, site, prefix)
            for block in block_report['not_phedex']:
                bs = BlockSyncer(block_name=block, pnn=site, rse=site)

            for block in block_report['incomplete']:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warn('Redoing sync for %s at %s', block, site)
                bs = BlockSyncer(block_name=block, pnn=site, rse=site)

            logging.info('Finished syncing                      %s:%s' %
                         (site, prefix))

        # FIXME: Resurrect code to check for size differences

        # self.last_synced[site_pair] = now
        # save_last_synced(self.last_synced)

    def chunks_to_sync(self):
        Turn the config into a list of site/prefix pairs which need to be synced
        :return: The site prefix pairs

        to_sync = []

        for site, site_config in self.config.items():
            if site not in ['default', 'main']:
                if site_config.get('multi_das_calls', False):
                    for prefix in list(string.letters + string.digits):
                        to_sync.append((site, prefix))
                    to_sync.append((site, None))
        return to_sync

    @staticmethod
    def get_datasets_at_rse(rse, prefix=None):
        """
        List the datasets that have a replica at an RSE.

        Declared static because it takes no ``self`` and sync_site calls it as
        ``self.get_datasets_at_rse(rse=...)``; as a plain method that call
        would bind the instance to ``rse`` and fail.

        :param rse: The RSE name
        :param prefix: Character(s) to restrict the dataset search
        :return: a dictionary with <dataset name>: <number of files>
                 (the value is the replica's 'available_length'); empty when
                 the RSE has no matching datasets
        """
        filters = {'scope': 'cms', 'did_type': DIDType.DATASET}
        if prefix:
            filters['name'] = '/' + prefix + '*'

        # Created before the loop so results accumulate across datasets and an
        # RSE without datasets yields {} instead of an undefined name.
        datasets = {}

        with monitor.record_timer_block('cms_sync.time_rse_datasets'):
            # NOTE(review): the element expression was missing from the
            # listing; dataset['name'] is taken from the commented-out variant
            # below and from the use of each entry as `name=` -- confirm.
            all_datasets = [
                dataset['name']
                for dataset in list_datasets_per_rse(rse=rse, filters=filters)
            ]

            for dataset in all_datasets:
                for ds in list_dataset_replicas(
                        scope='cms', name=dataset, deep=True):
                    # Replicas are listed for every RSE; keep only this one.
                    if ds['rse'] == rse:
                        datasets[dataset] = ds['available_length']

            # datasets = {dataset['name']: dataset['available_length']
            #             for dataset in list_datasets_per_rse(rse=rse, filters=filters, deep=True)}

        return datasets