Example #1
def compare_data_lists(blocks, datasets, pnn):
    """
    Compare the list of blocks at pnn and dataset at rse
    :blocks:   list of file blocks
    :datasets: list of rucio datasets
    :pnn:      phedex node name

    return the liste of datasets to add, remove and update
    as in DEFAULT_DATADIFF_DICT
    """

    ret = copy.deepcopy(DEFAULT_DATADIFF_DICT)

    dataitems = list(set(blocks) | set(datasets))

    for dataset in dataitems:
        if dataset not in datasets:
            ret['missing'].append(dataset)
            ret['summary']['missing'] += 1

        elif dataset not in blocks:
            ret['to_remove'].append(dataset)
            ret['summary']['to_remove'] += 1

        elif blocks[dataset] != datasets[dataset]:
            logging.warning("Dataset %s at pnn %s to update", dataset, pnn)
            ret['to_update'].append(dataset)
            ret['summary']['to_update'] += 1

        ret['summary']['tot'] += 1

    return ret
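
A minimal usage sketch (the sample inputs and the shape of DEFAULT_DATADIFF_DICT are assumptions for illustration; the real constant lives in the surrounding module):

import copy
import logging

DEFAULT_DATADIFF_DICT = {
    'missing': [], 'to_remove': [], 'to_update': [],
    'summary': {'missing': 0, 'to_remove': 0, 'to_update': 0, 'tot': 0},
}

# blocks: what phedex reports at the pnn; datasets: what rucio has at the rse
blocks = {'/A/B/RAW#1': 10, '/A/B/RAW#2': 5}
datasets = {'/A/B/RAW#2': 7, '/A/B/RAW#3': 3}

diff = compare_data_lists(blocks, datasets, 'T2_XX_Example')
# diff['missing']   == ['/A/B/RAW#1']   (at the pnn, unknown to rucio)
# diff['to_remove'] == ['/A/B/RAW#3']   (in rucio, gone from the pnn)
# diff['to_update'] == ['/A/B/RAW#2']   (present in both, content differs)
# diff['summary']   == {'missing': 1, 'to_remove': 1, 'to_update': 1, 'tot': 3}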
Example #2
def _launch_workers(pnns, datasets, pool, options, pcli):

    procs = []

    rcli = Client()

    for pnn in pnns:

        account = options.account or SYNC_ACCOUNT_FMT % pnn.lower()
        #        try:
        #            rcli = Client(account=account)
        #        except CannotAuthenticate:
        #            logging.warning("cannot authenticate with account %s, skipping pnn %s",
        #                            account, pnn)
        #            continue

        rse = list(rcli.list_rses('pnn=%s&cms_type=real' % pnn))

        if not rse:
            logging.warning("cannot find real rse for pnn %s, skipping", pnn)
            continue

        rse = rse[0]['rse']

        for dataset in datasets:
            procs.append(
                pool.apply_async(
                    dataset_replica_update,
                    (dataset, pnn, rse, pcli, account, options.dry)))

    return procs
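
Note the commented-out block above: an earlier revision authenticated a separate Client per sync account here and skipped the pnn on CannotAuthenticate. In this version the shared client is only used to resolve the rse, and the per-pnn account is passed to dataset_replica_update (Example #4), which authenticates inside the worker process.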
Example #3
def block_sync(pnn, rds, pcli, rcli):
    """
    Synchronize one rucio dataset at one rse
    :pnn:    phedex node name.
    :rds:    rucio dataset.
    :pcli:   phedex client.
    :rcli:   rucio client.
    """

    conf = _get_config(pnn)

    if 'block_verbosity' in conf:
        logging.my_lvl(conf['block_verbosity'])

    if not conf['run']:
        return 'aborted'

    if not _ping(rcli):
        logging.warning('Cannot Ping, aborting.')
        return 'aborted'

    ret = _replica_update(dataset=rds,
                          pnn=pnn,
                          rse=conf['rse'],
                          pcli=pcli,
                          rcli=rcli,
                          dry=conf['dry'])

    return ret
Example #4
def dataset_replica_update(dataset, pnn, rse, pcli, account, dry):
    """
    Just wrapping the update method.
    """

    try:
        rcli = Client(account=account)
    except CannotAuthenticate:
        logging.warning("cannot authenticate with account %s, skipping pnn %s",
                        account, pnn)
        return None

    logging.my_fmt(label='update:rse=%s:rds=%s' % (pnn, dataset))

    logging.notice('Starting.')

    try:
        ret = _replica_update(dataset, pnn, rse, pcli, rcli, dry)

    #pylint: disable=broad-except
    except Exception as exc:
        logging.error('Exception %s raised: %s',
                      type(exc).__name__,
                      traceback.format_exc().replace('\n', '~~'))
        return None

    logging.notice('Finished %s.', ret)

    return ret
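
The broad except here is deliberate: a worker dispatched via pool.apply_async should not die on an unexpected error, and folding the traceback onto one line with replace('\n', '~~') keeps the interleaved log output of many concurrent workers parseable.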
Example #5
    def register_container(self, dry=False):
        """
        Register container of the dataset
        (only if there is a dataset replica on the pnn)
        :dry: Dry run. Default false.
        """

        try:
            self.rcli.get_did(scope=self.scope, name=self.container)
            return 'exists'
        except DataIdentifierNotFound:
            pass

        if self.is_at_pnn and dry:
            logging.dry('Create container %s in scope %s.', self.container,
                        self.scope)
            return 'created'
        elif self.is_at_pnn:
            logging.verbose('Create container %s in scope %s.', self.container,
                            self.scope)
            try:
                self.rcli.add_container(scope=self.scope,
                                        name=self.container,
                                        lifetime=self.lifetime)

            except DataIdentifierAlreadyExists:
                logging.warning('Container was created in the meantime')
                return 'exists'

            return 'created'

        return 'skipped'
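
The method is idempotent under concurrency: the get_did probe handles the common case where the container already exists, and catching DataIdentifierAlreadyExists covers the race in which another worker creates the container between the probe and add_container.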
Example #6
def _pnn_abort(pnn, summary, rcli):
    """
    checking if the running flag is False
    and in case aborting.
    """
    conf = _get_config(pnn)

    if not _ping(rcli):
        logging.warning('Cannot Ping. Aborting')
        conf['run'] = False

    if not conf['run']:
        summary['status'] = 'aborted'
        return True

    return False
Example #7
def _load_config(conffile, modif=None, starttime=None):
    """
    Gets the conf file and dumps it to the
    working copy
    :conffile:  file to be loaded
    :modif:     dictionnary with modifications

    returns the content dictionnary
    """

    starttime = starttime or datetime.now()

    try:
        conf = _open_yaml(conffile, modif)

    except yaml.parser.ParserError:
        logging.warning('Problem parsing config. Using loaded one.')
        conf = _open_yaml(LOADED_CONF)

    default = dict(DEFAULT_PNN_CONF, **conf.pop('default'))
    main = dict(DEFAULT_MAIN_CONF, **conf.pop('main'))

    loaded = dict({'main': main}, **{
        pnn: dict(default, **dict({'rse': pnn}, **sec))
        for pnn, sec in conf.items()
    })

    loaded = {
        name: _run_status(sec, starttime)
        for name, sec in loaded.items()
    }

    logging.my_lvl(loaded['main']['verbosity'])

    logging.debug('Loaded conf %s from %s with modif %s', loaded, conffile,
                  modif)

    with open(LOADED_CONF, 'w') as outfile:
        yaml.dump(loaded, outfile, default_flow_style=False)

    return loaded
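
The layering can be seen with toy data (the section names and DEFAULT_PNN_CONF values below are made up for illustration): module defaults are overridden by the file's 'default' section, which each pnn section overrides in turn, with 'rse' defaulting to the pnn name:

DEFAULT_PNN_CONF = {'run': True, 'pool': 4, 'verbosity': 'info'}

conf = {  # as parsed from the yaml file
    'default': {'pool': 8},
    'T2_XX_Example': {'run': False},
}

default = dict(DEFAULT_PNN_CONF, **conf.pop('default'))
loaded = {
    pnn: dict(default, **dict({'rse': pnn}, **sec))
    for pnn, sec in conf.items()
}
# loaded['T2_XX_Example'] ==
#   {'run': False, 'pool': 8, 'verbosity': 'info', 'rse': 'T2_XX_Example'}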
Example #8
def sync(config, logs):
    """
    Main Sync process
    """

    logging.my_logfile(logs=logs)
    logging.my_fmt(label='main_sync')
    starttime = datetime.now()
    modify = {}
    workers = {}  # dict of currently running pnn workers
    pnns = None  # list of pnns to be launched
    pool = None

    pcli = PhEDEx()

    install_mp_handler()

    conf = _load_config(config, modify, starttime)

    pnns = []

    size = conf['main']['pool']

    logging.summary('Starting')

    while conf['main']['run']:

        if pool is None:
            logging.notice('Started pool of size %d', size)
            pool = multiprocessing.NDPool(size)

        add = [
            pnn for pnn, sec in conf.items()
            if pnn != 'main' and sec['run']
            and pnn not in workers and pnn not in pnns
        ]

        pnns += add

        random.shuffle(pnns)

        if not _ping():
            logging.warning('Cannot ping, not launching workers')
        else:
            _launch_workers(pool, workers, pnns, pcli)
            pnns = []

        _poll_workers(workers, pnns)

        conf = _load_config(config, modify, starttime)

        if not conf['main']['run'] or \
                conf['main']['pool'] != size:

            # trigger draining of all workers, close the pool and wait
            # for the task to be over
            conf = _load_config(config, {'default': {'run': False}}, starttime)
            _drain_up(workers, pnns)
            workers = {}
            pool.close()
            pool = None
            size = conf['main']['pool']

        else:
            time.sleep(conf['main']['sleep'])

    logging.summary('Exiting.')

    return config
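
When the main run flag drops or the pool size changes, the loop reloads the configuration with run forced to False for every pnn, drains the outstanding workers, closes the pool and picks up the new size; if the main run flag is still set, the next iteration starts a fresh pool.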
Example #9
def pnn_sync(pnn, pcli):
    """
    Synchronize one rucio dataset at one rse
    :pnn:    phedex node name.
    :pcli:   phedex client.
    """

    summary = copy.deepcopy(DEFAULT_PNN_SUMMARY)

    conf = _get_config(pnn)
    summary['conf'] = conf

    if 'verbosity' in conf:
        logging.my_lvl(conf['verbosity'])

    rcli = Client(account=SYNC_ACCOUNT_FMT % pnn.lower())

    if _pnn_abort(pnn, summary, rcli):
        return summary

    diff = get_node_diff(pnn, pcli, rcli, conf)
    summary['timing'].update(diff['timing'])
    diff = diff['return']
    summary['diff'] = diff['summary']

    if (diff['summary']['tot'] == diff['summary']['to_remove']) and \
        not conf['allow_clean']:
        logging.warning('All datasets to be removed. Aborting.')
        summary['status'] = 'aborted'
        return summary

    logging.notice("Got diff=%s, timing=%s", summary['diff'],
                   summary['timing'])

    if _pnn_abort(pnn, summary, rcli):
        return summary

    workers = get_timing(_launch_pnn_workers(conf, diff, pnn, pcli, rcli),
                         summary['timing'])

    summary['workers'] = len(workers)

    logging.notice("Launched %d workers, pool size %d, timing %s",
                   summary['workers'], int(conf['pool']),
                   summary['timing']['_launch_pnn_workers'])

    left = int(conf['chunck']) - summary['workers'] + int(
        conf['min_deletions'])

    if left > 0:
        workers_st = get_timing(
            _launch_pnn_workers_st(left, diff, pnn, pcli, rcli),
            summary['timing'])

        summary['workers_st'] = len(workers_st)

        logging.notice("Launched %d single thread workers, timing %s",
                       summary['workers_st'],
                       summary['timing']['_launch_pnn_workers_st'])

        workers = dict(workers, **workers_st)

    _get_pnn_workers(workers, summary)

    summary['status'] = 'finished'

    return summary
Example #10
    def update_replicas(self, dry=False):
        """
        Add or removes replicas for the dataset at rse.
        :dry:  Drydrun. default false
        """

        logging.notice('Updating replicas for %s:%s at %s',
                       self.scope, self.dataset, self.rse)

        replicas = self.rcli.list_replicas(
            [{'scope': self.scope, 'name': self.dataset}],
            rse_expression='rse=%s' % self.rse)

        rrepl = [repl['name'] for repl in replicas]

        prepl = list(self.replicas)

        missing = list(set(prepl) - set(rrepl))

        to_remove = list(set(rrepl) - set(prepl))

        if missing and dry:
            logging.dry('Adding replicas %s to rse %s.', str(missing),
                        self.rse)

        elif missing:
            logging.verbose('Adding replicas %s to rse %s.', str(missing),
                            self.rse)

            self.rcli.add_replicas(
                rse=self.rse,
                files=[{
                    'scope': self.scope,
                    'name': self.replicas[lfn]['name'],
                    'adler32': self.replicas[lfn]['checksum'],
                    'bytes': self.replicas[lfn]['size'],
                } for lfn in missing])

            # missing files that are not in the list of dataset files
            # are to be attached.
            lfns = [
                item['name']
                for item in self.rcli.list_files(scope=self.scope,
                                                 name=self.dataset)
            ]

            missing_lfns = list(set(missing) - set(lfns))
            if missing_lfns:
                logging.verbose('Attaching lfns %s to dataset %s.',
                                str(missing_lfns), self.dataset)

                try:
                    self.rcli.attach_dids(
                        scope=self.scope,
                        name=self.dataset,
                        dids=[{
                            'scope': self.scope,
                            'name': lfn
                        } for lfn in missing_lfns])

                except FileAlreadyExists:
                    logging.warning('Trying to attach already existing files.')

        if to_remove and dry:
            logging.dry('Removing replicas %s from rse %s.', str(to_remove),
                        self.rse)

        elif to_remove:
            logging.verbose('Removing replicas %s from rse %s.',
                            str(to_remove), self.rse)
            for to_remove_chunk in chunks(to_remove, REMOVE_CHUNK_SIZE):
                attempt = 0
                while True:
                    attempt += 1
                    try:
                        self.rcli.delete_replicas(
                            rse=self.rse,
                            files=[{
                                'scope': self.scope,
                                'name': lfn,
                            } for lfn in to_remove_chunk])
                        break
                    except DatabaseException:
                        logging.warning(
                            'DatabaseException raised, retrying...')
                        if attempt > 3:
                            raise
                        time.sleep(randint(1, 5))

        return {'added': missing, 'removed': to_remove}
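
The core of the method is a pair of set differences between what rucio lists and what phedex expects; with toy lfn names:

rrepl = ['f1', 'f2']   # lfns rucio already lists at the rse
prepl = ['f2', 'f3']   # lfns phedex says should be there

missing = list(set(prepl) - set(rrepl))    # ['f3'] -> add_replicas
to_remove = list(set(rrepl) - set(prepl))  # ['f1'] -> delete_replicas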
Example #11
def pnn_sync(pnn, pcli):
    """
    Synchronize one rucio dataset at one rse
    :pnn:    phedex node name.
    :pcli:   phedex client.
    """
    monitor.record_counter('cms_sync.site_started')
    summary = copy.deepcopy(DEFAULT_PNN_SUMMARY)

    conf = _get_config(pnn)
    summary['conf'] = conf

    if 'verbosity' in conf:
        logging.my_lvl(conf['verbosity'])

    rcli = Client(account=SYNC_ACCOUNT_FMT % pnn.lower())

    if _pnn_abort(pnn, summary, rcli):
        return summary
# Do the loop here? with conf['multi_das']

    if conf['multi_das_calls']:
        prefixes = list(string.ascii_letters + string.digits)
        random.shuffle(prefixes)
    else:
        prefixes = [None]

    for prefix in prefixes:
        diff = get_node_diff(pnn, pcli, rcli, conf, prefix=prefix)
        summary['timing'].update(diff['timing'])
        diff = diff['return']
        summary['diff'] = diff['summary']

        if (diff['summary']['tot']
                == diff['summary']['to_remove']) and not conf['allow_clean']:
            logging.warning('All datasets to be removed. Aborting.')
            summary['status'] = 'aborted'
            continue
            # return summary

        logging.notice("Got diff=%s, timing=%s", summary['diff'],
                       summary['timing'])

        if _pnn_abort(pnn, summary, rcli):
            return summary

        workers = get_timing(_launch_pnn_workers(conf, diff, pnn, pcli, rcli),
                             summary['timing'])

    summary['workers'] = len(workers)

    logging.notice("Launched %d workers, pool size %d, timing %s",
                   summary['workers'], int(conf['pool']),
                   summary['timing']['_launch_pnn_workers'])

    left = int(conf['chunck']) - summary['workers'] + int(
        conf['min_deletions'])

    if left > 0:
        workers_st = get_timing(
            _launch_pnn_workers_st(left, diff, pnn, pcli, rcli),
            summary['timing'])

        summary['workers_st'] = len(workers_st)

        logging.notice("Launched %d single thread workers, timing %s",
                       summary['workers_st'],
                       summary['timing']['_launch_pnn_workers_st'])

        workers = dict(workers, **workers_st)

    _get_pnn_workers(workers, summary)
    monitor.record_counter('cms_sync.site_completed')

    summary['status'] = 'finished'

    return summary
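
Compared to Example #9, this variant can shard the node diff by dataset-name prefix: with multi_das_calls set, get_node_diff runs once per shuffled alphanumeric prefix, which the option name suggests is meant to keep each individual DAS call small.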