Example #1
    def test_judge_evaluate_detach_datasetlock(self):
        """ JUDGE EVALUATOR: Test if the a datasetlock is detached correctly when removing a dataset from a container"""
        re_evaluator(once=True)

        scope = 'mock'
        files = create_files(3, scope, self.rse1, bytes=100)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')

        container = 'container_' + str(uuid())
        add_did(scope, container, DIDType.from_sym('CONTAINER'), 'jdoe')
        attach_dids(scope, container, [{'scope': scope, 'name': dataset}], 'jdoe')

        # Add a rule to the Container
        add_rule(dids=[{'scope': scope, 'name': container}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

        # Check if the datasetlock is there
        locks = [ds_lock for ds_lock in get_dataset_locks(scope=scope, name=dataset)]
        assert(len(locks) > 0)

        detach_dids(scope, container, [{'scope': scope, 'name': dataset}])

        # Fake judge
        re_evaluator(once=True)

        locks = [ds_lock for ds_lock in get_dataset_locks(scope=scope, name=dataset)]
        assert(len(locks) == 0)
Example #2
    def test_judge_evaluate_detach_datasetlock(self):
        """ JUDGE EVALUATOR: Test if the a datasetlock is detached correctly when removing a dataset from a container"""
        re_evaluator(once=True)

        scope = InternalScope('mock', **self.vo)
        files = create_files(3, scope, self.rse1_id, bytes=100)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe)
        attach_dids(scope, dataset, files, self.jdoe)

        container = 'container_' + str(uuid())
        add_did(scope, container, DIDType.from_sym('CONTAINER'), self.jdoe)
        attach_dids(scope, container, [{'scope': scope, 'name': dataset}], self.jdoe)

        # Add a rule to the Container
        add_rule(dids=[{'scope': scope, 'name': container}], account=self.jdoe, copies=1, rse_expression=self.rse1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

        # Check if the datasetlock is there
        locks = [ds_lock for ds_lock in get_dataset_locks(scope=scope, name=dataset)]
        assert(len(locks) > 0)

        detach_dids(scope, container, [{'scope': scope, 'name': dataset}])

        # Fake judge
        re_evaluator(once=True)

        locks = [ds_lock for ds_lock in get_dataset_locks(scope=scope, name=dataset)]
        assert(len(locks) == 0)
Example #3
File: lock.py Project: zzaiin/Rucio
def get_dataset_locks(scope, name):
    """
    Get the dataset locks of a dataset.

    :param scope:          Scope of the dataset.
    :param name:           Name of the dataset.
    :return:               List of dicts {'rse_id': ..., 'state': ...}
    """

    return lock.get_dataset_locks(scope=scope, name=name)
Example #4
def get_dataset_locks(scope, name, vo='def'):
    """
    Get the dataset locks of a dataset.

    :param scope:          Scope of the dataset.
    :param name:           Name of the dataset.
    :param vo:             The VO to act on.
    :return:               Generator of dicts {'rse_id': ..., 'state': ...}
    """

    scope = InternalScope(scope, vo=vo)

    locks = lock.get_dataset_locks(scope=scope, name=name)

    for l in locks:
        yield api_update_return_dict(l)
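
A minimal consumption sketch (not part of the Rucio sources above) for the API-layer get_dataset_locks: since it yields api_update_return_dict for each lock, the result is a generator and must be iterated or materialized before taking len(); the scope and name values here are hypothetical placeholders.

locks = list(get_dataset_locks(scope='mock', name='dataset_abc', vo='def'))
for ds_lock in locks:
    # Each dict carries at least 'rse_id' and 'state' per the docstring above.
    print(ds_lock['rse_id'], ds_lock['state'])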
Example #5
    def test_judge_add_dataset_to_container(self):
        """ JUDGE EVALUATOR: Test the judge when adding dataset to container"""
        scope = InternalScope('mock')
        files = create_files(3, scope, self.rse1_id)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe)
        attach_dids(scope, dataset, files, self.jdoe)

        parent_container = 'dataset_' + str(uuid())
        add_did(scope, parent_container, DIDType.from_sym('CONTAINER'),
                self.jdoe)
        # Add a first rule to the container
        add_rule(dids=[{
            'scope': scope,
            'name': parent_container
        }],
                 account=self.jdoe,
                 copies=2,
                 rse_expression=self.T1,
                 grouping='DATASET',
                 weight=None,
                 lifetime=None,
                 locked=False,
                 subscription_id=None)
        attach_dids(scope, parent_container, [{
            'scope': scope,
            'name': dataset
        }], self.jdoe)
        # Fake judge
        re_evaluator(once=True)

        # Check if the Locks are created properly
        for file in files:
            assert (len(
                get_replica_locks(scope=file['scope'],
                                  name=file['name'])) == 2)

        # Check if the DatasetLocks are created properly
        dataset_locks = [
            lock for lock in get_dataset_locks(scope=scope, name=dataset)
        ]
        assert (len(dataset_locks) == 2)
Example #6
    def test_judge_add_dataset_to_container(self):
        """ JUDGE EVALUATOR: Test the judge when adding dataset to container"""
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')

        parent_container = 'dataset_' + str(uuid())
        add_did(scope, parent_container, DIDType.from_sym('CONTAINER'), 'jdoe')
        # Add a first rule to the container
        add_rule(dids=[{'scope': scope, 'name': parent_container}], account='jdoe', copies=2, rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)
        attach_dids(scope, parent_container, [{'scope': scope, 'name': dataset}], 'jdoe')
        # Fake judge
        re_evaluator(once=True)

        # Check if the Locks are created properly
        for file in files:
            assert(len(get_replica_locks(scope=file['scope'], name=file['name'])) == 2)

        # Check if the DatasetLocks are created properly
        dataset_locks = [lock for lock in get_dataset_locks(scope=scope, name=dataset)]
        assert(len(dataset_locks) == 2)
Example #7
    def test_add_rule_dataset_dataset(self):
        """ REPLICATION RULE (CORE): Add a replication rule on a dataset, DATASET Grouping"""
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')

        add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

        # Check if the Locks are created properly
        t1 = set([self.rse1_id, self.rse3_id, self.rse5_id])
        first_locks = None
        for file in files:
            if first_locks is None:
                first_locks = set([lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])])
            rse_locks = set([lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])])
            assert(len(t1.intersection(rse_locks)) == 2)
            assert(len(first_locks.intersection(rse_locks)) == 2)

        # Check if the DatasetLocks are created properly
        dataset_locks = [lock for lock in get_dataset_locks(scope=scope, name=dataset)]
        assert(len(t1.intersection(set([lock['rse_id'] for lock in dataset_locks]))) == 2)
        assert(len(first_locks.intersection(set([lock['rse_id'] for lock in dataset_locks]))) == 2)
Example #8
def rebalance_rule(parent_rule_id,
                   activity,
                   rse_expression,
                   priority,
                   source_replica_expression=None,
                   comment=None):
    """
    Rebalance a replication rule to a new RSE

    :param parent_rule_id:             Replication rule to be rebalanced.
    :param activity:                   Activity to be used for the rebalancing.
    :param rse_expression:             RSE expression of the new rule.
    :param priority:                   Priority of the newly created rule.
    :param source_replica_expression:  Source replica expression of the new rule.
    :param comment:                    Comment to set on the new rules.
    :returns:                          The new child rule id.
    """
    parent_rule = get_rule(rule_id=parent_rule_id)

    if parent_rule['expires_at'] is None:
        lifetime = None
    else:
        lifetime = (parent_rule['expires_at'] -
                    datetime.utcnow()).days * 24 * 3600 + (
                        parent_rule['expires_at'] - datetime.utcnow()).seconds

    if parent_rule['grouping'] == RuleGrouping.ALL:
        grouping = 'ALL'
    elif parent_rule['grouping'] == RuleGrouping.NONE:
        grouping = 'NONE'
    else:
        grouping = 'DATASET'

    # check that a concurrent replica does not already exist at the target rse
    concurrent_replica = False
    try:
        for lock in get_dataset_locks(parent_rule['scope'],
                                      parent_rule['name']):
            if lock['rse'] == rse_expression:
                concurrent_replica = True
    except Exception as error:
        concurrent_replica = True
        print('Exception: get_dataset_locks not feasible for %s %s:' %
              (parent_rule['scope'], parent_rule['name']))
        raise error
    if concurrent_replica:
        return 'Concurrent replica exists at target rse!'
    print(concurrent_replica)

    child_rule = add_rule(
        dids=[{
            'scope': parent_rule['scope'],
            'name': parent_rule['name']
        }],
        account=parent_rule['account'],
        copies=parent_rule['copies'],
        rse_expression=rse_expression,
        grouping=grouping,
        weight=parent_rule['weight'],
        lifetime=lifetime,
        locked=parent_rule['locked'],
        subscription_id=parent_rule['subscription_id'],
        source_replica_expression=source_replica_expression,
        activity=activity,
        notify=parent_rule['notification'],
        purge_replicas=parent_rule['purge_replicas'],
        ignore_availability=False,
        comment=parent_rule['comments'] if not comment else comment,
        ask_approval=False,
        asynchronous=False,
        priority=priority)[0]

    update_rule(rule_id=parent_rule_id,
                options={
                    'child_rule_id': child_rule,
                    'lifetime': 0
                })
    return child_rule
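
The remaining-lifetime arithmetic near the top of rebalance_rule (days * 24 * 3600 plus the seconds component of the timedelta) can be hard to read at a glance. A minimal equivalent sketch using timedelta.total_seconds(), assuming expires_at lies in the future; the helper name is hypothetical and not part of Rucio:

from datetime import datetime

def remaining_lifetime_seconds(expires_at):
    # For future expiry dates this matches delta.days * 24 * 3600 + delta.seconds,
    # since int() drops the sub-second part that total_seconds() also includes.
    if expires_at is None:
        return None
    return int((expires_at - datetime.utcnow()).total_seconds())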
Example #9
def rebalance_rse(rse,
                  max_bytes=1E9,
                  max_files=None,
                  dry_run=False,
                  exclude_expression=None,
                  comment=None,
                  force_expression=None,
                  mode=None,
                  priority=3,
                  source_replica_expression=None,
                  session=None):
    """
    Rebalance data from an RSE

    :param rse:                        RSE to rebalance data from.
    :param max_bytes:                  Maximum amount of bytes to rebalance.
    :param max_files:                  Maximum amount of files to rebalance.
    :param dry_run:                    Only run in dry-run mode.
    :param exclude_expression:         Exclude this rse_expression from being target_rses.
    :param comment:                    Comment to set on the new rules.
    :param force_expression:           Force a specific rse_expression as target.
    :param mode:                       BB8 mode to execute (None=normal, 'decomission'=Decommission mode)
    :param priority:                   Priority of the newly created rules.
    :param source_replica_expression:  Source replica expression of the newly created rules.
    :param session:                    The database session.
    :returns:                          List of rebalanced datasets.
    """
    rebalanced_bytes = 0
    rebalanced_files = 0
    rebalanced_datasets = []
    rse_attributes = list_rse_attributes(rse=rse, session=session)

    print('***************************')
    print('BB8 - Execution Summary')
    print('Mode:    %s' % ('STANDARD' if mode is None else mode.upper()))
    print('Dry Run: %s' % (dry_run))
    print('***************************')

    print('scope:name rule_id bytes(GB) target_rse child_rule_id')

    for scope, name, rule_id, rse_expression, subscription_id, bytes, length in list_rebalance_rule_candidates(
            rse=rse, mode=mode):
        if force_expression is not None and subscription_id is not None:
            continue

        if rebalanced_bytes + bytes > max_bytes:
            continue
        if max_files:
            if rebalanced_files + length > max_files:
                continue

        try:
            other_rses = [
                r['rse_id']
                for r in get_dataset_locks(scope, name, session=session)
            ]

            # Select the target RSE for this rule
            try:
                target_rse_exp = select_target_rse(
                    current_rse=rse,
                    rse_expression=rse_expression,
                    subscription_id=subscription_id,
                    rse_attributes=rse_attributes,
                    other_rses=other_rses,
                    exclude_expression=exclude_expression,
                    force_expression=force_expression,
                    session=session)
                # Rebalance this rule
                if not dry_run:
                    child_rule_id = rebalance_rule(
                        parent_rule_id=rule_id,
                        activity='Data rebalancing',
                        rse_expression=target_rse_exp,
                        priority=priority,
                        source_replica_expression=source_replica_expression,
                        comment=comment)
                else:
                    child_rule_id = ''
            except (InsufficientTargetRSEs, DuplicateRule, RuleNotFound,
                    InsufficientAccountLimit):
                continue
            print('%s:%s %s %d %s %s' % (scope, name, str(rule_id),
                                         int(bytes / 1E9), target_rse_exp,
                                         child_rule_id))
            if 'Concurrent' in str(child_rule_id):
                print(str(child_rule_id))
                continue
            rebalanced_bytes += bytes
            rebalanced_files += length
            rebalanced_datasets.append(
                (scope, name, bytes, length, target_rse_exp, rule_id,
                 child_rule_id))
        except Exception as error:
            print('Exception %s occurred while rebalancing %s:%s, rule_id: %s!' %
                  (str(error), scope, name, str(rule_id)))
            raise error

    print('BB8 is rebalancing %d GB of data (%d rules)' % (int(
        rebalanced_bytes / 1E9), len(rebalanced_datasets)))
    return rebalanced_datasets
Example #10
def atropos(thread,
            bulk,
            date_check,
            dry_run=True,
            grace_period=86400,
            once=True,
            unlock=False,
            spread_period=0,
            purge_replicas=False):
    """
    Creates an Atropos Worker that gets a list of rules which have an eol_at expired and delete them.

    :param thread: Thread number at startup.
    :param bulk: The number of requests to process.
    :param grace_period: The grace_period for the rules.
    :param once: Run only once.
    """

    sleep_time = 60

    executable = 'atropos'
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)
    now = datetime.datetime.now()
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    time.sleep(10)
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'], hb['nr_threads'])
    logging.debug(prepend_str + 'Starting worker')
    summary = {}
    lifetime_exceptions = {}
    rand = random.Random(hb['assign_thread'])
    for excep in rucio.core.lifetime_exception.list_exceptions(
            exception_id=None,
            states=[
                LifetimeExceptionsState.APPROVED,
            ],
            session=None):
        key = '{}:{}'.format(excep['scope'].internal, excep['name'])
        if key not in lifetime_exceptions:
            lifetime_exceptions[key] = excep['expires_at']
        elif lifetime_exceptions[key] < excep['expires_at']:
            lifetime_exceptions[key] = excep['expires_at']
    logging.debug(prepend_str +
                  '%s active exceptions' % len(lifetime_exceptions))
    if not dry_run and date_check > now:
        logging.error(
            prepend_str +
            'Atropos cannot run in non-dry-run mode for date in the future')
    else:
        while not GRACEFUL_STOP.is_set():

            hb = heartbeat.live(executable, hostname, pid, hb_thread)
            prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'],
                                                 hb['nr_threads'])

            stime = time.time()
            try:
                rules = get_rules_beyond_eol(date_check,
                                             thread,
                                             hb['nr_threads'],
                                             session=None)
                logging.info(prepend_str + '%s rules to process' %
                             (len(rules)))
                for rule_idx, rule in enumerate(rules, start=1):
                    did = '%s:%s' % (rule.scope, rule.name)
                    did_key = '{}:{}'.format(rule.scope.internal, rule.name)
                    logging.debug(prepend_str +
                                  'Working on rule %s on DID %s on %s' %
                                  (rule.id, did, rule.rse_expression))

                    if (rule_idx % 1000) == 0:
                        logging.info(prepend_str + '%s/%s rules processed' %
                                     (rule_idx, len(rules)))

                    # We compute the expected eol_at
                    try:
                        rses = parse_expression(rule.rse_expression,
                                                filter={'vo': rule.account.vo})
                    except InvalidRSEExpression:
                        logging.warning(
                            prepend_str +
                            'Rule %s has an RSE expression that results in an empty set: %s'
                            % (rule.id, rule.rse_expression))
                        continue
                    eol_at = rucio.core.lifetime_exception.define_eol(
                        rule.scope, rule.name, rses)
                    if eol_at != rule.eol_at:
                        logging.warning(
                            prepend_str +
                            'The computed eol %s differs from the one recorded %s for rule %s on %s at %s'
                            % (eol_at, rule.eol_at, rule.id, did,
                               rule.rse_expression))
                        try:
                            update_rule(rule.id, options={'eol_at': eol_at})
                        except RuleNotFound:
                            logging.warning(prepend_str +
                                            'Cannot find rule %s on DID %s' %
                                            (rule.id, did))
                            continue

                    # Check the exceptions
                    if did_key in lifetime_exceptions:
                        if eol_at > lifetime_exceptions[did_key]:
                            logging.info(
                                prepend_str +
                                'Rule %s on DID %s on %s has longer expiration date than the one requested : %s'
                                % (rule.id, did, rule.rse_expression,
                                   lifetime_exceptions[did_key]))
                        else:
                            # If eol_at < requested extension, update eol_at
                            logging.info(
                                prepend_str +
                                'Updating rule %s on DID %s on %s according to the exception till %s'
                                % (rule.id, did, rule.rse_expression,
                                   lifetime_exceptions[did_key]))
                            eol_at = lifetime_exceptions[did_key]
                            try:
                                update_rule(rule.id,
                                            options={
                                                'eol_at':
                                                lifetime_exceptions[did_key]
                                            })
                            except RuleNotFound:
                                logging.warning(
                                    prepend_str +
                                    'Cannot find rule %s on DID %s' %
                                    (rule.id, did))
                                continue

                    # Now check that the new eol_at is expired
                    if eol_at and eol_at < date_check:
                        no_locks = True
                        for lock in get_dataset_locks(rule.scope, rule.name):
                            if lock['rule_id'] == rule[4]:
                                no_locks = False
                                if lock['rse_id'] not in summary:
                                    summary[lock['rse_id']] = {}
                                if did_key not in summary[lock['rse_id']]:
                                    summary[lock['rse_id']][did_key] = {
                                        'length': lock['length'] or 0,
                                        'bytes': lock['bytes'] or 0
                                    }
                        if no_locks:
                            logging.warning(
                                prepend_str +
                                'Cannot find a lock for rule %s on DID %s' %
                                (rule.id, did))
                        if not dry_run:
                            lifetime = grace_period + rand.randrange(
                                spread_period + 1)
                            logging.info(
                                prepend_str +
                                'Setting %s seconds lifetime for rule %s' %
                                (lifetime, rule.id))
                            options = {'lifetime': lifetime}
                            if purge_replicas:
                                options['purge_replicas'] = True
                            if rule.locked and unlock:
                                logging.info(prepend_str + 'Unlocking rule %s',
                                             rule.id)
                                options['locked'] = False
                            try:
                                update_rule(rule.id, options=options)
                            except RuleNotFound:
                                logging.warning(
                                    prepend_str +
                                    'Cannot find rule %s on DID %s' %
                                    (rule.id, did))
                                continue
            except Exception:
                exc_type, exc_value, exc_traceback = exc_info()
                logging.critical(''.join(
                    format_exception(exc_type, exc_value,
                                     exc_traceback)).strip())

            for rse_id in summary:
                tot_size, tot_files, tot_datasets = 0, 0, 0
                for did in summary[rse_id]:
                    tot_datasets += 1
                    tot_files += summary[rse_id][did].get('length', 0)
                    tot_size += summary[rse_id][did].get('bytes', 0)
                vo = get_rse_vo(rse_id=rse_id)
                logging.info(
                    prepend_str +
                    'For RSE %s %s %s datasets will be deleted representing %s files and %s bytes'
                    % (get_rse_name(rse_id=rse_id), '' if vo == 'def' else
                       'on VO ' + vo, tot_datasets, tot_files, tot_size))

            if once:
                break
            else:
                tottime = time.time() - stime
                if tottime < sleep_time:
                    logging.info(prepend_str + 'Will sleep for %s seconds' %
                                 (str(sleep_time - tottime)))
                    time.sleep(sleep_time - tottime)
                    continue

        logging.info(prepend_str + 'Graceful stop requested')
        heartbeat.die(executable, hostname, pid, hb_thread)
        logging.info(prepend_str + 'Graceful stop done')
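
A hedged one-shot invocation sketch for the atropos worker above, running in dry-run mode so no rules are updated; the bulk argument appears unused in this snapshot but is still required by the signature:

import datetime

atropos(thread=0, bulk=1000, date_check=datetime.datetime.now(),
        dry_run=True, grace_period=86400, once=True)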
Example #11
def rebalance_rse(rse_id,
                  max_bytes=1E9,
                  max_files=None,
                  dry_run=False,
                  exclude_expression=None,
                  comment=None,
                  force_expression=None,
                  mode=None,
                  priority=3,
                  source_replica_expression='*\\bb8-enabled=false',
                  session=None,
                  logger=logging.log):
    """
    Rebalance data from an RSE
    :param rse_id:                     RSE to rebalance data from.
    :param max_bytes:                  Maximum amount of bytes to rebalance.
    :param max_files:                  Maximum amount of files to rebalance.
    :param dry_run:                    Only run in dry-run mode.
    :param exclude_expression:         Exclude this rse_expression from being target_rses.
    :param comment:                    Comment to set on the new rules.
    :param force_expression:           Force a specific rse_expression as target.
    :param mode:                       BB8 mode to execute (None=normal, 'decomission'=Decommission mode)
    :param priority:                   Priority of the newly created rules.
    :param source_replica_expression:  Source replica expression of the newly created rules.
    :param session:                    The database session.
    :param logger:                     Logger.
    :returns:                          List of rebalanced datasets.
    """
    rebalanced_bytes = 0
    rebalanced_files = 0
    rebalanced_datasets = []

    rse_attributes = list_rse_attributes(rse_id=rse_id, session=session)
    src_rse = get_rse_name(rse_id=rse_id)

    logger(logging.INFO, '***************************')
    logger(logging.INFO, 'BB8 - Execution Summary')
    logger(logging.INFO,
           'Mode:    %s' % ('STANDARD' if mode is None else mode.upper()))
    logger(logging.INFO, 'Dry Run: %s' % (dry_run))
    logger(logging.INFO, '***************************')

    for scope, name, rule_id, rse_expression, subscription_id, bytes_, length, fsize in list_rebalance_rule_candidates(
            rse_id=rse_id, mode=mode):
        if force_expression is not None and subscription_id is not None:
            continue

        if rebalanced_bytes + bytes_ > max_bytes:
            continue
        if max_files:
            if rebalanced_files + length > max_files:
                continue

        try:
            rule = get_rule(rule_id=rule_id)
            other_rses = [
                r['rse_id']
                for r in get_dataset_locks(scope, name, session=session)
            ]
            # Select the target RSE for this rule
            try:
                target_rse_exp = select_target_rse(
                    parent_rule=rule,
                    current_rse_id=rse_id,
                    rse_expression=rse_expression,
                    subscription_id=subscription_id,
                    rse_attributes=rse_attributes,
                    other_rses=other_rses,
                    exclude_expression=exclude_expression,
                    force_expression=force_expression,
                    session=session)
                # Rebalance this rule
                if not dry_run:
                    child_rule_id = rebalance_rule(
                        parent_rule=rule,
                        activity='Data rebalancing',
                        rse_expression=target_rse_exp,
                        priority=priority,
                        source_replica_expression=source_replica_expression,
                        comment=comment)
                else:
                    child_rule_id = ''
            except (InsufficientTargetRSEs, DuplicateRule, RuleNotFound,
                    InsufficientAccountLimit) as err:
                logger(logging.ERROR, str(err))
                continue
            if child_rule_id is None:
                logger(
                    logging.WARNING,
                    'A rule for %s:%s already exists on %s. It cannot be rebalanced',
                    scope, name, target_rse_exp)
                continue
            logger(
                logging.INFO,
                'Rebalancing %s:%s rule %s (%f GB) from %s to %s. New rule %s',
                scope, name, str(rule_id), bytes_ / 1E9,
                rule['rse_expression'], target_rse_exp, child_rule_id)
            rebalanced_bytes += bytes_
            rebalanced_files += length
            rebalanced_datasets.append(
                (scope, name, bytes_, length, target_rse_exp, rule_id,
                 child_rule_id))
        except Exception as error:
            logger(
                logging.ERROR,
                'Exception %s occurred while rebalancing %s:%s, rule_id: %s!',
                str(error), scope, name, str(rule_id))

    logger(logging.INFO, 'BB8 is rebalancing %d GB of data (%d rules) from %s',
           rebalanced_bytes / 1E9, len(rebalanced_datasets), src_rse)
    return rebalanced_datasets
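
A hedged dry-run sketch for the rse_id-based rebalance_rse above; the RSE id is a hypothetical placeholder, and with dry_run=True the candidate rules are reported but no child rules are created:

moved = rebalance_rse(rse_id='aaaabbbbccccdddd0000111122223333',  # hypothetical id
                      max_bytes=5E12, dry_run=True)
for scope, name, nbytes, length, target_rse, rule_id, child_id in moved:
    print('%s:%s (%d bytes) would move to %s' % (scope, name, nbytes, target_rse))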
Example #12
def atropos(thread,
            bulk,
            date_check,
            dry_run=True,
            grace_period=86400,
            once=True):
    """
    Creates an Atropos Worker that gets a list of rules which have an eol_at expired and delete them.

    :param thread: Thread number at startup.
    :param bulk: The number of requests to process.
    :param grace_period: The grace_period for the rules.
    :param once: Run only once.
    """

    sleep_time = 60

    executable = ' '.join(argv)
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)
    now = datetime.datetime.now()
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    summary = {}
    lifetime_exceptions = get_lifetime_exceptions()
    prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'] + 1,
                                         hb['nr_threads'])
    if not dry_run and date_check > now:
        logging.error(
            prepend_str +
            'Atropos cannot run in non-dry-run mode for date in the future')
    else:
        while not graceful_stop.is_set():

            hb = heartbeat.live(executable, hostname, pid, hb_thread)
            prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'] + 1,
                                                 hb['nr_threads'])

            stime = time.time()
            try:
                rules = get_rules_beyond_eol(date_check, thread,
                                             hb['nr_threads'] - 1)
                logging.info(prepend_str + '%s rules to process' %
                             (len(rules)))
                rule_idx = 0
                for rule in rules:
                    rule_idx += 1
                    logging.debug(
                        prepend_str + 'Working on rule %s on DID %s:%s on %s' %
                        (rule.id, rule.scope, rule.name, rule.rse_expression))

                    if (rule_idx % 1000) == 0:
                        logging.info(prepend_str + '%s/%s rules processed' %
                                     (rule_idx, len(rules)))
                    # We compute the expected eol_at
                    rses = parse_expression(rule.rse_expression)
                    eol_at = define_eol(rule.scope, rule.name, rses)

                    # Check the exceptions
                    if rule.name in lifetime_exceptions:
                        if rule.eol_at > lifetime_exceptions[rule.name]:
                            logging.info(
                                prepend_str +
                                'Rule %s on DID %s:%s on %s expired. Extension requested till %s'
                                % (rule.id, rule.scope, rule.name,
                                   rule.rse_expression,
                                   lifetime_exceptions[rule.name]))
                        else:
                            # If eol_at < requested extension, update eol_at
                            logging.info(
                                prepend_str +
                                'Updating rule %s on DID %s:%s on %s according to the exception till %s'
                                % (rule.id, rule.scope, rule.name,
                                   rule.rse_expression,
                                   lifetime_exceptions[rule.name]))
                            try:
                                update_rule(rule.id,
                                            options={
                                                'eol_at':
                                                lifetime_exceptions[rule.name]
                                            })
                            except RuleNotFound:
                                logging.warning(
                                    prepend_str +
                                    'Cannot find rule %s on DID %s:%s' %
                                    (rule.id, rule.scope, rule.name))
                    elif eol_at != rule.eol_at:
                        logging.warning(
                            prepend_str +
                            'The computed eol %s differs from the one recorded %s for rule %s on %s:%s at %s'
                            % (eol_at, rule.eol_at, rule.id, rule.scope,
                               rule.name, rule.rse_expression))
                        try:
                            update_rule(rule.id, options={'eol_at': eol_at})
                        except RuleNotFound:
                            logging.warning(
                                prepend_str +
                                'Cannot find rule %s on DID %s:%s' %
                                (rule.id, rule.scope, rule.name))

                    no_locks = True
                    for lock in get_dataset_locks(rule.scope, rule.name):
                        if lock['rule_id'] == rule[4]:
                            no_locks = False
                            if lock['rse'] not in summary:
                                summary[lock['rse']] = {}
                            did_key = '%s:%s' % (rule.scope, rule.name)
                            if did_key not in summary[lock['rse']]:
                                summary[lock['rse']][did_key] = {
                                    'length': lock['length'] or 0,
                                    'bytes': lock['bytes'] or 0
                                }
                    if no_locks:
                        logging.warning(
                            prepend_str +
                            'Cannot find a lock for rule %s on DID %s:%s' %
                            (rule.id, rule.scope, rule.name))
                    if not dry_run:
                        logging.info(
                            prepend_str +
                            'Setting %s seconds lifetime for rule %s' %
                            (grace_period, rule.id))
                        try:
                            update_rule(rule.id,
                                        options={'lifetime': grace_period})
                        except RuleNotFound:
                            logging.warning(
                                prepend_str +
                                'Cannot find rule %s on DID %s:%s' %
                                (rule.id, rule.scope, rule.name))
            except Exception:
                exc_type, exc_value, exc_traceback = exc_info()
                logging.critical(''.join(
                    format_exception(exc_type, exc_value,
                                     exc_traceback)).strip())

            for rse in summary:
                tot_size, tot_files, tot_datasets = 0, 0, 0
                for did in summary[rse]:
                    tot_datasets += 1
                    tot_files += summary[rse][did].get('length', 0)
                    tot_size += summary[rse][did].get('bytes', 0)
                logging.info(
                    prepend_str +
                    'For RSE %s %s datasets will be deleted representing %s files and %s bytes'
                    % (rse, tot_datasets, tot_files, tot_size))

            if once:
                break
            else:
                tottime = time.time() - stime
                if tottime < sleep_time:
                    logging.info(prepend_str + 'Will sleep for %s seconds' %
                                 (str(sleep_time - tottime)))
                    time.sleep(sleep_time - tottime)
                    continue

        logging.info(prepend_str + 'Graceful stop requested')
        heartbeat.die(executable, hostname, pid, hb_thread)
        logging.info(prepend_str + 'Graceful stop done')
Example #13
def rebalance_rule(parent_rule,
                   activity,
                   rse_expression,
                   priority,
                   source_replica_expression='*\\bb8-enabled=false',
                   comment=None):
    """
    Rebalance a replication rule to a new RSE

    :param parent_rule:                Replication rule to be rebalanced.
    :param activity:                   Activity to be used for the rebalancing.
    :param rse_expression:             RSE expression of the new rule.
    :param priority:                   Priority of the newly created rule.
    :param source_replica_expression:  Source replica expression of the new rule.
    :param comment:                    Comment to set on the new rules.
    :returns:                          The new child rule id.
    """

    if parent_rule['expires_at'] is None:
        lifetime = None
    else:
        lifetime = (parent_rule['expires_at'] -
                    datetime.utcnow()).days * 24 * 3600 + (
                        parent_rule['expires_at'] - datetime.utcnow()).seconds

    if parent_rule['grouping'] == RuleGrouping.ALL:
        grouping = 'ALL'
    elif parent_rule['grouping'] == RuleGrouping.NONE:
        grouping = 'NONE'
    else:
        grouping = 'DATASET'

    # ensure that expressions are for correct vo
    rule_vo = parent_rule['scope'].vo
    if parent_rule['scope'].vo != 'def':
        source_replica_expression = 'vo={}&({})'.format(
            rule_vo, source_replica_expression)
        rse_expression = 'vo={}&({})'.format(rule_vo, rse_expression)

    # check that a concurrent replica does not already exist at the target rse
    concurrent_replica = False
    try:
        for lock in get_dataset_locks(parent_rule['scope'],
                                      parent_rule['name']):
            lock_rse_expr = lock['rse']
            if rule_vo != 'def':
                lock_rse_expr = 'vo={}&({})'.format(rule_vo, lock_rse_expr)
            if lock_rse_expr == rse_expression:  # may need to evaluate to be sure... could get 'vo=tst&(vo=tst&(MOCK))'
                concurrent_replica = True
    except Exception as error:
        concurrent_replica = True
        print('Exception: get_dataset_locks not feasible for %s %s:' %
              (parent_rule['scope'], parent_rule['name']))
        raise error
    if concurrent_replica:
        return 'Concurrent replica exists at target rse!'
    print(concurrent_replica)

    child_rule = add_rule(
        dids=[{
            'scope': parent_rule['scope'],
            'name': parent_rule['name']
        }],
        account=parent_rule['account'],
        copies=parent_rule['copies'],
        rse_expression=rse_expression,
        grouping=grouping,
        weight=parent_rule['weight'],
        lifetime=lifetime,
        locked=parent_rule['locked'],
        subscription_id=parent_rule['subscription_id'],
        source_replica_expression=source_replica_expression,
        activity=activity,
        notify=parent_rule['notification'],
        purge_replicas=parent_rule['purge_replicas'],
        ignore_availability=False,
        comment=parent_rule['comments'] if not comment else comment,
        ask_approval=False,
        asynchronous=False,
        ignore_account_limit=True,
        priority=priority)[0]

    update_rule(rule_id=parent_rule['id'],
                options={
                    'child_rule_id': child_rule,
                    'lifetime': 0
                })
    return child_rule
Example #14
def rebalance_rule(
    parent_rule,
    activity,
    rse_expression,
    priority,
    source_replica_expression="*\\bb8-enabled=false",
    comment=None,
    session=None,
):
    """
    Rebalance a replication rule to a new RSE
    :param parent_rule:                Replication rule to be rebalanced.
    :param activity:                   Activity to be used for the rebalancing.
    :param rse_expression:             RSE expression of the new rule.
    :param priority:                   Priority of the newly created rule.
    :param source_replica_expression:  Source replica expression of the new rule.
    :param comment:                    Comment to set on the new rules.
    :param session:                    The database session.
    :returns:                          The new child rule id.
    """

    if parent_rule["expires_at"] is None:
        lifetime = None
    else:
        lifetime = (parent_rule["expires_at"] -
                    datetime.utcnow()).days * 24 * 3600 + (
                        parent_rule["expires_at"] - datetime.utcnow()).seconds

    if parent_rule["grouping"] == RuleGrouping.ALL:
        grouping = "ALL"
    elif parent_rule["grouping"] == RuleGrouping.NONE:
        grouping = "NONE"
    else:
        grouping = "DATASET"

    # check that a concurrent replica does not already exist at the target rse
    concurrent_replica = False
    for lock in get_dataset_locks(parent_rule["scope"], parent_rule["name"]):
        lock_rse_expr = lock["rse"]
        if lock_rse_expr == rse_expression:
            concurrent_replica = True

    if concurrent_replica:
        return None

    child_rule = add_rule(
        dids=[{
            "scope": parent_rule["scope"],
            "name": parent_rule["name"]
        }],
        account=parent_rule["account"],
        copies=parent_rule["copies"],
        rse_expression=rse_expression,
        grouping=grouping,
        weight=parent_rule["weight"],
        lifetime=lifetime,
        locked=parent_rule["locked"],
        subscription_id=parent_rule["subscription_id"],
        source_replica_expression=source_replica_expression,
        activity=activity,
        notify=parent_rule["notification"],
        purge_replicas=parent_rule["purge_replicas"],
        ignore_availability=False,
        comment=parent_rule["comments"] if not comment else comment,
        ask_approval=False,
        asynchronous=False,
        ignore_account_limit=True,
        priority=priority,
        session=session,
    )[0]

    update_rule(
        rule_id=parent_rule["id"],
        options={
            "child_rule_id": child_rule,
            "lifetime": 0
        },
        session=session,
    )
    return child_rule
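
A hedged usage sketch for the session-aware rebalance_rule variant above; the parent rule id, target expression, and priority are hypothetical placeholders. In this variant a None return signals that a concurrent replica already exists at the target RSE:

parent = get_rule(rule_id='1fc06d9ac0304a6cb4f4a4b0e1a1b6c8')  # hypothetical id
child_id = rebalance_rule(parent_rule=parent,
                          activity='Data rebalancing',
                          rse_expression='MOCK2',
                          priority=3,
                          comment='BB8 rebalancing sketch')
if child_id is None:
    print('A concurrent replica already exists at the target RSE.')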