Example 1
def update_replication_rule(rule_id, options, issuer, vo='def'):
    """
    Update lock state of a replication rule.

    :param rule_id:     The rule_id to lock.
    :param options:     Options dictionary.
    :param issuer:      The issuing account of this operation
    :param vo:          The VO to act on.
    :raises:            RuleNotFound if no Rule can be found.
    """
    kwargs = {'rule_id': rule_id, 'options': options}
    if 'approve' in options:
        if not has_permission(
                issuer=issuer, vo=vo, action='approve_rule', kwargs=kwargs):
            raise AccessDenied(
                'Account %s can not approve/deny this replication rule.' %
                (issuer))

        issuer = InternalAccount(issuer, vo=vo)
        if options['approve']:
            rule.approve_rule(rule_id=rule_id, approver=issuer)
        else:
            rule.deny_rule(rule_id=rule_id,
                           approver=issuer,
                           reason=options.get('comment', None))
    else:
        if not has_permission(
                issuer=issuer, vo=vo, action='update_rule', kwargs=kwargs):
            raise AccessDenied(
                'Account %s can not update this replication rule.' % (issuer))
        rule.update_rule(rule_id=rule_id, options=options)
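
A minimal usage sketch for the function above. The rule id and account name are placeholders, not values from the source; the call assumes the function and the AccessDenied exception shown in this example are importable.

# Hypothetical caller: approve a pending rule, then extend its lifetime.
try:
    update_replication_rule(rule_id='1234abcd', options={'approve': True},
                            issuer='jdoe', vo='def')
    update_replication_rule(rule_id='1234abcd', options={'lifetime': 86400},
                            issuer='jdoe', vo='def')
except AccessDenied as error:
    print('Not allowed: %s' % error)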
Example 2
    def test_judge_expire_rule_with_child_rule(self):
        """ JUDGE CLEANER: Test the judge when deleting expired rules with child rules"""
        scope = InternalScope('mock', **self.vo)
        files = create_files(3, scope, self.rse1_id)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.DATASET, self.jdoe)
        attach_dids(scope, dataset, files, self.jdoe)

        rule_id = add_rule(dids=[{
            'scope': scope,
            'name': dataset
        }],
                           account=self.jdoe,
                           copies=1,
                           rse_expression=self.rse1,
                           grouping='NONE',
                           weight='fakeweight',
                           lifetime=None,
                           locked=False,
                           subscription_id=None)[0]
        child_rule = add_rule(dids=[{
            'scope': scope,
            'name': dataset
        }],
                              account=self.jdoe,
                              copies=1,
                              rse_expression=self.rse3,
                              grouping='NONE',
                              weight='fakeweight',
                              lifetime=-3,
                              locked=False,
                              subscription_id=None)[0]
        update_rule(rule_id, {'child_rule_id': child_rule})

        rule_cleaner(once=True)
Example 3
    def test_locked_rule(self):
        """ REPLICATION RULE (CORE): Delete a locked replication rule"""
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')

        rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='NONE', weight='fakeweight', lifetime=None, locked=True, subscription_id=None)[0]

        assert_raises(AccessDenied, delete_rule, rule_id_1)
        update_rule(rule_id=rule_id_1, options={'locked': False})
        delete_rule(rule_id=rule_id_1)
Example 4
def update_replication_rule(rule_id, options, issuer):
    """
    Update lock state of a replication rule.

    :param rule_id:     The rule_id to lock.
    :param options:     Options dictionary.
    :param issuer:      The issuing account of this operation
    :raises:            RuleNotFound if no Rule can be found.
    """
    kwargs = {'rule_id': rule_id, 'options': options}
    if not has_permission(issuer=issuer, action='update_rule', kwargs=kwargs):
        raise AccessDenied('Account %s can not update this replication rule.' % (issuer))
    rule.update_rule(rule_id=rule_id, options=options)
Example 5
    def test_account_counter_rule_update(self):
        """ REPLICATION RULE (CORE): Test if the account counter is updated correctly when a rule is updated"""

        scope = 'mock'
        files = create_files(3, scope, self.rse1, bytes=100)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')

        rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)[0]

        account_update(once=True)
        account_counter_before_1 = get_account_counter(self.rse1_id, 'jdoe')
        account_counter_before_2 = get_account_counter(self.rse1_id, 'root')

        update_rule(rule_id, {'account': 'root'})
        account_update(once=True)

        # Check if the counter has been updated correctly
        account_counter_after_1 = get_account_counter(self.rse1_id, 'jdoe')
        account_counter_after_2 = get_account_counter(self.rse1_id, 'root')
        assert(account_counter_before_1['bytes'] - 3*100 == account_counter_after_1['bytes'])
        assert(account_counter_before_2['bytes'] + 3*100 == account_counter_after_2['bytes'])
Example 6
def rebalance_rule(parent_rule_id,
                   activity,
                   rse_expression,
                   priority,
                   source_replica_expression=None,
                   comment=None):
    """
    Rebalance a replication rule to a new RSE

    :param parent_rule_id:             Replication rule to be rebalanced.
    :param activity:                   Activity to be used for the rebalancing.
    :param rse_expression:             RSE expression of the new rule.
    :param priority:                   Priority of the newly created rule.
    :param source_replica_expression:  Source replica expression of the new rule.
    :param comment:                    Comment to set on the new rules.
    :returns:                          The new child rule id.
    """
    parent_rule = get_rule(rule_id=parent_rule_id)

    # Preserve the parent rule's remaining lifetime, in seconds
    # (days * 86400 + seconds of the timedelta, ignoring microseconds)
    if parent_rule['expires_at'] is None:
        lifetime = None
    else:
        lifetime = (parent_rule['expires_at'] -
                    datetime.utcnow()).days * 24 * 3600 + (
                        parent_rule['expires_at'] - datetime.utcnow()).seconds

    if parent_rule['grouping'] == RuleGrouping.ALL:
        grouping = 'ALL'
    elif parent_rule['grouping'] == RuleGrouping.NONE:
        grouping = 'NONE'
    else:
        grouping = 'DATASET'

    # check if concurrent replica at target rse does not exist
    concurrent_replica = False
    try:
        for lock in get_dataset_locks(parent_rule['scope'],
                                      parent_rule['name']):
            if lock['rse'] == rse_expression:
                concurrent_replica = True
    except Exception as error:
        concurrent_replica = True
        print('Exception: get_dataset_locks not feasible for %s %s:' %
              (parent_rule['scope'], parent_rule['name']))
        raise error
    if concurrent_replica:
        return 'Concurrent replica exists at target rse!'
    print(concurrent_replica)

    child_rule = add_rule(
        dids=[{
            'scope': parent_rule['scope'],
            'name': parent_rule['name']
        }],
        account=parent_rule['account'],
        copies=parent_rule['copies'],
        rse_expression=rse_expression,
        grouping=grouping,
        weight=parent_rule['weight'],
        lifetime=lifetime,
        locked=parent_rule['locked'],
        subscription_id=parent_rule['subscription_id'],
        source_replica_expression=source_replica_expression,
        activity=activity,
        notify=parent_rule['notification'],
        purge_replicas=parent_rule['purge_replicas'],
        ignore_availability=False,
        comment=parent_rule['comments'] if not comment else comment,
        ask_approval=False,
        asynchronous=False,
        priority=priority)[0]

    update_rule(rule_id=parent_rule_id,
                options={
                    'child_rule_id': child_rule,
                    'lifetime': 0
                })
    return child_rule
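
A hedged usage sketch for rebalance_rule as defined above; the rule id, activity, and RSE expression are made-up values, not taken from the source.

# Hypothetical invocation: move the rule's replicas to a new RSE expression.
result = rebalance_rule(parent_rule_id='deadbeef',
                        activity='Data rebalancing',
                        rse_expression='SITE2_DATADISK',
                        priority=3)
if result == 'Concurrent replica exists at target rse!':
    print('Skipped: a replica already exists at the target')
else:
    print('Created child rule %s' % result)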
Example 7
def atropos(thread,
            bulk,
            date_check,
            dry_run=True,
            grace_period=86400,
            once=True,
            unlock=False,
            spread_period=0,
            purge_replicas=False):
    """
    Creates an Atropos worker that gets a list of rules whose eol_at has expired and deletes them.

    :param thread: Thread number at startup.
    :param bulk: The number of requests to process.
    :param date_check: Only process rules whose eol_at lies before this date.
    :param dry_run: If True, only report; do not update any rule.
    :param grace_period: The grace period (in seconds) given to expiring rules.
    :param once: Run only once.
    :param unlock: If True, unlock locked rules before expiring them.
    :param spread_period: Maximum random offset (in seconds) added to the grace period.
    :param purge_replicas: If True, also set purge_replicas on the expiring rules.
    """

    sleep_time = 60

    executable = 'atropos'
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)
    now = datetime.datetime.now()
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    time.sleep(10)
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'], hb['nr_threads'])
    logging.debug(prepend_str + 'Starting worker')
    summary = {}
    lifetime_exceptions = {}
    rand = random.Random(hb['assign_thread'])
    for excep in rucio.core.lifetime_exception.list_exceptions(
            exception_id=None,
            states=[
                LifetimeExceptionsState.APPROVED,
            ],
            session=None):
        key = '{}:{}'.format(excep['scope'].internal, excep['name'])
        if key not in lifetime_exceptions:
            lifetime_exceptions[key] = excep['expires_at']
        elif lifetime_exceptions[key] < excep['expires_at']:
            lifetime_exceptions[key] = excep['expires_at']
    logging.debug(prepend_str +
                  '%s active exceptions' % len(lifetime_exceptions))
    if not dry_run and date_check > now:
        logging.error(
            prepend_str +
            'Atropos cannot run in non-dry-run mode for date in the future')
    else:
        while not GRACEFUL_STOP.is_set():

            hb = heartbeat.live(executable, hostname, pid, hb_thread)
            prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'],
                                                 hb['nr_threads'])

            stime = time.time()
            try:
                rules = get_rules_beyond_eol(date_check,
                                             thread,
                                             hb['nr_threads'],
                                             session=None)
                logging.info(prepend_str + '%s rules to process' %
                             (len(rules)))
                for rule_idx, rule in enumerate(rules, start=1):
                    did = '%s:%s' % (rule.scope, rule.name)
                    did_key = '{}:{}'.format(rule.scope.internal, rule.name)
                    logging.debug(prepend_str +
                                  'Working on rule %s on DID %s on %s' %
                                  (rule.id, did, rule.rse_expression))

                    if (rule_idx % 1000) == 0:
                        logging.info(prepend_str + '%s/%s rules processed' %
                                     (rule_idx, len(rules)))

                    # We compute the expected eol_at
                    try:
                        rses = parse_expression(rule.rse_expression,
                                                filter={'vo': rule.account.vo})
                    except InvalidRSEExpression:
                        logging.warning(
                            prepend_str +
                            'Rule %s has an RSE expression that results in an empty set: %s'
                            % (rule.id, rule.rse_expression))
                        continue
                    eol_at = rucio.core.lifetime_exception.define_eol(
                        rule.scope, rule.name, rses)
                    if eol_at != rule.eol_at:
                        logging.warning(
                            prepend_str +
                            'The computed eol %s differs from the one recorded %s for rule %s on %s at %s'
                            % (eol_at, rule.eol_at, rule.id, did,
                               rule.rse_expression))
                        try:
                            update_rule(rule.id, options={'eol_at': eol_at})
                        except RuleNotFound:
                            logging.warning(prepend_str +
                                            'Cannot find rule %s on DID %s' %
                                            (rule.id, did))
                            continue

                    # Check the exceptions
                    if did_key in lifetime_exceptions:
                        if eol_at > lifetime_exceptions[did_key]:
                            logging.info(
                                prepend_str +
                                'Rule %s on DID %s on %s has longer expiration date than the one requested : %s'
                                % (rule.id, did, rule.rse_expression,
                                   lifetime_exceptions[did_key]))
                        else:
                            # If eol_at < requested extension, update eol_at
                            logging.info(
                                prepend_str +
                                'Updating rule %s on DID %s on %s according to the exception till %s'
                                % (rule.id, did, rule.rse_expression,
                                   lifetime_exceptions[did_key]))
                            eol_at = lifetime_exceptions[did_key]
                            try:
                                update_rule(rule.id,
                                            options={
                                                'eol_at':
                                                lifetime_exceptions[did_key]
                                            })
                            except RuleNotFound:
                                logging.warning(
                                    prepend_str +
                                    'Cannot find rule %s on DID %s' %
                                    (rule.id, did))
                                continue

                    # Now check that the new eol_at is expired
                    if eol_at and eol_at < date_check:
                        no_locks = True
                        for lock in get_dataset_locks(rule.scope, rule.name):
                            if lock['rule_id'] == rule[4]:
                                no_locks = False
                                if lock['rse_id'] not in summary:
                                    summary[lock['rse_id']] = {}
                                if did_key not in summary[lock['rse_id']]:
                                    summary[lock['rse_id']][did_key] = {
                                        'length': lock['length'] or 0,
                                        'bytes': lock['bytes'] or 0
                                    }
                        if no_locks:
                            logging.warning(
                                prepend_str +
                                'Cannot find a lock for rule %s on DID %s' %
                                (rule.id, did))
                        if not dry_run:
                            lifetime = grace_period + rand.randrange(
                                spread_period + 1)
                            logging.info(
                                prepend_str +
                                'Setting %s seconds lifetime for rule %s' %
                                (lifetime, rule.id))
                            options = {'lifetime': lifetime}
                            if purge_replicas:
                                options['purge_replicas'] = True
                            if rule.locked and unlock:
                                logging.info(prepend_str + 'Unlocking rule %s',
                                             rule.id)
                                options['locked'] = False
                            try:
                                update_rule(rule.id, options=options)
                            except RuleNotFound:
                                logging.warning(
                                    prepend_str +
                                    'Cannot find rule %s on DID %s' %
                                    (rule.id, did))
                                continue
            except Exception:
                exc_type, exc_value, exc_traceback = exc_info()
                logging.critical(''.join(
                    format_exception(exc_type, exc_value,
                                     exc_traceback)).strip())

            for rse_id in summary:
                tot_size, tot_files, tot_datasets = 0, 0, 0
                for did in summary[rse_id]:
                    tot_datasets += 1
                    tot_files += summary[rse_id][did].get('length', 0)
                    tot_size += summary[rse_id][did].get('bytes', 0)
                vo = get_rse_vo(rse_id=rse_id)
                logging.info(
                    prepend_str +
                    'For RSE %s %s %s datasets will be deleted representing %s files and %s bytes'
                    % (get_rse_name(rse_id=rse_id), '' if vo == 'def' else
                       'on VO ' + vo, tot_datasets, tot_files, tot_size))

            if once:
                break
            else:
                tottime = time.time() - stime
                if tottime < sleep_time:
                    logging.info(prepend_str + 'Will sleep for %s seconds' %
                                 (str(sleep_time - tottime)))
                    time.sleep(sleep_time - tottime)
                    continue

        logging.info(prepend_str + 'Graceful stop requested')
        heartbeat.die(executable, hostname, pid, hb_thread)
        logging.info(prepend_str + 'Graceful stop done')
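
The grace_period/spread_period interplay above amounts to a randomized expiry, so that a batch of expiring rules does not all delete at the same instant. A self-contained sketch of just that pattern, with all names local to the sketch:

import random

def spread_lifetime(grace_period, spread_period, seed=None):
    """Return grace_period plus a random offset in [0, spread_period] seconds."""
    rand = random.Random(seed)
    return grace_period + rand.randrange(spread_period + 1)

# Example: rules expire between 1 day and 1 day + 6 hours from now.
print(spread_lifetime(grace_period=86400, spread_period=21600))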
Example 8
def test_bb8_full_workflow(vo, root_account, jdoe_account, rse_factory,
                           mock_scope, did_factory):
    """BB8: Test the rebalance rule method"""
    config_core.set(section='bb8', option='allowed_accounts', value='jdoe')
    tot_rses = 4
    rses = [rse_factory.make_posix_rse() for _ in range(tot_rses)]
    rse1, rse1_id = rses[0]
    rse2, rse2_id = rses[1]
    rse3, rse3_id = rses[2]
    rse4, rse4_id = rses[3]

    # Add Tags
    # RSE 1 and 2 match expression T1=true
    # RSE 3 and 4 match expression T2=true
    T1 = tag_generator()
    T2 = tag_generator()
    add_rse_attribute(rse1_id, T1, True)
    add_rse_attribute(rse2_id, T1, True)
    add_rse_attribute(rse3_id, T2, True)
    add_rse_attribute(rse4_id, T2, True)

    # Add fake weights
    add_rse_attribute(rse1_id, "fakeweight", 10)
    add_rse_attribute(rse2_id, "fakeweight", 0)
    add_rse_attribute(rse3_id, "fakeweight", 0)
    add_rse_attribute(rse4_id, "fakeweight", 0)
    add_rse_attribute(rse1_id, "freespace", 1)
    add_rse_attribute(rse2_id, "freespace", 1)
    add_rse_attribute(rse3_id, "freespace", 1)
    add_rse_attribute(rse4_id, "freespace", 1)

    # Add quota
    set_local_account_limit(jdoe_account, rse1_id, -1)
    set_local_account_limit(jdoe_account, rse2_id, -1)
    set_local_account_limit(jdoe_account, rse3_id, -1)
    set_local_account_limit(jdoe_account, rse4_id, -1)

    set_local_account_limit(root_account, rse1_id, -1)
    set_local_account_limit(root_account, rse2_id, -1)
    set_local_account_limit(root_account, rse3_id, -1)
    set_local_account_limit(root_account, rse4_id, -1)

    # Invalidate the cache because the result of parse_expression is cached
    REGION.invalidate()

    tot_datasets = 4
    # Create a list of datasets
    datasets = [did_factory.make_dataset() for _ in range(tot_datasets)]
    dsn = [dataset['name'] for dataset in datasets]

    rules = list()

    base_unit = 100000000000
    nb_files1 = 7
    nb_files2 = 5
    nb_files3 = 3
    nb_files4 = 2
    file_size = 1 * base_unit
    rule_to_rebalance = None

    # Add one secondary file
    files = create_files(1, mock_scope, rse1_id, bytes_=1)
    add_rule(dids=[{
        'scope': mock_scope,
        'name': files[0]['name']
    }],
             account=jdoe_account,
             copies=1,
             rse_expression=rse1,
             grouping='DATASET',
             weight=None,
             lifetime=-86400,
             locked=False,
             subscription_id=None)[0]
    for cnt in range(3, tot_rses):
        add_replicas(rses[cnt][1], files, jdoe_account)
        add_rule(dids=[{
            'scope': mock_scope,
            'name': files[0]['name']
        }],
                 account=jdoe_account,
                 copies=1,
                 rse_expression=rses[cnt][0],
                 grouping='DATASET',
                 weight=None,
                 lifetime=-86400,
                 locked=False,
                 subscription_id=None)[0]
    rule_cleaner(once=True)

    # Create dataset 1 of 800 GB and create a rule on RSE 1 and RSE 3
    files = create_files(nb_files1, mock_scope, rse1_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[0], files, jdoe_account)

    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[0]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse1,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rules.append(rule_id)

    add_replicas(rse3_id, files, jdoe_account)
    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[0]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse3,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rules.append(rule_id)

    # Create dataset 2 of 500 GB and create a rule on RSE 1 and RSE 2
    files = create_files(nb_files2, mock_scope, rse1_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[1], files, jdoe_account)

    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[1]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse1,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rules.append(rule_id)

    add_replicas(rse2_id, files, jdoe_account)
    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[1]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse2,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rules.append(rule_id)

    # Create dataset 3 of 300 GB and create a rule on RSE 1. The copy on RSE 3 is secondary
    files = create_files(nb_files3, mock_scope, rse1_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[2], files, jdoe_account)

    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[2]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse1,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rule_to_rebalance = rule_id
    rules.append(rule_id)

    add_replicas(rse3_id, files, jdoe_account)
    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[2]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse3,
                       grouping='DATASET',
                       weight=None,
                       lifetime=-86400,
                       locked=False,
                       subscription_id=None)[0]
    rule_cleaner(once=True)
    # The rule had a negative lifetime, so rule_cleaner must have removed it
    with pytest.raises(RuleNotFound):
        get_rule(rule_id)

    # Create dataset 4 of 200 GB and create a rule on RSE 3. The copy on RSE 2 is secondary
    files = create_files(nb_files4, mock_scope, rse3_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[3], files, jdoe_account)

    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[3]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse3,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rules.append(rule_id)

    add_replicas(rse2_id, files, jdoe_account)
    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[3]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse2,
                       grouping='DATASET',
                       weight=None,
                       lifetime=-86400,
                       locked=False,
                       subscription_id=None)[0]
    rule_cleaner(once=True)
    # The rule had a negative lifetime, so rule_cleaner must have removed it
    with pytest.raises(RuleNotFound):
        get_rule(rule_id)

    for dataset in dsn:
        set_status(mock_scope, dataset, open=False)

    for rse in rses:
        fill_rse_expired(rse[1])
        set_rse_usage(rse_id=rse[1],
                      source='min_free_space',
                      used=2 * base_unit,
                      free=2 * base_unit,
                      session=None)
        set_rse_usage(rse_id=rse[1],
                      source='storage',
                      used=15 * base_unit,
                      free=2 * base_unit,
                      session=None)
    set_rse_usage(rse_id=rse2_id,
                  source='min_free_space',
                  used=1 * base_unit,
                  free=1 * base_unit,
                  session=None)
    set_rse_usage(rse_id=rse2_id,
                  source='storage',
                  used=6 * base_unit,
                  free=5 * base_unit,
                  session=None)

    run_abacus(once=True, threads=1, fill_history_table=False, sleep_time=10)
    # Summary :
    # RSE 1 : 1500 GB primary + 1 B secondary
    tot_space = [
        src for src in get_rse_usage(rse1_id) if src['source'] == 'rucio'
    ][0]
    expired = [
        src for src in get_rse_usage(rse1_id) if src['source'] == 'expired'
    ][0]
    assert tot_space['used'] == (nb_files1 + nb_files2 +
                                 nb_files3) * file_size + 1
    assert expired['used'] == 1
    # RSE 2 : 500 GB primary + 100 GB secondary
    tot_space = [
        src for src in get_rse_usage(rse2_id) if src['source'] == 'rucio'
    ][0]
    expired = [
        src for src in get_rse_usage(rse2_id) if src['source'] == 'expired'
    ][0]
    assert tot_space['used'] == (nb_files2 + nb_files4) * file_size
    assert expired['used'] == nb_files4 * file_size
    # Total primary on T1=true : 2000 GB
    # Total secondary on T1=true : 200 GB
    # Ratio secondary / primary = 10  %
    # Ratio on RSE 1 : 0 %
    # Ratio on RSE 2 : 40 %

    # Now run BB8

    re_evaluator(once=True, sleep_time=30, did_limit=100)
    bb8_run(once=True,
            rse_expression='%s=true' % str(T1),
            move_subscriptions=False,
            use_dump=False,
            sleep_time=300,
            threads=1,
            dry_run=False)

    for rule_id in rules:
        rule = get_rule(rule_id)
        if rule_id != rule_to_rebalance:
            assert (rule['child_rule_id'] is None)
        else:
            assert (rule['child_rule_id'] is not None)
            assert (
                rule['expires_at'] <= datetime.utcnow() + timedelta(seconds=1)
            )  # timedelta needed to prevent failure due to rounding effects
            child_rule_id = rule['child_rule_id']
            child_rule = get_rule(child_rule_id)
            assert (child_rule['rse_expression'] == rse2)
            # For teardown, delete child rule
            update_rule(child_rule_id, {'lifetime': -86400})
    rule_cleaner(once=True)

    for dataset in dsn:
        set_metadata(mock_scope, dataset, 'lifetime', -86400)
    undertaker.run(once=True)
Example 9
def rule_injector(once=False, sleep_time=60):
    """
    Main loop to check for asynchronous creation of replication rules
    """

    hostname = socket.gethostname()
    pid = os.getpid()
    current_thread = threading.current_thread()

    paused_rules = {}  # {rule_id: datetime}

    # Make an initial heartbeat so that all judge-injectors have the correct worker number on the next try
    executable = 'judge-injector'
    heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=current_thread, older_than=2 * 60 * 60)
    prefix = 'judge-injector[%i/%i] ' % (heartbeat['assign_thread'], heartbeat['nr_threads'])
    logger = formatted_logger(logging.log, prefix + '%s')
    graceful_stop.wait(1)

    while not graceful_stop.is_set():
        try:
            # heartbeat
            heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=current_thread, older_than=2 * 60 * 60)
            prefix = 'judge-injector[%i/%i] ' % (heartbeat['assign_thread'], heartbeat['nr_threads'])
            logger = formatted_logger(logging.log, prefix + '%s')

            start = time.time()

            # Refresh paused rules
            iter_paused_rules = deepcopy(paused_rules)
            for key in iter_paused_rules:
                if datetime.utcnow() > paused_rules[key]:
                    del paused_rules[key]

            rules = get_injected_rules(total_workers=heartbeat['nr_threads'],
                                       worker_number=heartbeat['assign_thread'],
                                       limit=100,
                                       blocked_rules=[key for key in paused_rules])
            logger(logging.DEBUG, 'index query time %f fetch size is %d' % (time.time() - start, len(rules)))

            if not rules and not once:
                logger(logging.DEBUG, 'did not get any work (paused_rules=%s)' % str(len(paused_rules)))
                daemon_sleep(start_time=start, sleep_time=sleep_time, graceful_stop=graceful_stop, logger=logger)
            else:
                for rule in rules:
                    rule_id = rule[0]
                    logger(logging.INFO, 'Injecting rule %s' % rule_id)
                    if graceful_stop.is_set():
                        break
                    try:
                        start = time.time()
                        inject_rule(rule_id=rule_id, logger=logger)
                        logger(logging.DEBUG, 'injection of %s took %f' % (rule_id, time.time() - start))
                    except (DatabaseException, DatabaseError) as e:
                        if match('.*ORA-00054.*', str(e.args[0])):
                            paused_rules[rule_id] = datetime.utcnow() + timedelta(seconds=randint(60, 600))
                            record_counter('rule.judge.exceptions.LocksDetected')
                            logger(logging.WARNING, 'Locks detected for %s' % rule_id)
                        elif match('.*QueuePool.*', str(e.args[0])):
                            logger(logging.WARNING, 'DatabaseException', exc_info=True)
                            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                        elif match('.*ORA-03135.*', str(e.args[0])):
                            logger(logging.WARNING, 'DatabaseException', exc_info=True)
                            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                        else:
                            logger(logging.ERROR, 'DatabaseException', exc_info=True)
                            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                    except (RSEWriteBlocked) as e:
                        paused_rules[rule_id] = datetime.utcnow() + timedelta(seconds=randint(60, 600))
                        logger(logging.WARNING, 'RSEWriteBlocked for rule %s' % rule_id)
                        record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                    except ReplicationRuleCreationTemporaryFailed as e:
                        paused_rules[rule_id] = datetime.utcnow() + timedelta(seconds=randint(60, 600))
                        logger(logging.WARNING, 'ReplicationRuleCreationTemporaryFailed for rule %s' % rule_id)
                        record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                    except RuleNotFound:
                        pass
                    except InsufficientAccountLimit:
                        # A rule with InsufficientAccountLimit on injection hangs there potentially forever
                        # It should be marked as SUSPENDED
                        logger(logging.INFO, 'Marking rule %s as SUSPENDED due to InsufficientAccountLimit' % rule_id)
                        update_rule(rule_id=rule_id, options={'state': 'SUSPENDED'})

        except (DatabaseException, DatabaseError) as e:
            if match('.*QueuePool.*', str(e.args[0])):
                logger(logging.WARNING, 'DatabaseException', exc_info=True)
                record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
            elif match('.*ORA-03135.*', str(e.args[0])):
                logger(logging.WARNING, 'DatabaseException', exc_info=True)
                record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
            else:
                logger(logging.CRITICAL, 'DatabaseException', exc_info=True)
                record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
        except Exception as e:
            logger(logging.CRITICAL, 'Exception', exc_info=True)
            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
        if once:
            break

    die(executable=executable, hostname=hostname, pid=pid, thread=current_thread)
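
The paused_rules dictionary above implements a simple randomized retry backoff. A minimal standalone sketch of the pattern, with all names local to the sketch:

from datetime import datetime, timedelta
from random import randint

paused = {}  # {rule_id: datetime until which the rule is skipped}

def pause(rule_id):
    # Skip this rule for a random 1-10 minute window, as the daemon does.
    paused[rule_id] = datetime.utcnow() + timedelta(seconds=randint(60, 600))

def refresh():
    # Drop entries whose pause window has elapsed.
    for rule_id in list(paused):
        if datetime.utcnow() > paused[rule_id]:
            del paused[rule_id]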
Example 10
def rule_injector(once=False):
    """
    Main loop to check for asynchronous creation of replication rules
    """

    hostname = socket.gethostname()
    pid = os.getpid()
    current_thread = threading.current_thread()

    paused_rules = {}  # {rule_id: datetime}

    # Make an initial heartbeat so that all judge-injectors have the correct worker number on the next try
    live(executable='rucio-judge-injector',
         hostname=hostname,
         pid=pid,
         thread=current_thread,
         older_than=2 * 60 * 60)
    graceful_stop.wait(1)

    while not graceful_stop.is_set():
        try:
            # heartbeat
            heartbeat = live(executable='rucio-judge-injector',
                             hostname=hostname,
                             pid=pid,
                             thread=current_thread,
                             older_than=2 * 60 * 60)

            start = time.time()

            # Refresh paused rules
            iter_paused_rules = deepcopy(paused_rules)
            for key in iter_paused_rules:
                if datetime.utcnow() > paused_rules[key]:
                    del paused_rules[key]

            rules = get_injected_rules(
                total_workers=heartbeat['nr_threads'] - 1,
                worker_number=heartbeat['assign_thread'],
                limit=100,
                blacklisted_rules=[key for key in paused_rules])
            logging.debug(
                'rule_injector[%s/%s] index query time %f fetch size is %d' %
                (heartbeat['assign_thread'], heartbeat['nr_threads'] - 1,
                 time.time() - start, len(rules)))

            if not rules and not once:
                logging.debug(
                    'rule_injector[%s/%s] did not get any work (paused_rules=%s)'
                    % (heartbeat['assign_thread'], heartbeat['nr_threads'] - 1,
                       str(len(paused_rules))))
                graceful_stop.wait(60)
            else:
                for rule in rules:
                    rule_id = rule[0]
                    logging.info('rule_injector[%s/%s]: Injecting rule %s' %
                                 (heartbeat['assign_thread'],
                                  heartbeat['nr_threads'] - 1, rule_id))
                    if graceful_stop.is_set():
                        break
                    try:
                        start = time.time()
                        inject_rule(rule_id=rule_id)
                        logging.debug(
                            'rule_injector[%s/%s]: injection of %s took %f' %
                            (heartbeat['assign_thread'],
                             heartbeat['nr_threads'] - 1, rule_id,
                             time.time() - start))
                    except (DatabaseException, DatabaseError) as e:
                        if match('.*ORA-00054.*', str(e.args[0])):
                            paused_rules[rule_id] = datetime.utcnow(
                            ) + timedelta(seconds=randint(60, 600))
                            record_counter(
                                'rule.judge.exceptions.LocksDetected')
                            logging.warning(
                                'rule_injector[%s/%s]: Locks detected for %s' %
                                (heartbeat['assign_thread'],
                                 heartbeat['nr_threads'] - 1, rule_id))
                        elif match('.*QueuePool.*', str(e.args[0])):
                            logging.warning(traceback.format_exc())
                            record_counter('rule.judge.exceptions.%s' %
                                           e.__class__.__name__)
                        elif match('.*ORA-03135.*', str(e.args[0])):
                            logging.warning(traceback.format_exc())
                            record_counter('rule.judge.exceptions.%s' %
                                           e.__class__.__name__)
                        else:
                            logging.error(traceback.format_exc())
                            record_counter('rule.judge.exceptions.%s' %
                                           e.__class__.__name__)
                    except RSEBlacklisted as e:
                        paused_rules[rule_id] = datetime.utcnow() + timedelta(
                            seconds=randint(60, 600))
                        logging.warning(
                            'rule_injector[%s/%s]: RSEBlacklisted for rule %s'
                            % (heartbeat['assign_thread'],
                               heartbeat['nr_threads'] - 1, rule_id))
                        record_counter('rule.judge.exceptions.%s' %
                                       e.__class__.__name__)
                    except ReplicationRuleCreationTemporaryFailed as e:
                        paused_rules[rule_id] = datetime.utcnow() + timedelta(
                            seconds=randint(60, 600))
                        logging.warning(
                            'rule_injector[%s/%s]: ReplicationRuleCreationTemporaryFailed for rule %s'
                            % (heartbeat['assign_thread'],
                               heartbeat['nr_threads'] - 1, rule_id))
                        record_counter('rule.judge.exceptions.%s' %
                                       e.__class__.__name__)
                    except RuleNotFound as e:
                        pass
                    except InsufficientAccountLimit as e:
                        # A rule with InsufficientAccountLimit on injection hangs there potentially forever
                        # It should be marked as SUSPENDED
                        logging.info(
                            'rule_injector[%s/%s]: Marking rule %s as SUSPENDED due to InsufficientAccountLimit'
                            % (heartbeat['assign_thread'],
                               heartbeat['nr_threads'] - 1, rule_id))
                        update_rule(rule_id=rule_id,
                                    options={'state': 'SUSPENDED'})

        except (DatabaseException, DatabaseError) as e:
            if match('.*QueuePool.*', str(e.args[0])):
                logging.warning(traceback.format_exc())
                record_counter('rule.judge.exceptions.%s' %
                               e.__class__.__name__)
            elif match('.*ORA-03135.*', str(e.args[0])):
                logging.warning(traceback.format_exc())
                record_counter('rule.judge.exceptions.%s' %
                               e.__class__.__name__)
            else:
                logging.critical(traceback.format_exc())
                record_counter('rule.judge.exceptions.%s' %
                               e.__class__.__name__)
        except Exception as e:
            logging.critical(traceback.format_exc())
            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
        if once:
            break

    die(executable='rucio-judge-injector',
        hostname=hostname,
        pid=pid,
        thread=current_thread)
Example 11
def atropos(thread,
            bulk,
            date_check,
            dry_run=True,
            grace_period=86400,
            once=True):
    """
    Creates an Atropos worker that gets a list of rules whose eol_at has expired and deletes them.

    :param thread: Thread number at startup.
    :param bulk: The number of requests to process.
    :param grace_period: The grace_period for the rules.
    :param once: Run only once.
    """

    sleep_time = 60

    executable = ' '.join(argv)
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)
    now = datetime.datetime.now()
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    summary = {}
    lifetime_exceptions = get_lifetime_exceptions()
    prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'] + 1,
                                         hb['nr_threads'])
    if not dry_run and date_check > now:
        logging.error(
            prepend_str +
            'Atropos cannot run in non-dry-run mode for date in the future')
    else:
        while not graceful_stop.is_set():

            hb = heartbeat.live(executable, hostname, pid, hb_thread)
            prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'] + 1,
                                                 hb['nr_threads'])

            stime = time.time()
            try:
                rules = get_rules_beyond_eol(date_check, thread,
                                             hb['nr_threads'] - 1)
                logging.info(prepend_str + '%s rules to process' %
                             (len(rules)))
                rule_idx = 0
                for rule in rules:
                    rule_idx += 1
                    logging.debug(
                        prepend_str + 'Working on rule %s on DID %s:%s on %s' %
                        (rule.id, rule.scope, rule.name, rule.rse_expression))

                    if (rule_idx % 1000) == 0:
                        logging.info(prepend_str + '%s/%s rules processed' %
                                     (rule_idx, len(rules)))
                    # We compute the expected eol_at
                    rses = parse_expression(rule.rse_expression)
                    eol_at = define_eol(rule.scope, rule.name, rses)

                    # Check the exceptions
                    if rule.name in lifetime_exceptions:
                        if rule.eol_at > lifetime_exceptions[rule.name]:
                            logging.info(
                                prepend_str +
                                'Rule %s on DID %s:%s on %s expired. Extension requested till %s'
                                % (rule.id, rule.scope, rule.name,
                                   rule.rse_expression,
                                   lifetime_exceptions[rule.name]))
                        else:
                            # If eol_at < requested extension, update eol_at
                            logging.info(
                                prepend_str +
                                'Updating rule %s on DID %s:%s on %s according to the exception till %s'
                                % (rule.id, rule.scope, rule.name,
                                   rule.rse_expression,
                                   lifetime_exceptions[rule.name]))
                            try:
                                update_rule(rule.id,
                                            options={
                                                'eol_at':
                                                lifetime_exceptions[rule.name]
                                            })
                            except RuleNotFound:
                                logging.warning(
                                    prepend_str +
                                    'Cannot find rule %s on DID %s:%s' %
                                    (rule.id, rule.scope, rule.name))
                    elif eol_at != rule.eol_at:
                        logging.warning(
                            prepend_str +
                            'The computed eol %s differs from the one recorded %s for rule %s on %s:%s at %s'
                            % (eol_at, rule.eol_at, rule.id, rule.scope,
                               rule.name, rule.rse_expression))
                        try:
                            update_rule(rule.id, options={'eol_at': eol_at})
                        except RuleNotFound:
                            logging.warning(
                                prepend_str +
                                'Cannot find rule %s on DID %s:%s' %
                                (rule.id, rule.scope, rule.name))

                    no_locks = True
                    for lock in get_dataset_locks(rule.scope, rule.name):
                        if lock['rule_id'] == rule[4]:
                            no_locks = False
                            if lock['rse'] not in summary:
                                summary[lock['rse']] = {}
                            if '%s:%s' % (rule.scope, rule.name
                                          ) not in summary[lock['rse']]:
                                summary[lock['rse']]['%s:%s' %
                                                     (rule.scope,
                                                      rule.name)] = {
                                                          'length':
                                                          lock['length'] or 0,
                                                          'bytes':
                                                          lock['bytes'] or 0
                                                      }
                    if no_locks:
                        logging.warning(
                            prepend_str +
                            'Cannot find a lock for rule %s on DID %s:%s' %
                            (rule.id, rule.scope, rule.name))
                    if not dry_run:
                        logging.info(
                            prepend_str +
                            'Setting %s seconds lifetime for rule %s' %
                            (grace_period, rule.id))
                        try:
                            update_rule(rule.id,
                                        options={'lifetime': grace_period})
                        except RuleNotFound:
                            logging.warning(
                                prepend_str +
                                'Cannot find rule %s on DID %s:%s' %
                                (rule.id, rule.scope, rule.name))
            except Exception:
                exc_type, exc_value, exc_traceback = exc_info()
                logging.critical(''.join(
                    format_exception(exc_type, exc_value,
                                     exc_traceback)).strip())

            for rse in summary:
                tot_size, tot_files, tot_datasets = 0, 0, 0
                for did in summary[rse]:
                    tot_datasets += 1
                    tot_files += summary[rse][did].get('length', 0)
                    tot_size += summary[rse][did].get('bytes', 0)
                logging.info(
                    prepend_str +
                    'For RSE %s %s datasets will be deleted representing %s files and %s bytes'
                    % (rse, tot_datasets, tot_files, tot_size))

            if once:
                break
            else:
                tottime = time.time() - stime
                if tottime < sleep_time:
                    logging.info(prepend_str + 'Will sleep for %s seconds' %
                                 (str(sleep_time - tottime)))
                    time.sleep(sleep_time - tottime)
                    continue

        logging.info(prepend_str + 'Graceful stop requested')
        heartbeat.die(executable, hostname, pid, hb_thread)
        logging.info(prepend_str + 'Graceful stop done')
Example 12
def add_files(lfns, account, ignore_availability, vo='def', session=None):
    """
    Bulk add files :
    - Create the file and replica.
    - If it does not exist, create the dataset containing the file as well as a rule on the dataset on ANY sites.
    - Create all the ascendants of the dataset if they do not exist

    :param lfns: List of lfns (dictionary {'lfn': <lfn>, 'rse': <rse>, 'bytes': <bytes>, 'adler32': <adler32>, 'guid': <guid>, 'pfn': <pfn>})
    :param account: The issuer account.
    :param ignore_availability: A boolean to ignore blocklisted sites.
    :param vo: The VO to act on
    :param session: The session used
    """
    rule_extension_list = []
    attachments = []
    # The list of scopes is necessary for the extract_scope
    filter_ = {'scope': InternalScope(scope='*', vo=vo)}
    scopes = list_scopes(filter_=filter_, session=session)
    scopes = [scope.external for scope in scopes]
    exist_lfn = []
    try:
        lifetime_dict = config_get(section='dirac', option='lifetime', session=session)
        lifetime_dict = loads(lifetime_dict)
    except ConfigNotFound:
        lifetime_dict = {}
    except JSONDecodeError as err:
        raise InvalidType('Problem parsing lifetime option in dirac section : %s' % str(err))
    except Exception as err:
        raise RucioException(str(err))

    for lfn in lfns:
        # First check if the file exists
        filename = lfn['lfn']
        lfn_scope, _ = extract_scope(filename, scopes)
        lfn_scope = InternalScope(lfn_scope, vo=vo)

        exists, did_type = _exists(lfn_scope, filename)
        if exists:
            continue

        # Get all the ascendants of the file
        lfn_split = filename.split('/')
        lpns = ["/".join(lfn_split[:idx]) for idx in range(2, len(lfn_split))]
        lpns.reverse()
        print(lpns)

        # The parent must be a dataset. Register it as well as the rule
        dsn_name = lpns[0]
        dsn_scope, _ = extract_scope(dsn_name, scopes)
        dsn_scope = InternalScope(dsn_scope, vo=vo)

        # Compute lifetime
        lifetime = None
        if dsn_scope in lifetime_dict:
            lifetime = lifetime_dict[dsn_scope]
        else:
            for pattern in lifetime_dict:
                if re.match(pattern, dsn_scope):
                    lifetime = lifetime_dict[pattern]
                    break

        exists, did_type = _exists(dsn_scope, dsn_name)
        if exists and did_type == DIDType.CONTAINER:
            raise UnsupportedOperation('Cannot create %s as dataset' % dsn_name)
        if (dsn_name not in exist_lfn) and not exists:
            print('Will create %s' % dsn_name)
            # To maintain compatibility between the master and LTS-1.26 branches, the first 3 arguments are passed positionally
            add_did(dsn_scope,
                    dsn_name,
                    DIDType.DATASET,
                    account=InternalAccount(account, vo=vo),
                    statuses=None,
                    meta=None,
                    rules=[{'copies': 1, 'rse_expression': 'ANY=true', 'weight': None, 'account': InternalAccount(account, vo=vo), 'lifetime': None, 'grouping': 'NONE'}],
                    lifetime=None,
                    dids=None,
                    rse_id=None,
                    session=session)
            exist_lfn.append(dsn_name)
            parent_name = lpns[1]
            parent_scope, _ = extract_scope(parent_name, scopes)
            parent_scope = InternalScope(parent_scope, vo=vo)
            attachments.append({'scope': parent_scope, 'name': parent_name, 'dids': [{'scope': dsn_scope, 'name': dsn_name}]})
            rule_extension_list.append((dsn_scope, dsn_name))
        if lifetime and (dsn_scope, dsn_name) not in rule_extension_list:
            # Reset the lifetime of the rule to the configured value
            rule = [rul for rul in list_rules({'scope': dsn_scope, 'name': dsn_name, 'account': InternalAccount(account, vo=vo)}, session=session) if rul['rse_expression'] == 'ANY=true']
            if rule:
                update_rule(rule[0]['id'], options={'lifetime': lifetime}, session=session)
            rule_extension_list.append((dsn_scope, dsn_name))

        # Register the file
        rse_id = lfn.get('rse_id', None)
        if not rse_id:
            raise InvalidType('Missing rse_id')
        bytes_ = lfn.get('bytes', None)
        guid = lfn.get('guid', None)
        adler32 = lfn.get('adler32', None)
        pfn = lfn.get('pfn', None)
        files = {'scope': lfn_scope, 'name': filename, 'bytes': bytes_, 'adler32': adler32}
        if pfn:
            files['pfn'] = str(pfn)
        if guid:
            files['meta'] = {'guid': guid}
        add_replicas(rse_id=rse_id,
                     files=[files],
                     dataset_meta=None,
                     account=InternalAccount(account, vo=vo),
                     ignore_availability=ignore_availability,
                     session=session)
        add_rule(dids=[{'scope': lfn_scope, 'name': filename}],
                 account=InternalAccount(account, vo=vo),
                 copies=1,
                 rse_expression=lfn['rse'],
                 grouping=None,
                 weight=None,
                 lifetime=86400,
                 locked=None,
                 subscription_id=None,
                 session=session)
        attachments.append({'scope': dsn_scope, 'name': dsn_name, 'dids': [{'scope': lfn_scope, 'name': filename}]})

        # Now loop over the ascendants of the dataset and create them
        for lpn in lpns[1:]:
            child_scope, _ = extract_scope(lpn, scopes)
            child_scope = InternalScope(child_scope, vo=vo)
            exists, did_type = _exists(child_scope, lpn)
            if exists and did_type == DIDType.DATASET:
                raise UnsupportedOperation('Cannot create %s as container' % lpn)
            if (lpn not in exist_lfn) and not exists:
                print('Will create %s' % lpn)
                add_did(child_scope,
                        lpn,
                        DIDType.CONTAINER,
                        account=InternalAccount(account, vo=vo),
                        statuses=None,
                        meta=None,
                        rules=None,
                        lifetime=None,
                        dids=None,
                        rse_id=None,
                        session=session)
                exist_lfn.append(lpn)
                parent_name = lpns[lpns.index(lpn) + 1]
                parent_scope, _ = extract_scope(parent_name, scopes)
                parent_scope = InternalScope(parent_scope, vo=vo)
                attachments.append({'scope': parent_scope, 'name': parent_name, 'dids': [{'scope': child_scope, 'name': lpn}]})
    # Finally attach everything
    attach_dids_to_dids(attachments,
                        account=InternalAccount(account, vo=vo),
                        ignore_duplicate=True,
                        session=session)
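
For reference, the attachments payload assembled above and passed to attach_dids_to_dids is a flat list of parent-to-children mappings; a sketch of its shape, with hypothetical scope and name values, follows:

# Shape of the attachments payload consumed by attach_dids_to_dids (sketch
# only; the scope and name values below are hypothetical).
attachments = [
    {'scope': InternalScope('user.jdoe', vo='def'),        # parent DID scope
     'name': 'container_level1',                           # parent DID name
     'dids': [{'scope': InternalScope('user.jdoe', vo='def'),
               'name': 'dataset_xyz'}]},                   # children to attach
]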
Example 13
0
def rebalance_rule(parent_rule,
                   activity,
                   rse_expression,
                   priority,
                   source_replica_expression='*\\bb8-enabled=false',
                   comment=None):
    """
    Rebalance a replication rule to a new RSE

    :param parent_rule:                Replication rule to be rebalanced.
    :param activity:                   Activity to be used for the rebalancing.
    :param rse_expression:             RSE expression of the new rule.
    :param priority:                   Priority of the newly created rule.
    :param source_replica_expression:  Source replica expression of the new rule.
    :param comment:                    Comment to set on the new rule.
    :returns:                          The new child rule id, or an error message if a concurrent replica already exists at the target RSE.
    """

    if parent_rule['expires_at'] is None:
        lifetime = None
    else:
        lifetime = (parent_rule['expires_at'] -
                    datetime.utcnow()).days * 24 * 3600 + (
                        parent_rule['expires_at'] - datetime.utcnow()).seconds

    if parent_rule['grouping'] == RuleGrouping.ALL:
        grouping = 'ALL'
    elif parent_rule['grouping'] == RuleGrouping.NONE:
        grouping = 'NONE'
    else:
        grouping = 'DATASET'

    # ensure that expressions are for correct vo
    rule_vo = parent_rule['scope'].vo
    if parent_rule['scope'].vo != 'def':
        source_replica_expression = 'vo={}&({})'.format(
            rule_vo, source_replica_expression)
        rse_expression = 'vo={}&({})'.format(rule_vo, rse_expression)

    # check that a concurrent replica does not already exist at the target RSE
    concurrent_replica = False
    try:
        for lock in get_dataset_locks(parent_rule['scope'],
                                      parent_rule['name']):
            lock_rse_expr = lock['rse']
            if rule_vo != 'def':
                lock_rse_expr = 'vo={}&({})'.format(rule_vo, lock_rse_expr)
            if lock_rse_expr == rse_expression:  # may need to evaluate to be sure... could get 'vo=tst&(vo=tst&(MOCK))'
                concurrent_replica = True
    except Exception as error:
        print('Exception: get_dataset_locks not feasible for %s %s: %s' %
              (parent_rule['scope'], parent_rule['name'], error))
        raise
    if concurrent_replica:
        return 'Concurrent replica exists at target rse!'

    child_rule = add_rule(
        dids=[{
            'scope': parent_rule['scope'],
            'name': parent_rule['name']
        }],
        account=parent_rule['account'],
        copies=parent_rule['copies'],
        rse_expression=rse_expression,
        grouping=grouping,
        weight=parent_rule['weight'],
        lifetime=lifetime,
        locked=parent_rule['locked'],
        subscription_id=parent_rule['subscription_id'],
        source_replica_expression=source_replica_expression,
        activity=activity,
        notify=parent_rule['notification'],
        purge_replicas=parent_rule['purge_replicas'],
        ignore_availability=False,
        comment=parent_rule['comments'] if not comment else comment,
        ask_approval=False,
        asynchronous=False,
        ignore_account_limit=True,
        priority=priority)[0]

    update_rule(rule_id=parent_rule['id'],
                options={
                    'child_rule_id': child_rule,
                    'lifetime': 0
                })
    return child_rule
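
For context, a minimal usage sketch, not from the source: get_rule here stands for whatever lookup returns the parent rule dictionary, and the activity, RSE expression and priority values are illustrative assumptions.

# Usage sketch under stated assumptions: a parent rule dict as returned by
# the rule core (get_rule is hypothetical here) plus illustrative arguments.
parent = get_rule(rule_id)                     # hypothetical lookup
child_id = rebalance_rule(parent_rule=parent,
                          activity='Data rebalancing',
                          rse_expression='SOME_TARGET_RSE',
                          priority=3,
                          comment='Rebalancing away from a full RSE')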
Example 14
0
def rebalance_rule(
    parent_rule,
    activity,
    rse_expression,
    priority,
    source_replica_expression="*\\bb8-enabled=false",
    comment=None,
    session=None,
):
    """
    Rebalance a replication rule to a new RSE
    :param parent_rule:                Replication rule to be rebalanced.
    :param activity:                   Activity to be used for the rebalancing.
    :param rse_expression:             RSE expression of the new rule.
    :param priority:                   Priority of the newly created rule.
    :param source_replica_expression:  Source replica expression of the new rule.
    :param comment:                    Comment to set on the new rules.
    :returns:                          The new child rule id.
    """

    if parent_rule["expires_at"] is None:
        lifetime = None
    else:
        lifetime = (parent_rule["expires_at"] -
                    datetime.utcnow()).days * 24 * 3600 + (
                        parent_rule["expires_at"] - datetime.utcnow()).seconds

    if parent_rule["grouping"] == RuleGrouping.ALL:
        grouping = "ALL"
    elif parent_rule["grouping"] == RuleGrouping.NONE:
        grouping = "NONE"
    else:
        grouping = "DATASET"

    # check that a concurrent replica does not already exist at the target RSE
    concurrent_replica = False
    for lock in get_dataset_locks(parent_rule["scope"], parent_rule["name"]):
        lock_rse_expr = lock["rse"]
        if lock_rse_expr == rse_expression:
            concurrent_replica = True

    if concurrent_replica:
        return None

    child_rule = add_rule(
        dids=[{
            "scope": parent_rule["scope"],
            "name": parent_rule["name"]
        }],
        account=parent_rule["account"],
        copies=parent_rule["copies"],
        rse_expression=rse_expression,
        grouping=grouping,
        weight=parent_rule["weight"],
        lifetime=lifetime,
        locked=parent_rule["locked"],
        subscription_id=parent_rule["subscription_id"],
        source_replica_expression=source_replica_expression,
        activity=activity,
        notify=parent_rule["notification"],
        purge_replicas=parent_rule["purge_replicas"],
        ignore_availability=False,
        comment=parent_rule["comments"] if not comment else comment,
        ask_approval=False,
        asynchronous=False,
        ignore_account_limit=True,
        priority=priority,
        session=session,
    )[0]

    update_rule(
        rule_id=parent_rule["id"],
        options={
            "child_rule_id": child_rule,
            "lifetime": 0
        },
        session=session,
    )
    return child_rule
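
As an aside, the remaining-lifetime computation above evaluates datetime.utcnow() twice; for non-negative time deltas an equivalent single-evaluation form, shown only as a sketch, is:

# Sketch only: same remaining lifetime in seconds, computed against a single
# "now" timestamp (matches days * 24 * 3600 + seconds for non-negative
# deltas, ignoring the sub-second remainder).
if parent_rule["expires_at"] is None:
    lifetime = None
else:
    lifetime = int((parent_rule["expires_at"] - datetime.utcnow()).total_seconds())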
Example 15
0
                        logging.warning(
                            'rule_injector[%s/%s]: ReplicationRuleCreationTemporaryFailed for rule %s'
                            % (heartbeat['assign_thread'],
                               heartbeat['nr_threads'] - 1, rule_id))
                        record_counter('rule.judge.exceptions.%s' %
                                       e.__class__.__name__)
                    except RuleNotFound:
                        pass
                    except InsufficientAccountLimit:
                        # A rule with InsufficientAccountLimit on injection hangs there potentially forever
                        # It should be marked as SUSPENDED
                        logging.info(
                            'rule_injector[%s/%s]: Marking rule %s as SUSPENDED due to InsufficientAccountLimit'
                            % (heartbeat['assign_thread'],
                               heartbeat['nr_threads'] - 1, rule_id))
                        update_rule(rule_id=rule_id,
                                    options={'state': 'SUSPENDED'})

        except (DatabaseException, DatabaseError) as e:
            if match('.*QueuePool.*', str(e.args[0])):
                logging.warning(traceback.format_exc())
                record_counter('rule.judge.exceptions.%s' %
                               e.__class__.__name__)
            elif match('.*ORA-03135.*', str(e.args[0])):
                logging.warning(traceback.format_exc())
                record_counter('rule.judge.exceptions.%s' %
                               e.__class__.__name__)
            else:
                logging.critical(traceback.format_exc())
                record_counter('rule.judge.exceptions.%s' %
                               e.__class__.__name__)
        except Exception as e:
            logging.critical(traceback.format_exc())
            record_counter('rule.judge.exceptions.%s' %
                           e.__class__.__name__)
Example 16
0
def run_once(paused_rules, heartbeat_handler, **_kwargs):
    worker_number, total_workers, logger = heartbeat_handler.live()

    try:
        start = time.time()

        # Refresh paused rules
        iter_paused_rules = deepcopy(paused_rules)
        for key in iter_paused_rules:
            if datetime.utcnow() > paused_rules[key]:
                del paused_rules[key]

        rules = get_injected_rules(total_workers=total_workers,
                                   worker_number=worker_number,
                                   limit=100,
                                   blocked_rules=list(paused_rules))
        logger(
            logging.DEBUG, 'index query time %f fetch size is %d' %
            (time.time() - start, len(rules)))

        if not rules:
            logger(
                logging.DEBUG, 'did not get any work (paused_rules=%s)' %
                str(len(paused_rules)))
            return

        for rule in rules:
            _, _, logger = heartbeat_handler.live()
            rule_id = rule[0]
            logger(logging.INFO, 'Injecting rule %s' % rule_id)
            if graceful_stop.is_set():
                break
            try:
                start = time.time()
                inject_rule(rule_id=rule_id, logger=logger)
                logger(
                    logging.DEBUG,
                    'injection of %s took %f' % (rule_id, time.time() - start))
            except (DatabaseException, DatabaseError) as e:
                if match('.*ORA-00054.*', str(e.args[0])):
                    paused_rules[rule_id] = datetime.utcnow() + timedelta(
                        seconds=randint(60, 600))
                    record_counter('rule.judge.exceptions.{exception}',
                                   labels={'exception': 'LocksDetected'})
                    logger(logging.WARNING, 'Locks detected for %s' % rule_id)
                elif match('.*QueuePool.*', str(e.args[0])):
                    logger(logging.WARNING, 'DatabaseException', exc_info=True)
                    record_counter('rule.judge.exceptions.{exception}',
                                   labels={'exception': e.__class__.__name__})
                elif match('.*ORA-03135.*', str(e.args[0])):
                    logger(logging.WARNING, 'DatabaseException', exc_info=True)
                    record_counter('rule.judge.exceptions.{exception}',
                                   labels={'exception': e.__class__.__name__})
                else:
                    logger(logging.ERROR, 'DatabaseException', exc_info=True)
                    record_counter('rule.judge.exceptions.{exception}',
                                   labels={'exception': e.__class__.__name__})
            except RSEWriteBlocked as e:
                paused_rules[rule_id] = datetime.utcnow() + timedelta(
                    seconds=randint(60, 600))
                logger(logging.WARNING,
                       'RSEWriteBlocked for rule %s' % rule_id)
                record_counter('rule.judge.exceptions.{exception}',
                               labels={'exception': e.__class__.__name__})
            except ReplicationRuleCreationTemporaryFailed as e:
                paused_rules[rule_id] = datetime.utcnow() + timedelta(
                    seconds=randint(60, 600))
                logger(
                    logging.WARNING,
                    'ReplicationRuleCreationTemporaryFailed for rule %s' %
                    rule_id)
                record_counter('rule.judge.exceptions.{exception}',
                               labels={'exception': e.__class__.__name__})
            except RuleNotFound:
                pass
            except InsufficientAccountLimit:
                # A rule with InsufficientAccountLimit on injection hangs there potentially forever
                # It should be marked as SUSPENDED
                logger(
                    logging.INFO,
                    'Marking rule %s as SUSPENDED due to InsufficientAccountLimit'
                    % rule_id)
                update_rule(rule_id=rule_id, options={'state': 'SUSPENDED'})

    except (DatabaseException, DatabaseError) as e:
        if match('.*QueuePool.*', str(e.args[0])):
            logger(logging.WARNING, 'DatabaseException', exc_info=True)
            record_counter('rule.judge.exceptions.{exception}',
                           labels={'exception': e.__class__.__name__})
        elif match('.*ORA-03135.*', str(e.args[0])):
            logger(logging.WARNING, 'DatabaseException', exc_info=True)
            record_counter('rule.judge.exceptions.{exception}',
                           labels={'exception': e.__class__.__name__})
        else:
            logger(logging.CRITICAL, 'DatabaseException', exc_info=True)
            record_counter('rule.judge.exceptions.{exception}',
                           labels={'exception': e.__class__.__name__})
    except Exception as e:
        logger(logging.CRITICAL, 'Exception', exc_info=True)
        record_counter('rule.judge.exceptions.{exception}',
                       labels={'exception': e.__class__.__name__})
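
The paused_rules dictionary used throughout run_once implements a simple time-based backoff: a rule id maps to the UTC timestamp until which the rule is skipped. A self-contained sketch of that pattern, with illustrative names, follows:

# Standalone sketch of the paused-rules backoff pattern used in run_once.
from datetime import datetime, timedelta
from random import randint

paused_rules = {}                                # rule_id -> retry-after time

def pause(rule_id):
    # Skip this rule for a randomised 1-10 minute window, as run_once does
    # after a transient failure (e.g. row locks detected).
    paused_rules[rule_id] = datetime.utcnow() + timedelta(seconds=randint(60, 600))

def expire_paused():
    # Drop entries whose pause window has elapsed, mirroring the refresh
    # step at the top of run_once.
    for key in list(paused_rules):
        if datetime.utcnow() > paused_rules[key]:
            del paused_rules[key]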