コード例 #1
0
def list_replication_rules(filters={}, vo='def', session=None):
    """
    Lists replication rules based on a filter.

    The ``scope`` and ``account`` entries of the filter are wrapped into
    ``InternalScope`` / ``InternalAccount`` objects for the given VO; an entry
    that is missing falls back to ``'*'``. The conversion is applied to a
    private copy, so the dictionary handed in by the caller is never modified.

    :param filters: dictionary of attributes by which the results should be filtered.
    :param vo: The VO to act on.
    :param session: The database session in use.
    :returns: A generator yielding every rule returned by ``rule.list_rules``
              after it has been passed through ``api_update_return_dict``.
    """
    # Work on a shallow copy: this protects both the shared default dict and
    # any dict the caller may want to reuse after the call.
    filters = dict(filters) if filters else {}

    filters['scope'] = InternalScope(scope=filters.get('scope', '*'), vo=vo)
    filters['account'] = InternalAccount(account=filters.get('account', '*'), vo=vo)

    for r in rule.list_rules(filters, session=session):
        yield api_update_return_dict(r, session=session)
コード例 #2
0
def list_replication_rules(filters={}):
    """
    Return the replication rules selected by a filter.

    The filter is forwarded unchanged to ``rule.list_rules``.

    :param filters: dictionary of attributes by which the results should be filtered.
    :returns: the result of ``rule.list_rules`` for the given filter.
    """
    matching_rules = rule.list_rules(filters)
    return matching_rules
コード例 #3
0
ファイル: rule.py プロジェクト: pombredanne/rucio
def list_replication_rules(filters={}):
    """
    Return the replication rules selected by a filter.

    The filter is forwarded unchanged to ``rule.list_rules``.

    :param filters: dictionary of attributes by which the results should be filtered.
    :returns: the result of ``rule.list_rules`` for the given filter.
    """
    matching_rules = rule.list_rules(filters)
    return matching_rules
コード例 #4
0
ファイル: test_judge_injector.py プロジェクト: kbg/rucio
 def test_add_rule_with_r2d2_container_treating_and_duplicate_rule(self):
     """ JUDGE INJECTOR (CORE): Add a replication rule with an r2d2 container treatment and duplicate rule"""
     scope = 'mock'
     container = 'asdf.r2d2_request.2016-04-01-15-00-00.ads.' + str(uuid())
     add_did(scope, container, DIDType.from_sym('CONTAINER'), 'jdoe')

     # Build three datasets of three files each and attach them to the container.
     datasets = []
     for _ in range(3):
         files = create_files(3, scope, self.rse1_id)
         dataset = 'dataset_' + str(uuid())
         datasets.append(dataset)
         add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
         attach_dids(scope, dataset, files, 'jdoe')
         attach_dids(scope, container, [{'scope': scope, 'name': dataset}], 'jdoe')

     # Keyword arguments shared by the dataset rule and the container rule.
     shared_rule_args = dict(account='jdoe',
                             copies=1,
                             rse_expression=self.rse1,
                             grouping='DATASET',
                             weight=None,
                             lifetime=900,
                             locked=False,
                             subscription_id=None)

     # A rule already exists on the last dataset before the container rule is added.
     add_rule(dids=[{'scope': scope, 'name': datasets[-1]}],
              ask_approval=False,
              **shared_rule_args)

     # The container rule asks for approval; once approved it must be in INJECT state.
     rule_id = add_rule(dids=[{'scope': scope, 'name': container}],
                        ask_approval=True,
                        **shared_rule_args)[0]
     approve_rule(rule_id, approver='root')
     assert get_rule(rule_id)['state'] == RuleState.INJECT

     rule_injector(once=True)

     # After the injector ran, the container rule can no longer be found and
     # every dataset has at least one rule.
     with assert_raises(RuleNotFound):
         get_rule(rule_id)
     for dataset in datasets:
         dataset_rules = list(list_rules({'scope': scope, 'name': dataset}))
         assert len(dataset_rules) > 0
コード例 #5
0
def list_replication_rules(filters={}):
    """
    Lists replication rules based on a filter.

    When present, the ``scope`` and ``account`` entries of the filter are
    wrapped into ``InternalScope`` / ``InternalAccount`` objects before the
    lookup. The conversion is applied to a private copy, so the dictionary
    handed in by the caller is never modified and can safely be reused.

    :param filters: dictionary of attributes by which the results should be filtered.
    :returns: A generator yielding every rule returned by ``rule.list_rules``
              after it has been passed through ``api_update_return_dict``.
    """
    # Shallow copy so neither the shared default dict nor the caller's dict
    # ends up holding internal objects after the call.
    filters = dict(filters) if filters else {}

    if 'scope' in filters:
        filters['scope'] = InternalScope(filters['scope'])
    if 'account' in filters:
        filters['account'] = InternalAccount(filters['account'])

    for r in rule.list_rules(filters):
        yield api_update_return_dict(r)
コード例 #6
0
def list_replication_rules(filters={}, vo='def'):
    """
    Lists replication rules based on a filter.

    The ``scope`` and ``account`` entries of the filter are wrapped into
    ``InternalScope`` / ``InternalAccount`` objects for the given VO; an entry
    that is missing falls back to ``'*'``. The conversion is applied to a
    private copy of the filter.

    :param filters: dictionary of attributes by which the results should be filtered.
    :param vo: The VO to act on.
    :returns: A generator yielding every rule returned by ``rule.list_rules``
              after it has been passed through ``api_update_return_dict``.
    """
    # Never write into the incoming dict: the default ``{}`` is shared between
    # calls, so storing the converted scope/account in it would leak the
    # internal objects of one call into the next one.
    filters = dict(filters) if filters else {}

    filters['scope'] = InternalScope(scope=filters.get('scope', '*'), vo=vo)
    filters['account'] = InternalAccount(account=filters.get('account', '*'), vo=vo)

    for r in rule.list_rules(filters):
        yield api_update_return_dict(r)
コード例 #7
0
    def test_atlas_archival_policy(self):
        """ UNDERTAKER (CORE): Test the atlas archival policy. """
        tmp_scope = 'mock'
        nbdatasets = 5
        nbfiles = 5

        # Fresh RSE on which 'jdoe' gets an account limit of -1.
        rse = 'LOCALGROUPDISK_%s' % rse_name_generator()
        add_rse(rse)
        set_account_limit('jdoe', get_rse_id(rse), -1)

        # Datasets created with lifetime -1, each carrying one rule on the new RSE.
        dsns2 = []
        for _ in range(nbdatasets):
            dsns2.append({
                'name': 'dsn_%s' % generate_uuid(),
                'scope': tmp_scope,
                'type': 'DATASET',
                'lifetime': -1,
                'rules': [{
                    'account': 'jdoe',
                    'copies': 1,
                    'rse_expression': rse,
                    'grouping': 'DATASET',
                }],
            })
        add_dids(dids=dsns2, account='root')

        # Attach files (tombstone set two weeks ahead) to every dataset.
        replicas = []
        for dsn in dsns2:
            files = []
            for _ in range(nbfiles):
                files.append({
                    'scope': tmp_scope,
                    'name': 'file_%s' % generate_uuid(),
                    'bytes': 1,
                    'adler32': '0cc737eb',
                    'tombstone': datetime.utcnow() + timedelta(weeks=2),
                    'meta': {'events': 10},
                })
            attach_dids(scope=tmp_scope, name=dsn['name'], rse=rse, dids=files, account='root')
            replicas.extend(files)

        undertaker(worker_number=1, total_workers=1, once=True)

        # After the undertaker ran, no replica carries a tombstone any more.
        for replica in replicas:
            stored = get_replica(scope=replica['scope'], name=replica['name'], rse=rse)
            assert stored['tombstone'] is None

        # Every dataset is found under the 'archive' scope with exactly one rule.
        for dsn in dsns2:
            dsn_name = dsn['name']
            assert get_did(scope='archive', name=dsn_name)['name'] == dsn_name
            archive_rules = list(list_rules(filters={'scope': 'archive', 'name': dsn_name}))
            assert len(archive_rules) == 1
コード例 #8
0
def transmogrifier(bulk=5, once=False, sleep_time=60):
    """
    Creates a Transmogrifier Worker that gets a list of new DIDs for a given hash,
    identifies the subscriptions matching the DIDs and
    submit a replication rule for each DID matching a subscription.

    :param thread: Thread number at startup.
    :param bulk: The number of requests to process.
    :param once: Run only once.
    :param sleep_time: Time between two cycles.
    """

    executable = 'transmogrifier'
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)

    while not graceful_stop.is_set():

        heart_beat = heartbeat.live(executable, hostname, pid, hb_thread)

        dids, subscriptions = [], []
        tottime = 0
        prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'],
                                             heart_beat['nr_threads'])

        try:
            #  Get the new DIDs based on the is_new flag
            for did in list_new_dids(thread=heart_beat['assign_thread'],
                                     total_threads=heart_beat['nr_threads'],
                                     chunk_size=bulk,
                                     did_type=None):
                dids.append({
                    'scope': did['scope'],
                    'did_type': str(did['did_type']),
                    'name': did['name']
                })

            sub_dict = {3: []}
            #  Get the list of subscriptions. The default priority of the subscription is 3. 0 is the highest priority, 5 the lowest
            #  The priority is defined as 'policyid'
            for sub in list_subscriptions(None, None):
                if sub['state'] != SubscriptionState.INACTIVE and sub[
                        'lifetime'] and (datetime.now() > sub['lifetime']):
                    update_subscription(
                        name=sub['name'],
                        account=sub['account'],
                        metadata={'state': SubscriptionState.INACTIVE})

                elif sub['state'] in [
                        SubscriptionState.ACTIVE, SubscriptionState.UPDATED
                ]:
                    priority = 3
                    if 'policyid' in sub:
                        if int(sub['policyid']) not in sub_dict:
                            sub_dict[int(sub['policyid'])] = []
                        priority = int(sub['policyid'])
                    sub_dict[priority].append(sub)
            priorities = list(sub_dict.keys())
            priorities.sort()
            #  Order the subscriptions according to their priority
            for priority in priorities:
                subscriptions.extend(sub_dict[priority])
        except SubscriptionNotFound as error:
            logging.warning(prepend_str + 'No subscriptions defined: %s' %
                            (str(error)))
            time.sleep(10)
            continue
        except Exception as error:
            logging.error(
                prepend_str +
                'Failed to get list of new DIDs or subscriptions: %s' %
                (str(error)))

        try:
            results = {}
            start_time = time.time()
            blacklisted_rse_id = [
                rse['id'] for rse in list_rses({'availability_write': False})
            ]
            logging.debug(prepend_str + 'In transmogrifier worker')
            identifiers = []
            #  Loop over all the new dids
            for did in dids:
                did_success = True
                if did['did_type'] == str(
                        DIDType.DATASET) or did['did_type'] == str(
                            DIDType.CONTAINER):
                    did_tag = '%s:%s' % (did['scope'].internal, did['name'])
                    results[did_tag] = []
                    try:
                        metadata = get_metadata(did['scope'], did['name'])
                        # Loop over all the subscriptions
                        for subscription in subscriptions:
                            #  Check if the DID match the subscription
                            if is_matching_subscription(
                                    subscription, did, metadata) is True:
                                filter_string = loads(subscription['filter'])
                                split_rule = filter_string.get(
                                    'split_rule', False)
                                stime = time.time()
                                results[did_tag].append(subscription['id'])
                                logging.info(prepend_str +
                                             '%s:%s matches subscription %s' %
                                             (did['scope'], did['name'],
                                              subscription['name']))
                                rules = loads(
                                    subscription['replication_rules'])
                                created_rules = {}
                                cnt = 0
                                for rule_dict in rules:
                                    cnt += 1
                                    created_rules[cnt] = []
                                    # Get all the rule and subscription parameters
                                    grouping = rule_dict.get(
                                        'grouping', 'DATASET')
                                    lifetime = rule_dict.get('lifetime', None)
                                    ignore_availability = rule_dict.get(
                                        'ignore_availability', None)
                                    weight = rule_dict.get('weight', None)
                                    source_replica_expression = rule_dict.get(
                                        'source_replica_expression', None)
                                    locked = rule_dict.get('locked', None)
                                    if locked == 'True':
                                        locked = True
                                    else:
                                        locked = False
                                    purge_replicas = rule_dict.get(
                                        'purge_replicas', False)
                                    if purge_replicas == 'True':
                                        purge_replicas = True
                                    else:
                                        purge_replicas = False
                                    rse_expression = str(
                                        rule_dict['rse_expression'])
                                    comment = str(subscription['comments'])
                                    subscription_id = str(subscription['id'])
                                    account = subscription['account']
                                    copies = int(rule_dict['copies'])
                                    activity = rule_dict.get(
                                        'activity', 'User Subscriptions')
                                    try:
                                        validate_schema(name='activity',
                                                        obj=activity)
                                    except InputValidationError as error:
                                        logging.error(
                                            prepend_str +
                                            'Error validating the activity %s'
                                            % (str(error)))
                                        activity = 'User Subscriptions'
                                    if lifetime:
                                        lifetime = int(lifetime)

                                    str_activity = "".join(activity.split())
                                    success = False
                                    nattempt = 5
                                    attemptnr = 0
                                    skip_rule_creation = False

                                    selected_rses = []
                                    chained_idx = rule_dict.get(
                                        'chained_idx', None)
                                    if chained_idx:
                                        params = {}
                                        if rule_dict.get(
                                                'associated_site_idx', None):
                                            params[
                                                'associated_site_idx'] = rule_dict.get(
                                                    'associated_site_idx',
                                                    None)
                                        logging.debug(
                                            '%s Chained subscription identified. Will use %s',
                                            prepend_str,
                                            str(created_rules[chained_idx]))
                                        algorithm = rule_dict.get(
                                            'algorithm', None)
                                        selected_rses = select_algorithm(
                                            algorithm,
                                            created_rules[chained_idx], params)
                                    else:
                                        # In the case of chained subscription, don't use rseselector but use the rses returned by the algorithm
                                        if split_rule:
                                            vo = account.vo
                                            rses = parse_expression(
                                                rse_expression,
                                                filter={'vo': vo})
                                            list_of_rses = [
                                                rse['id'] for rse in rses
                                            ]
                                            # Check that some rule doesn't already exist for this DID and subscription
                                            preferred_rse_ids = []
                                            for rule in list_rules(
                                                    filters={
                                                        'subscription_id':
                                                        subscription_id,
                                                        'scope': did['scope'],
                                                        'name': did['name']
                                                    }):
                                                already_existing_rses = [
                                                    (rse['rse'], rse['id']) for
                                                    rse in parse_expression(
                                                        rule['rse_expression'],
                                                        filter={'vo': vo})
                                                ]
                                                for rse, rse_id in already_existing_rses:
                                                    if (rse_id in list_of_rses
                                                        ) and (
                                                            rse_id not in
                                                            preferred_rse_ids):
                                                        preferred_rse_ids.append(
                                                            rse_id)
                                            if len(preferred_rse_ids
                                                   ) >= copies:
                                                skip_rule_creation = True
                                            rse_id_dict = {}
                                            for rse in rses:
                                                rse_id_dict[
                                                    rse['id']] = rse['rse']
                                            try:
                                                rseselector = RSESelector(
                                                    account=account,
                                                    rses=rses,
                                                    weight=weight,
                                                    copies=copies -
                                                    len(preferred_rse_ids))
                                                selected_rses = [
                                                    rse_id_dict[rse_id]
                                                    for rse_id, _, _ in
                                                    rseselector.select_rse(
                                                        0,
                                                        preferred_rse_ids=
                                                        preferred_rse_ids,
                                                        copies=copies,
                                                        blacklist=
                                                        blacklisted_rse_id)
                                                ]
                                            except (InsufficientTargetRSEs,
                                                    InsufficientAccountLimit,
                                                    InvalidRuleWeight,
                                                    RSEOverQuota) as error:
                                                logging.warning(
                                                    prepend_str +
                                                    'Problem getting RSEs for subscription "%s" for account %s : %s. Try including blacklisted sites'
                                                    % (subscription['name'],
                                                       account, str(error)))
                                                # Now including the blacklisted sites
                                                try:
                                                    rseselector = RSESelector(
                                                        account=account,
                                                        rses=rses,
                                                        weight=weight,
                                                        copies=copies -
                                                        len(preferred_rse_ids))
                                                    selected_rses = [
                                                        rse_id_dict[rse_id]
                                                        for rse_id, _, _ in
                                                        rseselector.select_rse(
                                                            0,
                                                            preferred_rse_ids=
                                                            preferred_rse_ids,
                                                            copies=copies,
                                                            blacklist=[])
                                                    ]
                                                    ignore_availability = True
                                                except (InsufficientTargetRSEs,
                                                        InsufficientAccountLimit,
                                                        InvalidRuleWeight,
                                                        RSEOverQuota) as error:
                                                    logging.error(
                                                        prepend_str +
                                                        'Problem getting RSEs for subscription "%s" for account %s : %s. Skipping rule creation.'
                                                        %
                                                        (subscription['name'],
                                                         account, str(error)))
                                                    monitor.record_counter(
                                                        counters=
                                                        'transmogrifier.addnewrule.errortype.%s'
                                                        % (str(error.__class__.
                                                               __name__)),
                                                        delta=1)
                                                    # The DID won't be reevaluated at the next cycle
                                                    did_success = did_success and True
                                                    continue

                                    for attempt in range(0, nattempt):
                                        attemptnr = attempt
                                        nb_rule = 0
                                        #  Try to create the rule
                                        try:
                                            if split_rule:
                                                if not skip_rule_creation:
                                                    for rse in selected_rses:
                                                        if isinstance(
                                                                selected_rses,
                                                                dict):
                                                            source_replica_expression = selected_rses[
                                                                rse].get(
                                                                    'source_replica_expression',
                                                                    None)
                                                            weight = selected_rses[
                                                                rse].get(
                                                                    'weight',
                                                                    None)
                                                        logging.info(
                                                            prepend_str +
                                                            'Will insert one rule for %s:%s on %s'
                                                            %
                                                            (did['scope'],
                                                             did['name'], rse))
                                                        rule_ids = add_rule(
                                                            dids=[{
                                                                'scope':
                                                                did['scope'],
                                                                'name':
                                                                did['name']
                                                            }],
                                                            account=account,
                                                            copies=1,
                                                            rse_expression=rse,
                                                            grouping=grouping,
                                                            weight=weight,
                                                            lifetime=lifetime,
                                                            locked=locked,
                                                            subscription_id=
                                                            subscription_id,
                                                            source_replica_expression
                                                            =source_replica_expression,
                                                            activity=activity,
                                                            purge_replicas=
                                                            purge_replicas,
                                                            ignore_availability=
                                                            ignore_availability,
                                                            comment=comment)
                                                        created_rules[
                                                            cnt].append(
                                                                rule_ids[0])
                                                        nb_rule += 1
                                                        if nb_rule == copies:
                                                            success = True
                                                            break
                                            else:
                                                rule_ids = add_rule(
                                                    dids=[{
                                                        'scope': did['scope'],
                                                        'name': did['name']
                                                    }],
                                                    account=account,
                                                    copies=copies,
                                                    rse_expression=
                                                    rse_expression,
                                                    grouping=grouping,
                                                    weight=weight,
                                                    lifetime=lifetime,
                                                    locked=locked,
                                                    subscription_id=
                                                    subscription['id'],
                                                    source_replica_expression=
                                                    source_replica_expression,
                                                    activity=activity,
                                                    purge_replicas=
                                                    purge_replicas,
                                                    ignore_availability=
                                                    ignore_availability,
                                                    comment=comment)
                                                created_rules[cnt].append(
                                                    rule_ids[0])
                                                nb_rule += 1
                                            monitor.record_counter(
                                                counters=
                                                'transmogrifier.addnewrule.done',
                                                delta=nb_rule)
                                            monitor.record_counter(
                                                counters=
                                                'transmogrifier.addnewrule.activity.%s'
                                                % str_activity,
                                                delta=nb_rule)
                                            success = True
                                            break
                                        except (InvalidReplicationRule,
                                                InvalidRuleWeight,
                                                InvalidRSEExpression,
                                                StagingAreaRuleRequiresLifetime,
                                                DuplicateRule) as error:
                                            # Errors that won't be retried
                                            success = True
                                            logging.error(prepend_str + '%s' %
                                                          (str(error)))
                                            monitor.record_counter(
                                                counters=
                                                'transmogrifier.addnewrule.errortype.%s'
                                                % (str(
                                                    error.__class__.__name__)),
                                                delta=1)
                                            break
                                        except (ReplicationRuleCreationTemporaryFailed,
                                                InsufficientTargetRSEs,
                                                InsufficientAccountLimit,
                                                DatabaseException,
                                                RSEBlacklisted,
                                                RSEWriteBlocked) as error:
                                            # Errors to be retried
                                            logging.error(
                                                prepend_str +
                                                '%s Will perform an other attempt %i/%i'
                                                % (str(error), attempt + 1,
                                                   nattempt))
                                            monitor.record_counter(
                                                counters=
                                                'transmogrifier.addnewrule.errortype.%s'
                                                % (str(
                                                    error.__class__.__name__)),
                                                delta=1)
                                        except Exception:
                                            # Unexpected errors
                                            monitor.record_counter(
                                                counters=
                                                'transmogrifier.addnewrule.errortype.unknown',
                                                delta=1)
                                            exc_type, exc_value, exc_traceback = exc_info(
                                            )
                                            logging.critical(
                                                prepend_str + ''.join(
                                                    format_exception(
                                                        exc_type, exc_value,
                                                        exc_traceback)).strip(
                                                        ))

                                    did_success = (did_success and success)
                                    if (attemptnr +
                                            1) == nattempt and not success:
                                        logging.error(
                                            prepend_str +
                                            'Rule for %s:%s on %s cannot be inserted'
                                            % (did['scope'], did['name'],
                                               rse_expression))
                                    else:
                                        logging.info(
                                            prepend_str +
                                            '%s rule(s) inserted in %f seconds'
                                            % (str(nb_rule),
                                               time.time() - stime))
                    except DataIdentifierNotFound as error:
                        logging.warning(prepend_str + error)

                if did_success:
                    if did['did_type'] == str(DIDType.FILE):
                        monitor.record_counter(
                            counters='transmogrifier.did.file.processed',
                            delta=1)
                    elif did['did_type'] == str(DIDType.DATASET):
                        monitor.record_counter(
                            counters='transmogrifier.did.dataset.processed',
                            delta=1)
                    elif did['did_type'] == str(DIDType.CONTAINER):
                        monitor.record_counter(
                            counters='transmogrifier.did.container.processed',
                            delta=1)
                    monitor.record_counter(
                        counters='transmogrifier.did.processed', delta=1)
                    identifiers.append({
                        'scope':
                        did['scope'],
                        'name':
                        did['name'],
                        'did_type':
                        DIDType.from_sym(did['did_type'])
                    })

            time1 = time.time()

            #  Mark the DIDs as processed
            for identifier in chunks(identifiers, 100):
                _retrial(set_new_dids, identifier, None)

            logging.info(prepend_str + 'Time to set the new flag : %f' %
                         (time.time() - time1))
            tottime = time.time() - start_time
            for sub in subscriptions:
                update_subscription(
                    name=sub['name'],
                    account=sub['account'],
                    metadata={'last_processed': datetime.now()})
            logging.info(prepend_str +
                         'It took %f seconds to process %i DIDs' %
                         (tottime, len(dids)))
            logging.debug(prepend_str + 'DIDs processed : %s' % (str(dids)))
            monitor.record_counter(counters='transmogrifier.job.done', delta=1)
            monitor.record_timer(stat='transmogrifier.job.duration',
                                 time=1000 * tottime)
        except Exception:
            exc_type, exc_value, exc_traceback = exc_info()
            logging.critical(prepend_str + ''.join(
                format_exception(exc_type, exc_value, exc_traceback)).strip())
            monitor.record_counter(counters='transmogrifier.job.error',
                                   delta=1)
            monitor.record_counter(counters='transmogrifier.addnewrule.error',
                                   delta=1)
        if once is True:
            break
        if tottime < sleep_time:
            logging.info(prepend_str + 'Will sleep for %s seconds' %
                         (sleep_time - tottime))
            time.sleep(sleep_time - tottime)
    heartbeat.die(executable, hostname, pid, hb_thread)
    logging.info(prepend_str + 'Graceful stop requested')
    logging.info(prepend_str + 'Graceful stop done')
コード例 #9
0
    def test_atlas_archival_policy(self):
        """ UNDERTAKER (CORE): Test the atlas archival policy.

        The test only does something when ``get_policy()`` returns ``'atlas'``;
        for any other policy it logs a message and returns immediately.

        Scenario:
        - register five datasets (declared with ``lifetime`` -1 and one
          DATASET-grouped rule each) on a freshly created LOCALGROUPDISK RSE;
        - attach five files, each carrying a tombstone two weeks ahead, to
          every dataset;
        - run a single undertaker cycle;
        - check that every replica now has no tombstone and that a DID with
          the same name, holding exactly one rule, exists in the ``archive``
          scope.
        """
        if get_policy() != 'atlas':
            LOG.info("Skipping atlas-specific test")
            return

        scope = InternalScope('mock', **self.vo)
        owner = InternalAccount('jdoe', **self.vo)
        admin = InternalAccount('root', **self.vo)

        dataset_count = 5
        files_per_dataset = 5

        rse_name = 'LOCALGROUPDISK_%s' % rse_name_generator()
        rse_id = add_rse(rse_name, **self.vo)

        # -1 is passed as the limit of the rule owner on the new RSE
        set_local_account_limit(owner, rse_id, -1)

        # Describe the datasets together with the rule attached to each of them
        datasets = []
        for _ in range(dataset_count):
            datasets.append({
                'name': 'dsn_%s' % generate_uuid(),
                'scope': scope,
                'type': 'DATASET',
                'lifetime': -1,
                'rules': [{
                    'account': owner,
                    'copies': 1,
                    'rse_expression': rse_name,
                    'grouping': 'DATASET'
                }]
            })

        add_dids(dids=datasets, account=admin)

        # Fill every dataset and remember all files for the later replica checks
        registered_files = []
        for dataset in datasets:
            content = []
            for _ in range(files_per_dataset):
                content.append({
                    'scope': scope,
                    'name': 'file_%s' % generate_uuid(),
                    'bytes': 1,
                    'adler32': '0cc737eb',
                    'tombstone': datetime.utcnow() + timedelta(weeks=2),
                    'meta': {
                        'events': 10
                    }
                })
            attach_dids(scope=scope,
                        name=dataset['name'],
                        rse_id=rse_id,
                        dids=content,
                        account=admin)
            registered_files.extend(content)

        undertaker(worker_number=1, total_workers=1, once=True)

        # The tombstone set at registration must have been cleared
        for entry in registered_files:
            replica = get_replica(scope=entry['scope'],
                                  name=entry['name'],
                                  rse_id=rse_id)
            assert replica['tombstone'] is None

        # Each dataset must be visible in the archive scope with a single rule
        for dataset in datasets:
            archived = get_did(scope=InternalScope('archive', **self.vo),
                               name=dataset['name'])
            assert archived['name'] == dataset['name']

            archive_filter = {
                'scope': InternalScope('archive', **self.vo),
                'name': dataset['name']
            }
            archive_rules = list(list_rules(filters=archive_filter))
            assert len(archive_rules) == 1
コード例 #10
0
def add_files(lfns, account, ignore_availability, vo='def', session=None):
    """
    Bulk add files :
    - Create the file and replica.
    - If doesn't exist create the dataset containing the file as well as a rule on the dataset on ANY sites.
    - Create all the ascendants of the dataset if they do not exist

    :param lfns: List of lfn (dictionary {'lfn': <lfn>, 'rse': <rse>, 'rse_id': <rse_id>, 'bytes': <bytes>, 'adler32': <adler32>, 'guid': <guid>, 'pfn': <pfn>}).
                 'lfn', 'rse' and 'rse_id' are mandatory, the other keys are optional.
    :param account: The issuer account.
    :param ignore_availability: A boolean to ignore blocklisted sites.
    :param vo: The VO to act on
    :param session: The session used

    :raises InvalidType: if the lifetime option of the dirac section is not valid JSON,
                         if a lfn has no parent path, or if 'rse_id' is missing.
    :raises UnsupportedOperation: if the parent of a file already exists as a container,
                                  or if a higher ascendant already exists as a dataset.
    :raises RucioException: for any other failure while reading the lifetime option.
    """
    # Function-scope import so that the block does not depend on a module-level one
    import logging

    # The same account object is used for every DID, replica and rule created below
    issuer = InternalAccount(account, vo=vo)

    rule_extension_list = []
    attachments = []
    # The list of scopes is necessary for the extract_scope
    filter_ = {'scope': InternalScope(scope='*', vo=vo)}
    scopes = [scope.external for scope in list_scopes(filter_=filter_, session=session)]
    # Names of the datasets/containers created during this call
    created = set()
    try:
        lifetime_dict = config_get(section='dirac', option='lifetime', session=session)
        lifetime_dict = loads(lifetime_dict)
    except ConfigNotFound:
        lifetime_dict = {}
    except JSONDecodeError as err:
        raise InvalidType('Problem parsing lifetime option in dirac section : %s' % str(err)) from err
    except Exception as err:
        raise RucioException(str(err)) from err

    for lfn in lfns:
        # First check if the file exists
        filename = lfn['lfn']
        lfn_scope, _ = extract_scope(filename, scopes)
        lfn_scope = InternalScope(lfn_scope, vo=vo)

        exists, did_type = _exists(lfn_scope, filename)
        if exists:
            continue

        # Get all the ascendants of the file, closest one first
        lfn_split = filename.split('/')
        lpns = ["/".join(lfn_split[:idx]) for idx in range(2, len(lfn_split))]
        lpns.reverse()
        logging.debug('Ascendants of %s : %s', filename, lpns)
        if not lpns:
            # Without any ascendant there is no dataset to put the file in
            raise InvalidType('Cannot determine the parent dataset of %s' % filename)

        # The parent must be a dataset. Register it as well as the rule
        dsn_name = lpns[0]
        dsn_scope, _ = extract_scope(dsn_name, scopes)
        dsn_scope = InternalScope(dsn_scope, vo=vo)

        # Compute lifetime
        # NOTE(review): the lookup and the regular expression are applied to the
        # InternalScope object, not to a plain string - confirm that this matches
        # the keys of the JSON configuration and that re.match accepts it.
        lifetime = None
        if dsn_scope in lifetime_dict:
            lifetime = lifetime_dict[dsn_scope]
        else:
            for pattern in lifetime_dict:
                if re.match(pattern, dsn_scope):
                    lifetime = lifetime_dict[pattern]
                    break

        exists, did_type = _exists(dsn_scope, dsn_name)
        if exists and did_type == DIDType.CONTAINER:
            raise UnsupportedOperation('Cannot create %s as dataset' % dsn_name)
        if (dsn_name not in created) and not exists:
            logging.info('Will create %s', dsn_name)
            # to maintain a compatibility between master and LTS-1.26 branches remove keywords for first 3 arguments
            add_did(dsn_scope,
                    dsn_name,
                    DIDType.DATASET,
                    account=issuer,
                    statuses=None,
                    meta=None,
                    rules=[{'copies': 1, 'rse_expression': 'ANY=true', 'weight': None, 'account': issuer, 'lifetime': None, 'grouping': 'NONE'}],
                    lifetime=None,
                    dids=None,
                    rse_id=None,
                    session=session)
            created.add(dsn_name)
            # A dataset sitting directly at the top of the hierarchy has no parent to be attached to
            if len(lpns) > 1:
                parent_name = lpns[1]
                parent_scope, _ = extract_scope(parent_name, scopes)
                parent_scope = InternalScope(parent_scope, vo=vo)
                attachments.append({'scope': parent_scope, 'name': parent_name, 'dids': [{'scope': dsn_scope, 'name': dsn_name}]})
            # A dataset created in this call is recorded here, so the block below leaves its rule untouched
            rule_extension_list.append((dsn_scope, dsn_name))
        if lifetime and (dsn_scope, dsn_name) not in rule_extension_list:
            # Reset the lifetime of the rule to the configured value
            rule = [rul for rul in list_rules({'scope': dsn_scope, 'name': dsn_name, 'account': issuer}, session=session) if rul['rse_expression'] == 'ANY=true']
            if rule:
                update_rule(rule[0]['id'], options={'lifetime': lifetime}, session=session)
            rule_extension_list.append((dsn_scope, dsn_name))

        # Register the file
        rse_id = lfn.get('rse_id')
        if not rse_id:
            raise InvalidType('Missing rse_id')
        files = {'scope': lfn_scope, 'name': filename, 'bytes': lfn.get('bytes'), 'adler32': lfn.get('adler32')}
        pfn = lfn.get('pfn')
        if pfn:
            files['pfn'] = str(pfn)
        guid = lfn.get('guid')
        if guid:
            files['meta'] = {'guid': guid}
        add_replicas(rse_id=rse_id,
                     files=[files],
                     dataset_meta=None,
                     account=issuer,
                     ignore_availability=ignore_availability,
                     session=session)
        # The rule on the file itself is created with a lifetime of 86400
        add_rule(dids=[{'scope': lfn_scope, 'name': filename}],
                 account=issuer,
                 copies=1,
                 rse_expression=lfn['rse'],
                 grouping=None,
                 weight=None,
                 lifetime=86400,
                 locked=None,
                 subscription_id=None,
                 session=session)
        attachments.append({'scope': dsn_scope, 'name': dsn_name, 'dids': [{'scope': lfn_scope, 'name': filename}]})

        # Now loop over the ascendants of the dataset and created them
        for position, lpn in enumerate(lpns[1:], start=1):
            child_scope, _ = extract_scope(lpn, scopes)
            child_scope = InternalScope(child_scope, vo=vo)
            exists, did_type = _exists(child_scope, lpn)
            if exists and did_type == DIDType.DATASET:
                raise UnsupportedOperation('Cannot create %s as container' % lpn)
            if (lpn not in created) and not exists:
                logging.info('Will create %s', lpn)
                add_did(child_scope,
                        lpn,
                        DIDType.CONTAINER,
                        account=issuer,
                        statuses=None,
                        meta=None,
                        rules=None,
                        lifetime=None,
                        dids=None,
                        rse_id=None,
                        session=session)
                created.add(lpn)
                # The top-most container has no parent : nothing to attach it to
                if position + 1 < len(lpns):
                    parent_name = lpns[position + 1]
                    parent_scope, _ = extract_scope(parent_name, scopes)
                    parent_scope = InternalScope(parent_scope, vo=vo)
                    attachments.append({'scope': parent_scope, 'name': parent_name, 'dids': [{'scope': child_scope, 'name': lpn}]})
    # Finally attach everything
    attach_dids_to_dids(attachments,
                        account=issuer,
                        ignore_duplicate=True,
                        session=session)
コード例 #11
0
                'bytes': 1L,
                'adler32': '0cc737eb',
                'tombstone': datetime.utcnow() + timedelta(weeks=2),
                'meta': {
                    'events': 10
                }
            } for i in xrange(nbfiles)]
            attach_dids(scope=tmp_scope,
                        name=dsn['name'],
                        rse=rse,
                        dids=files,
                        account='root')
            replicas += files

        undertaker(worker_number=1, total_workers=1, once=True)

        for replica in replicas:
            assert (get_replica(scope=replica['scope'],
                                name=replica['name'],
                                rse=rse)['tombstone'] is None)

        for dsn in dsns2:
            assert (get_did(scope='archive',
                            name=dsn['name'])['name'] == dsn['name'])
            assert (len([
                x for x in list_rules(filters={
                    'scope': 'archive',
                    'name': dsn['name']
                })
            ]) == 1)
コード例 #12
0
                                    success = False
                                    nattempt = 5
                                    attemptnr = 0
                                    skip_rule_creation = False

                                    if split_rule:
                                        rses = parse_expression(rse_expression)
                                        list_of_rses = [
                                            rse['rse'] for rse in rses
                                        ]
                                        # Check that some rule doesn't already exist for this DID and subscription
                                        preferred_rse_ids = []
                                        for rule in list_rules(
                                                filters={
                                                    'subscription_id':
                                                    subscription_id,
                                                    'scope': did['scope'],
                                                    'name': did['name']
                                                }):
                                            already_existing_rses = [
                                                (rse['rse'], rse['id'])
                                                for rse in parse_expression(
                                                    rule['rse_expression'])
                                            ]
                                            for rse, rse_id in already_existing_rses:
                                                if (rse in list_of_rses) and (
                                                        rse_id not in
                                                        preferred_rse_ids):
                                                    preferred_rse_ids.append(
                                                        rse_id)
                                        if len(preferred_rse_ids) >= copies:
コード例 #13
0
def run_once(heartbeat_handler: "HeartbeatHandler", bulk: int,
             **_kwargs) -> bool:

    worker_number, total_workers, logger = heartbeat_handler.live()
    dids, subscriptions = [], []
    tottime = 0
    try:
        #  Get the new DIDs based on the is_new flag
        logger(logging.DEBUG, "Listing new dids")
        for did in list_new_dids(
                thread=worker_number,
                total_threads=total_workers,
                chunk_size=bulk,
                did_type=None,
        ):
            dids.append({
                "scope": did["scope"],
                "did_type": str(did["did_type"]),
                "name": did["name"],
            })
        logger(logging.INFO, "%i new dids to process", len(dids))

        sub_dict = {3: []}
        #  Get the list of subscriptions. The default priority of the subscription is 3. 0 is the highest priority, 5 the lowest
        #  The priority is defined as 'policyid'
        logger(logging.DEBUG, "Listing active subscriptions")
        for sub in list_subscriptions(None, None):
            if (sub["state"] != SubscriptionState.INACTIVE and sub["lifetime"]
                    and (datetime.now() > sub["lifetime"])):
                update_subscription(
                    name=sub["name"],
                    account=sub["account"],
                    metadata={"state": SubscriptionState.INACTIVE},
                )

            elif sub["state"] in [
                    SubscriptionState.ACTIVE, SubscriptionState.UPDATED
            ]:
                priority = 3
                if "policyid" in sub:
                    if int(sub["policyid"]) not in sub_dict:
                        sub_dict[int(sub["policyid"])] = []
                    priority = int(sub["policyid"])
                sub_dict[priority].append(sub)
        priorities = list(sub_dict.keys())
        priorities.sort()
        #  Order the subscriptions according to their priority
        for priority in priorities:
            subscriptions.extend(sub_dict[priority])
        logger(logging.INFO, "%i active subscriptions", len(subscriptions))
    except SubscriptionNotFound as error:
        logger(logging.WARNING, "No subscriptions defined: %s" % (str(error)))
        must_sleep = True
        return must_sleep
    except Exception as error:
        logger(
            logging.ERROR,
            "Failed to get list of new DIDs or subscriptions: %s" %
            (str(error)),
        )
        must_sleep = False
        return must_sleep

    results = {}
    start_time = time.time()
    blocklisted_rse_id = [
        rse["id"] for rse in list_rses({"availability_write": False})
    ]
    identifiers = []
    #  Loop over all the new dids
    for did in dids:
        _, _, logger = heartbeat_handler.live()
        did_success = True
        if did["did_type"] == str(DIDType.DATASET) or did["did_type"] == str(
                DIDType.CONTAINER):
            did_tag = "%s:%s" % (did["scope"].internal, did["name"])
            results[did_tag] = []
            try:
                metadata = get_metadata(did["scope"], did["name"])
                # Loop over all the subscriptions
                for subscription in subscriptions:
                    #  Check if the DID match the subscription
                    if is_matching_subscription(subscription, did,
                                                metadata) is True:
                        filter_string = loads(subscription["filter"])
                        split_rule = filter_string.get("split_rule", False)
                        stime = time.time()
                        results[did_tag].append(subscription["id"])
                        logger(
                            logging.INFO,
                            "%s:%s matches subscription %s" %
                            (did["scope"], did["name"], subscription["name"]),
                        )
                        rules = loads(subscription["replication_rules"])
                        created_rules = {}
                        cnt = 0
                        for rule_dict in rules:
                            cnt += 1
                            created_rules[cnt] = []
                            # Get all the rule and subscription parameters
                            grouping = rule_dict.get("grouping", "DATASET")
                            lifetime = rule_dict.get("lifetime", None)
                            ignore_availability = rule_dict.get(
                                "ignore_availability", None)
                            weight = rule_dict.get("weight", None)
                            source_replica_expression = rule_dict.get(
                                "source_replica_expression", None)
                            locked = rule_dict.get("locked", None)
                            if locked == "True":
                                locked = True
                            else:
                                locked = False
                            purge_replicas = rule_dict.get(
                                "purge_replicas", False)
                            if purge_replicas == "True":
                                purge_replicas = True
                            else:
                                purge_replicas = False
                            rse_expression = str(rule_dict["rse_expression"])
                            comment = str(subscription["comments"]
                                          )[:RULES_COMMENT_LENGTH]
                            if "comments" in rule_dict:
                                comment = str(rule_dict["comments"])
                            subscription_id = str(subscription["id"])
                            account = subscription["account"]
                            copies = int(rule_dict["copies"])
                            activity = rule_dict.get("activity",
                                                     "User Subscriptions")
                            try:
                                validate_schema(name="activity",
                                                obj=activity,
                                                vo=account.vo)
                            except InputValidationError as error:
                                logger(
                                    logging.ERROR,
                                    "Error validating the activity %s" %
                                    (str(error)),
                                )
                                activity = "User Subscriptions"
                            if lifetime:
                                lifetime = int(lifetime)

                            str_activity = "".join(activity.split())
                            success = False
                            nattempt = 5
                            attemptnr = 0
                            skip_rule_creation = False

                            selected_rses = []
                            chained_idx = rule_dict.get("chained_idx", None)
                            if chained_idx:
                                params = {}
                                if rule_dict.get("associated_site_idx", None):
                                    params[
                                        "associated_site_idx"] = rule_dict.get(
                                            "associated_site_idx", None)
                                logger(
                                    logging.DEBUG,
                                    "Chained subscription identified. Will use %s",
                                    str(created_rules[chained_idx]),
                                )
                                algorithm = rule_dict.get("algorithm", None)
                                selected_rses = select_algorithm(
                                    algorithm, created_rules[chained_idx],
                                    params)
                            else:
                                # In the case of chained subscription, don't use rseselector but use the rses returned by the algorithm
                                if split_rule:
                                    preferred_rses = set()
                                    for rule in list_rules(
                                            filters={
                                                "subscription_id":
                                                subscription_id,
                                                "scope": did["scope"],
                                                "name": did["name"],
                                            }):
                                        for rse_dict in parse_expression(
                                                rule["rse_expression"],
                                                filter_={"vo": account.vo},
                                        ):
                                            preferred_rses.add(rse_dict["rse"])
                                    preferred_rses = list(preferred_rses)

                                    try:
                                        (
                                            selected_rses,
                                            preferred_unmatched,
                                        ) = resolve_rse_expression(
                                            rse_expression,
                                            account,
                                            weight=weight,
                                            copies=copies,
                                            size=0,
                                            preferred_rses=preferred_rses,
                                            blocklist=blocklisted_rse_id,
                                        )

                                    except (
                                            InsufficientTargetRSEs,
                                            InsufficientAccountLimit,
                                            InvalidRuleWeight,
                                            RSEOverQuota,
                                    ) as error:
                                        logger(
                                            logging.WARNING,
                                            'Problem getting RSEs for subscription "%s" for account %s : %s. Try including blocklisted sites'
                                            % (
                                                subscription["name"],
                                                account,
                                                str(error),
                                            ),
                                        )
                                        # Now including the blocklisted sites
                                        try:
                                            (
                                                selected_rses,
                                                preferred_unmatched,
                                            ) = resolve_rse_expression(
                                                rse_expression,
                                                account,
                                                weight=weight,
                                                copies=copies,
                                                size=0,
                                                preferred_rses=preferred_rses,
                                            )
                                            ignore_availability = True
                                        except (
                                                InsufficientTargetRSEs,
                                                InsufficientAccountLimit,
                                                InvalidRuleWeight,
                                                RSEOverQuota,
                                        ) as error:
                                            logger(
                                                logging.ERROR,
                                                'Problem getting RSEs for subscription "%s" for account %s : %s. Skipping rule creation.'
                                                % (
                                                    subscription["name"],
                                                    account,
                                                    str(error),
                                                ),
                                            )
                                            monitor.record_counter(
                                                name=
                                                "transmogrifier.addnewrule.errortype.{exception}",
                                                labels={
                                                    "exception":
                                                    str(error.__class__.
                                                        __name__)
                                                },
                                            )
                                            # The DID won't be reevaluated at the next cycle
                                            did_success = did_success and True
                                            continue

                                    if (len(preferred_rses) -
                                            len(preferred_unmatched) >=
                                            copies):
                                        skip_rule_creation = True

                            for attempt in range(0, nattempt):
                                attemptnr = attempt
                                nb_rule = 0
                                #  Try to create the rule
                                try:
                                    if split_rule:
                                        if not skip_rule_creation:
                                            for rse in selected_rses:
                                                if isinstance(
                                                        selected_rses, dict):
                                                    source_replica_expression = (
                                                        selected_rses[rse].get(
                                                            "source_replica_expression",
                                                            None,
                                                        ))
                                                    weight = selected_rses[
                                                        rse].get(
                                                            "weight", None)
                                                logger(
                                                    logging.INFO,
                                                    "Will insert one rule for %s:%s on %s"
                                                    % (did["scope"],
                                                       did["name"], rse),
                                                )
                                                rule_ids = add_rule(
                                                    dids=[{
                                                        "scope": did["scope"],
                                                        "name": did["name"],
                                                    }],
                                                    account=account,
                                                    copies=1,
                                                    rse_expression=rse,
                                                    grouping=grouping,
                                                    weight=weight,
                                                    lifetime=lifetime,
                                                    locked=locked,
                                                    subscription_id=
                                                    subscription_id,
                                                    source_replica_expression=
                                                    source_replica_expression,
                                                    activity=activity,
                                                    purge_replicas=
                                                    purge_replicas,
                                                    ignore_availability=
                                                    ignore_availability,
                                                    comment=comment,
                                                )
                                                created_rules[cnt].append(
                                                    rule_ids[0])
                                                nb_rule += 1
                                                if nb_rule == copies:
                                                    success = True
                                                    break
                                    else:
                                        rule_ids = add_rule(
                                            dids=[{
                                                "scope": did["scope"],
                                                "name": did["name"],
                                            }],
                                            account=account,
                                            copies=copies,
                                            rse_expression=rse_expression,
                                            grouping=grouping,
                                            weight=weight,
                                            lifetime=lifetime,
                                            locked=locked,
                                            subscription_id=subscription["id"],
                                            source_replica_expression=
                                            source_replica_expression,
                                            activity=activity,
                                            purge_replicas=purge_replicas,
                                            ignore_availability=
                                            ignore_availability,
                                            comment=comment,
                                        )
                                        created_rules[cnt].append(rule_ids[0])
                                        nb_rule += 1
                                    monitor.record_counter(
                                        name="transmogrifier.addnewrule.done",
                                        delta=nb_rule,
                                    )
                                    monitor.record_counter(
                                        name=
                                        "transmogrifier.addnewrule.activity.{activity}",
                                        delta=nb_rule,
                                        labels={"activity": str_activity},
                                    )
                                    success = True
                                    break
                                except (
                                        InvalidReplicationRule,
                                        InvalidRuleWeight,
                                        InvalidRSEExpression,
                                        StagingAreaRuleRequiresLifetime,
                                        DuplicateRule,
                                ) as error:
                                    # Errors that won't be retried
                                    success = True
                                    logger(logging.ERROR, str(error))
                                    monitor.record_counter(
                                        name=
                                        "transmogrifier.addnewrule.errortype.{exception}",
                                        labels={
                                            "exception":
                                            str(error.__class__.__name__)
                                        },
                                    )
                                    break
                                except (
                                        ReplicationRuleCreationTemporaryFailed,
                                        InsufficientTargetRSEs,
                                        InsufficientAccountLimit,
                                        DatabaseException,
                                        RSEWriteBlocked,
                                ) as error:
                                    # Errors to be retried
                                    logger(
                                        logging.ERROR,
                                        "%s Will perform an other attempt %i/%i"
                                        % (str(error), attempt + 1, nattempt),
                                    )
                                    monitor.record_counter(
                                        name=
                                        "transmogrifier.addnewrule.errortype.{exception}",
                                        labels={
                                            "exception":
                                            str(error.__class__.__name__)
                                        },
                                    )
                                except Exception:
                                    # Unexpected errors
                                    monitor.record_counter(
                                        name=
                                        "transmogrifier.addnewrule.errortype.{exception}",
                                        labels={"exception": "unknown"},
                                    )
                                    logger(logging.ERROR,
                                           "Unexpected error",
                                           exc_info=True)

                            did_success = did_success and success
                            if (attemptnr + 1) == nattempt and not success:
                                logger(
                                    logging.ERROR,
                                    "Rule for %s:%s on %s cannot be inserted" %
                                    (did["scope"], did["name"],
                                     rse_expression),
                                )
                            else:
                                logger(
                                    logging.INFO,
                                    "%s rule(s) inserted in %f seconds" %
                                    (str(nb_rule), time.time() - stime),
                                )
            except DataIdentifierNotFound as error:
                logger(logging.WARNING, str(error))

        if did_success:
            if did["did_type"] == str(DIDType.FILE):
                monitor.record_counter(
                    name="transmogrifier.did.file.processed")
            elif did["did_type"] == str(DIDType.DATASET):
                monitor.record_counter(
                    name="transmogrifier.did.dataset.processed")
            elif did["did_type"] == str(DIDType.CONTAINER):
                monitor.record_counter(
                    name="transmogrifier.did.container.processed", delta=1)
            monitor.record_counter(name="transmogrifier.did.processed",
                                   delta=1)
            identifiers.append({
                "scope": did["scope"],
                "name": did["name"],
                "did_type": did["did_type"],
            })

    time1 = time.time()

    #  Mark the DIDs as processed
    for identifier in chunks(identifiers, 100):
        _retrial(set_new_dids, identifier, None)

    logger(logging.DEBUG,
           "Time to set the new flag : %f" % (time.time() - time1))
    tottime = time.time() - start_time
    for sub in subscriptions:
        update_subscription(
            name=sub["name"],
            account=sub["account"],
            metadata={"last_processed": datetime.now()},
        )
    logger(logging.INFO,
           "It took %f seconds to process %i DIDs" % (tottime, len(dids)))
    logger(logging.DEBUG, "DIDs processed : %s" % (str(dids)))
    monitor.record_counter(name="transmogrifier.job.done", delta=1)
    monitor.record_timer(name="transmogrifier.job.duration",
                         time=1000 * tottime)
    must_sleep = True
    return must_sleep