Example #1
def test_lifetime_truncate_expiration(root_account, rse_factory, mock_scope, did_factory, rucio_client):
    """
    Test the duration of a lifetime exception is truncated if max_extension is defined
    """
    nb_datasets = 2
    today = datetime.now()
    yesterday = today - timedelta(days=1)
    tomorrow = today + timedelta(days=1)
    next_year = today + timedelta(days=365)
    rse, rse_id = rse_factory.make_posix_rse()
    datasets = [did_factory.make_dataset() for _ in range(nb_datasets)]
    metadata = str(uuid())
    for dataset in datasets:
        set_metadata(dataset['scope'], dataset['name'], 'datatype', metadata)
        set_metadata(dataset['scope'], dataset['name'], 'eol_at', yesterday)

    client_datasets = list()
    for dataset in datasets:
        client_datasets.append({'scope': dataset['scope'].external, 'name': dataset['name'], 'did_type': 'DATASET'})

    tomorrow = tomorrow.strftime('%Y-%m-%d')
    config_core.set(section='lifetime_model', option='cutoff_date', value=tomorrow)
    config_core.get(section='lifetime_model', option='cutoff_date', default=None, use_cache=False)
    config_core.set(section='lifetime_model', option='max_extension', value=30)
    config_core.get(section='lifetime_model', option='max_extension', default=None, use_cache=False)
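    # Request a one-year extension; max_extension should truncate it to roughly 30 days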
    result = rucio_client.add_exception(client_datasets, account='root', pattern='wekhewfk', comments='This is a comment', expires_at=next_year)

    exception_id = list(result['exceptions'].keys())[0]
    exception = next(exception for exception in rucio_client.list_exceptions() if exception['id'] == exception_id)
    assert exception['expires_at'] - exception['created_at'] < timedelta(days=31)
    assert exception['expires_at'] - exception['created_at'] > timedelta(days=29)
Example #2
def test_bb8_rebalance_rule(vo, root_account, jdoe_account, rse_factory, mock_scope, did_factory):
    """BB8: Test the rebalance rule method"""
    rse1, rse1_id = rse_factory.make_posix_rse()
    rse2, rse2_id = rse_factory.make_posix_rse()

    # Add Tags
    T1 = tag_generator()
    T2 = tag_generator()
    add_rse_attribute(rse1_id, T1, True)
    add_rse_attribute(rse2_id, T2, True)

    # Add fake weights
    add_rse_attribute(rse1_id, "fakeweight", 10)
    add_rse_attribute(rse2_id, "fakeweight", 0)

    # Add quota
    set_local_account_limit(jdoe_account, rse1_id, -1)
    set_local_account_limit(jdoe_account, rse2_id, -1)

    set_local_account_limit(root_account, rse1_id, -1)
    set_local_account_limit(root_account, rse2_id, -1)

    files = create_files(3, mock_scope, rse1_id)
    dataset = did_factory.make_dataset()
    attach_dids(mock_scope, dataset['name'], files, jdoe_account)
    set_status(mock_scope, dataset['name'], open=False)

    # Invalidate the cache because the result of parse_expression is cached
    REGION.invalidate()

    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dataset['name']}], account=jdoe_account, copies=1, rse_expression=rse1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]
    rule = get_rule(rule_id)
    child_rule = rebalance_rule(rule, 'Rebalance', rse2, priority=3)

    rule_cleaner(once=True)

    assert get_rule(rule_id)['expires_at'] <= datetime.utcnow()
    assert get_rule(rule_id)['child_rule_id'] == child_rule

    rule_cleaner(once=True)

    assert get_rule(rule_id)['expires_at'] <= datetime.utcnow()

    successful_transfer(scope=mock_scope, name=files[0]['name'], rse_id=rse2_id, nowait=False)
    successful_transfer(scope=mock_scope, name=files[1]['name'], rse_id=rse2_id, nowait=False)
    with pytest.raises(UnsupportedOperation):
        delete_rule(rule_id)
    successful_transfer(scope=mock_scope, name=files[2]['name'], rse_id=rse2_id, nowait=False)

    rule_cleaner(once=True)
    assert get_rule(child_rule)['state'] == RuleState.OK
    set_metadata(mock_scope, dataset['name'], 'lifetime', -86400)
    undertaker.run(once=True)
Example #3
def test_atropos(root_account, rse_factory, mock_scope, did_factory, rucio_client):
    """
    Test the behaviour of atropos
    """
    today = datetime.now()
    check_date = datetime.now() + timedelta(days=365)
    check_date = check_date.strftime('%Y-%m-%d')

    # Define a policy
    lifetime_dir = '/opt/rucio/etc/policies'
    os.makedirs(lifetime_dir, exist_ok=True)
    lifetime_policy = [{'name': 'Test', 'include': {'datatype': ['RAW'], 'project': ['data%']}, 'age': '6', 'extension': '1'}]
    with open('%s/config_other.json' % lifetime_dir, 'w') as outfile:
        json.dump(lifetime_policy, outfile)
    REGION.invalidate()
    nb_datasets = 2
    rse, rse_id = rse_factory.make_posix_rse()
    datasets = [did_factory.make_dataset() for _ in range(nb_datasets)]
    rules = list()
    expiration_date = None

    # Check that the eol_at is properly set
    # Rule on dataset 0 that matches the policy should get an eol_at
    # Rule on dataset 1 that doesn't match the policy should not get an eol_at
    for cnt, dataset in enumerate(datasets):
        if cnt == 0:
            set_metadata(dataset['scope'], dataset['name'], 'datatype', 'RAW')
            set_metadata(dataset['scope'], dataset['name'], 'project', 'data')
        rule_ids = add_rule(dids=[{'scope': dataset['scope'], 'name': dataset['name']}], account=root_account, copies=1, rse_expression=rse, grouping='DATASET', weight=None, lifetime=None, locked=None, subscription_id=None)
        rules.append(rule_ids[0])
        rule = get_rule(rule_ids[0])
        if cnt == 0:
            expiration_date = rule['eol_at']
            assert expiration_date is not None
            assert expiration_date - today < timedelta(days=181)
            assert expiration_date - today > timedelta(days=179)
        else:
            assert rule['eol_at'] is None

    # Run atropos in dry-run mode to set eol_at on the dataset
    # Dataset 0 should get eol_at
    # Dataset 1 should not get eol_at
    atropos(thread=1, bulk=100, date_check=datetime.strptime(check_date, '%Y-%m-%d'), dry_run=True, grace_period=86400,
            once=True, unlock=False, spread_period=0, purge_replicas=False, sleep_time=60)

    for cnt, dataset in enumerate(datasets):
        meta = get_metadata(dataset['scope'], dataset['name'])
        if cnt == 0:
            assert meta['eol_at'] is not None
            assert meta['eol_at'] == expiration_date
        else:
            assert meta['eol_at'] is None

    # Clean-up
    os.remove('%s/config_other.json' % lifetime_dir)
Example #4
File: did.py Project: kbg/rucio
def set_metadata(scope, name, key, value, issuer, recursive=False):
    """
    Add metadata to a data identifier.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param key: the key.
    :param value: the value.
    :param issuer: The issuer account.
    :param recursive: Option to propagate the metadata update to content.
    """
    kwargs = {
        'scope': scope,
        'name': name,
        'key': key,
        'value': value,
        'issuer': issuer
    }

    if key in RESERVED_KEYS:
        raise rucio.common.exception.AccessDenied(
            'Account %s cannot change this metadata value on data identifier %s:%s'
            % (issuer, scope, name))

    if not rucio.api.permission.has_permission(
            issuer=issuer, action='set_metadata', kwargs=kwargs):
        raise rucio.common.exception.AccessDenied(
            'Account %s cannot add metadata to data identifier %s:%s' %
            (issuer, scope, name))
    return did.set_metadata(scope=scope,
                            name=name,
                            key=key,
                            value=value,
                            recursive=recursive)
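
A minimal usage sketch of the API-level function above (hypothetical scope, name and issuer values; assumes 'datatype' is not in RESERVED_KEYS and that the issuer account holds the set_metadata permission):

import rucio.common.exception

try:
    set_metadata(scope='mock', name='dataset_1', key='datatype', value='RAW', issuer='root')
except rucio.common.exception.AccessDenied as error:
    # raised when the key is reserved or the permission check fails
    print(error)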
Example #5
def set_metadata(scope, name, key, value, issuer):
    """
    Add metadata to a data identifier.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param key: the key.
    :param value: the value.
    :param issuer: The issuer account.
    """

    kwargs = {'scope': scope, 'name': name, 'key': key, 'value': value, 'issuer': issuer}

    if key in reserved_keys:
        raise rucio.common.exception.AccessDenied('Account %s cannot change this metadata value on data identifier %s:%s' % (issuer, scope, name))

    if not rucio.api.permission.has_permission(issuer=issuer, action='set_metadata', kwargs=kwargs):
        raise rucio.common.exception.AccessDenied('Account %s cannot add metadata to data identifier %s:%s' % (issuer, scope, name))
    return did.set_metadata(scope=scope, name=name, key=key, value=value)
Example #6
    def test_update_dids(self):
        """ DATA IDENTIFIERS (CORE): Update file size and checksum"""
        tmp_scope = 'mock'
        dsn = 'dsn_%s' % generate_uuid()
        lfn = 'lfn.%s' % str(generate_uuid())
        add_did(scope=tmp_scope, name=dsn, type=DIDType.DATASET, account='root')

        files = [{'scope': tmp_scope, 'name': lfn,
                  'bytes': 724963570, 'adler32': '0cc737eb',
                  'meta': {'guid': str(generate_uuid()), 'events': 100}}]
        attach_dids(scope=tmp_scope, name=dsn, rse='MOCK', dids=files, account='root')

        set_metadata(scope=tmp_scope, name=lfn, key='adler32', value='0cc737ee')
        assert_equal(get_metadata(scope=tmp_scope, name=lfn)['adler32'], '0cc737ee')

        with assert_raises(UnsupportedOperation):
            set_metadata(scope=tmp_scope, name='Nimportnawak', key='adler32', value='0cc737ee')

        set_metadata(scope=tmp_scope, name=lfn, key='bytes', value=724963577)
        assert_equal(get_metadata(scope=tmp_scope, name=lfn)['bytes'], 724963577)
Example #7
    def test_update_dids(self):
        """ DATA IDENTIFIERS (CORE): Update file size and checksum"""
        tmp_scope = InternalScope('mock', **self.vo)
        root = InternalAccount('root', **self.vo)
        dsn = 'dsn_%s' % generate_uuid()
        lfn = 'lfn.%s' % str(generate_uuid())
        add_did(scope=tmp_scope, name=dsn, type=DIDType.DATASET, account=root)

        files = [{'scope': tmp_scope, 'name': lfn,
                  'bytes': 724963570, 'adler32': '0cc737eb',
                  'meta': {'guid': str(generate_uuid()), 'events': 100}}]
        attach_dids(scope=tmp_scope, name=dsn, rse_id=get_rse_id(rse='MOCK', **self.vo), dids=files, account=root)

        set_metadata(scope=tmp_scope, name=lfn, key='adler32', value='0cc737ee')
        assert get_metadata(scope=tmp_scope, name=lfn)['adler32'] == '0cc737ee'

        with pytest.raises(DataIdentifierNotFound):
            set_metadata(scope=tmp_scope, name='Nimportnawak', key='adler32', value='0cc737ee')

        set_metadata(scope=tmp_scope, name=lfn, key='bytes', value=724963577)
        assert get_metadata(scope=tmp_scope, name=lfn)['bytes'] == 724963577
Example #8
    def setUp(self):
        if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
            self.vo = {'vo': get_vo()}
        else:
            self.vo = {}

        self.replica_client = ReplicaClient()

        # Using two test RSEs
        self.rse4suspicious = 'MOCK_SUSPICIOUS'
        self.rse4suspicious_id = get_rse_id(self.rse4suspicious, **self.vo)
        self.rse4recovery = 'MOCK_RECOVERY'
        self.rse4recovery_id = get_rse_id(self.rse4recovery, **self.vo)
        self.scope = 'mock'
        self.internal_scope = InternalScope(self.scope, **self.vo)

        # For testing, we create 5 files and upload them with Rucio to two test RSEs.
        self.tmp_file1 = file_generator()
        self.tmp_file2 = file_generator()
        self.tmp_file3 = file_generator()
        self.tmp_file4 = file_generator()
        self.tmp_file5 = file_generator()

        self.listdids = [{'scope': self.internal_scope, 'name': path.basename(f), 'type': DIDType.FILE}
                         for f in [self.tmp_file1, self.tmp_file2, self.tmp_file3, self.tmp_file4, self.tmp_file5]]

        for rse in [self.rse4suspicious, self.rse4recovery]:
            cmd = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4} {5} {6}'.format(rse, self.scope, self.tmp_file1, self.tmp_file2, self.tmp_file3, self.tmp_file4, self.tmp_file5)
            exitcode, out, err = execute(cmd)

            # checking if Rucio upload went OK
            assert exitcode == 0

        # Set fictional datatypes
        set_metadata(self.internal_scope, path.basename(self.tmp_file4), 'datatype', 'testtypedeclarebad')
        set_metadata(self.internal_scope, path.basename(self.tmp_file5), 'datatype', 'testtypenopolicy')

        # Allow for the RSEs to be affected by the suspicious file recovery daemon
        add_rse_attribute(self.rse4suspicious_id, "enable_suspicious_file_recovery", True)
        add_rse_attribute(self.rse4recovery_id, "enable_suspicious_file_recovery", True)

        # removing physical files from /tmp location - keeping only their DB info
        remove(self.tmp_file1)
        remove(self.tmp_file2)
        remove(self.tmp_file3)
        remove(self.tmp_file4)
        remove(self.tmp_file5)

        # Gather replica info
        replicalist = list_replicas(dids=self.listdids)

        # Changing the replica statuses as follows:
        # ----------------------------------------------------------------------------------------------------------------------------------
        # Name         State(s) declared on MOCK_RECOVERY       State(s) declared on MOCK_SUSPICIOUS        Metadata "datatype"
        # ----------------------------------------------------------------------------------------------------------------------------------
        # tmp_file1    available                                suspicious (available)
        # tmp_file2    available                                suspicious + bad (unavailable)
        # tmp_file3    unavailable                              suspicious (available)                      RAW
        # tmp_file4    unavailable                              suspicious (available)                      testtypedeclarebad
        # tmp_file5    unavailable                              suspicious (available)                      testtypenopolicy
        # ----------------------------------------------------------------------------------------------------------------------------------

        for replica in replicalist:
            suspicious_pfns = replica['rses'][self.rse4suspicious_id]
            for _ in range(3):
                print("Declaring suspicious file replica: " + suspicious_pfns[0])
                self.replica_client.declare_suspicious_file_replicas([suspicious_pfns[0], ], 'This is a good reason.')
                sleep(1)
            if replica['name'] == path.basename(self.tmp_file2):
                print("Declaring bad file replica: " + suspicious_pfns[0])
                self.replica_client.declare_bad_file_replicas([suspicious_pfns[0], ], 'This is a good reason')
            if replica['name'] in (path.basename(self.tmp_file3), path.basename(self.tmp_file4), path.basename(self.tmp_file5)):
                print("Updating replica state as unavailable: " + replica['rses'][self.rse4recovery_id][0])
                update_replica_state(self.rse4recovery_id, self.internal_scope, replica['name'], ReplicaState.UNAVAILABLE)

        # Gather replica info after setting initial replica statuses
        replicalist = list_replicas(dids=self.listdids)

        # Checking if the status changes were effective
        for replica in replicalist:
            if replica['name'] == path.basename(self.tmp_file1):
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert replica['states'][self.rse4recovery_id] == 'AVAILABLE'
            if replica['name'] == path.basename(self.tmp_file2):
                assert self.rse4suspicious_id not in replica['states']
                assert replica['states'][self.rse4recovery_id] == 'AVAILABLE'
            if replica['name'] == path.basename(self.tmp_file3):
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert self.rse4recovery_id not in replica['states']
            if replica['name'] == path.basename(self.tmp_file4):
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert self.rse4recovery_id not in replica['states']
            if replica['name'] == path.basename(self.tmp_file5):
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert self.rse4recovery_id not in replica['states']

        # Checking if only self.tmp_file2 is declared as 'BAD'
        self.from_date = datetime.now() - timedelta(days=1)
        bad_replicas_list = list_bad_replicas_status(rse_id=self.rse4suspicious_id, younger_than=self.from_date, **self.vo)
        bad_checklist = [(badf['name'], badf['rse_id'], badf['state']) for badf in bad_replicas_list]

        assert (path.basename(self.tmp_file1), self.rse4suspicious_id, BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file2), self.rse4suspicious_id, BadFilesStatus.BAD) in bad_checklist
        assert (path.basename(self.tmp_file3), self.rse4suspicious_id, BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file4), self.rse4suspicious_id, BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file5), self.rse4suspicious_id, BadFilesStatus.BAD) not in bad_checklist

        bad_replicas_list = list_bad_replicas_status(rse_id=self.rse4recovery_id, younger_than=self.from_date, **self.vo)
        bad_checklist = [(badf['name'], badf['rse_id'], badf['state']) for badf in bad_replicas_list]

        assert (path.basename(self.tmp_file1), self.rse4recovery_id, BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file2), self.rse4recovery_id, BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file3), self.rse4recovery_id, BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file4), self.rse4recovery_id, BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file5), self.rse4recovery_id, BadFilesStatus.BAD) not in bad_checklist
Example #9
    def test_undertaker(self):
        """ UNDERTAKER (CORE): Test the undertaker. """
        tmp_scope = InternalScope('mock', **self.vo)
        jdoe = InternalAccount('jdoe', **self.vo)
        root = InternalAccount('root', **self.vo)

        nbdatasets = 5
        nbfiles = 5
        rse = 'MOCK'
        rse_id = get_rse_id('MOCK', **self.vo)

        set_local_account_limit(jdoe, rse_id, -1)

        dsns1 = [{
            'name': 'dsn_%s' % generate_uuid(),
            'scope': tmp_scope,
            'type': 'DATASET',
            'lifetime': -1
        } for i in range(nbdatasets)]

        dsns2 = [{
            'name': 'dsn_%s' % generate_uuid(),
            'scope': tmp_scope,
            'type': 'DATASET',
            'lifetime': -1,
            'rules': [{
                'account': jdoe,
                'copies': 1,
                'rse_expression': rse,
                'grouping': 'DATASET'
            }]
        } for i in range(nbdatasets)]

        add_dids(dids=dsns1 + dsns2, account=root)

        # Add generic metadata on did
        try:
            set_metadata(tmp_scope, dsns1[0]['name'], "test_key", "test_value")
        except NotImplementedError:
            # add_did_meta is not Implemented for Oracle < 12
            pass

        replicas = list()
        for dsn in dsns1 + dsns2:
            files = [{
                'scope': tmp_scope,
                'name': 'file_%s' % generate_uuid(),
                'bytes': 1,
                'adler32': '0cc737eb',
                'tombstone': datetime.utcnow() + timedelta(weeks=2),
                'meta': {
                    'events': 10
                }
            } for i in range(nbfiles)]
            attach_dids(scope=tmp_scope,
                        name=dsn['name'],
                        rse_id=rse_id,
                        dids=files,
                        account=root)
            replicas += files

        add_rules(dids=dsns1,
                  rules=[{
                      'account': jdoe,
                      'copies': 1,
                      'rse_expression': rse,
                      'grouping': 'DATASET'
                  }])

        undertaker(worker_number=1, total_workers=1, once=True)
        undertaker(worker_number=1, total_workers=1, once=True)

        for replica in replicas:
            assert_not_equal(
                get_replica(scope=replica['scope'],
                            name=replica['name'],
                            rse_id=rse_id)['tombstone'], None)
Example #10
def atropos(thread, bulk, date_check, dry_run=True, grace_period=86400,
            once=True, unlock=False, spread_period=0, purge_replicas=False, sleep_time=60):
    """
    Creates an Atropos Worker that gets a list of rules which have an eol_at expired and delete them.

    :param thread: Thread number at startup.
    :param bulk: The number of requests to process.
    :param grace_period: The grace_period for the rules.
    :param once: Run only once.
    :param sleep_time: Thread sleep time after each chunk of work.
    """
    executable = 'atropos'
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)
    now = datetime.datetime.now()
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    time.sleep(10)
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'], hb['nr_threads'])
    logger = formatted_logger(logging.log, prepend_str + '%s')
    logger(logging.DEBUG, 'Starting worker')
    summary = {}
    lifetime_exceptions = {}
    rand = random.Random(hb['assign_thread'])
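    # Keep, for each DID, the latest expiration date among its approved exceptions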
    for excep in rucio.core.lifetime_exception.list_exceptions(exception_id=None, states=[LifetimeExceptionsState.APPROVED, ], session=None):
        key = '{}:{}'.format(excep['scope'].internal, excep['name'])
        if key not in lifetime_exceptions:
            lifetime_exceptions[key] = excep['expires_at']
        elif lifetime_exceptions[key] < excep['expires_at']:
            lifetime_exceptions[key] = excep['expires_at']
    logger(logging.DEBUG, '%s active exceptions' % len(lifetime_exceptions))
    if not dry_run and date_check > now:
        logger(logging.ERROR, 'Atropos cannot run in non-dry-run mode for date in the future')
    else:
        while not GRACEFUL_STOP.is_set():

            hb = heartbeat.live(executable, hostname, pid, hb_thread)
            prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'], hb['nr_threads'])
            logger = formatted_logger(logging.log, prepend_str + '%s')

            stime = time.time()
            try:
                rules = get_rules_beyond_eol(date_check, thread, hb['nr_threads'], session=None)
                logger(logging.INFO, '%s rules to process' % (len(rules)))
                for rule_idx, rule in enumerate(rules, start=1):
                    did = '%s:%s' % (rule.scope, rule.name)
                    did_key = '{}:{}'.format(rule.scope.internal, rule.name)
                    logger(logging.DEBUG, 'Working on rule %s on DID %s on %s' % (rule.id, did, rule.rse_expression))

                    if (rule_idx % 1000) == 0:
                        logger(logging.INFO, '%s/%s rules processed' % (rule_idx, len(rules)))

                    # We compute the expected eol_at
                    try:
                        rses = parse_expression(rule.rse_expression, filter_={'vo': rule.account.vo})
                    except InvalidRSEExpression:
                        logger(logging.WARNING, 'Rule %s has an RSE expression that results in an empty set: %s' % (rule.id, rule.rse_expression))
                        continue
                    eol_at = rucio.core.lifetime_exception.define_eol(rule.scope, rule.name, rses)
                    if eol_at != rule.eol_at:
                        logger(logging.WARNING, 'The computed eol %s differs from the one recorded %s for rule %s on %s at %s' % (eol_at, rule.eol_at, rule.id,
                                                                                                                                  did, rule.rse_expression))
                        try:
                            update_rule(rule.id, options={'eol_at': eol_at})
                        except RuleNotFound:
                            logger(logging.WARNING, 'Cannot find rule %s on DID %s' % (rule.id, did))
                            continue

                    # Check the exceptions
                    if did_key in lifetime_exceptions:
                        if eol_at > lifetime_exceptions[did_key]:
                            logger(logging.INFO, 'Rule %s on DID %s on %s has longer expiration date than the one requested : %s' % (rule.id, did, rule.rse_expression,
                                                                                                                                     lifetime_exceptions[did_key]))
                        else:
                            # If eol_at < requested extension, update eol_at
                            logger(logging.INFO, 'Updating rule %s on DID %s on %s according to the exception till %s' % (rule.id, did, rule.rse_expression,
                                                                                                                          lifetime_exceptions[did_key]))
                            eol_at = lifetime_exceptions[did_key]
                            try:
                                update_rule(rule.id, options={'eol_at': lifetime_exceptions[did_key]})
                            except RuleNotFound:
                                logger(logging.WARNING, 'Cannot find rule %s on DID %s' % (rule.id, did))
                                continue

                    # Now check that the new eol_at is expired
                    if eol_at and eol_at <= date_check:
                        set_metadata(scope=rule.scope, name=rule.name, key='eol_at', value=eol_at)
                        no_locks = True
                        for lock in get_dataset_locks(rule.scope, rule.name):
                            if lock['rule_id'] == rule.id:
                                no_locks = False
                                if lock['rse_id'] not in summary:
                                    summary[lock['rse_id']] = {}
                                if did_key not in summary[lock['rse_id']]:
                                    summary[lock['rse_id']][did_key] = {'length': lock['length'] or 0, 'bytes': lock['bytes'] or 0}
                        if no_locks:
                            logger(logging.WARNING, 'Cannot find a lock for rule %s on DID %s' % (rule.id, did))
                        if not dry_run:
                            lifetime = grace_period + rand.randrange(spread_period + 1)
                            logger(logging.INFO, 'Setting %s seconds lifetime for rule %s' % (lifetime, rule.id))
                            options = {'lifetime': lifetime}
                            if purge_replicas:
                                options['purge_replicas'] = True
                            if rule.locked and unlock:
                                logger(logging.INFO, 'Unlocking rule %s', rule.id)
                                options['locked'] = False
                            try:
                                update_rule(rule.id, options=options)
                            except RuleNotFound:
                                logger(logging.WARNING, 'Cannot find rule %s on DID %s' % (rule.id, did))
                                continue
            except Exception:
                exc_type, exc_value, exc_traceback = exc_info()
                logger(logging.CRITICAL, ''.join(format_exception(exc_type, exc_value, exc_traceback)).strip())

            for rse_id in summary:
                tot_size, tot_files, tot_datasets = 0, 0, 0
                for did in summary[rse_id]:
                    tot_datasets += 1
                    tot_files += summary[rse_id][did].get('length', 0)
                    tot_size += summary[rse_id][did].get('bytes', 0)
                vo = get_rse_vo(rse_id=rse_id)
                logger(logging.INFO, 'For RSE %s %s %s datasets will be deleted representing %s files and %s bytes' % (get_rse_name(rse_id=rse_id),
                                                                                                                       '' if vo == 'def' else 'on VO ' + vo,
                                                                                                                       tot_datasets,
                                                                                                                       tot_files,
                                                                                                                       tot_size))

            if once:
                break
            else:
                daemon_sleep(start_time=stime, sleep_time=sleep_time, graceful_stop=GRACEFUL_STOP)

        logger(logging.INFO, 'Graceful stop requested')
        heartbeat.die(executable, hostname, pid, hb_thread)
        logger(logging.INFO, 'Graceful stop done')
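
A sketch of invoking this worker once in dry-run mode, mirroring the call in the atropos test above (the one-year date_check is an arbitrary choice):

from datetime import datetime, timedelta

atropos(thread=1, bulk=100, date_check=datetime.now() + timedelta(days=365),
        dry_run=True, grace_period=86400, once=True, unlock=False,
        spread_period=0, purge_replicas=False, sleep_time=60)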
Example #11
def test_bb8_full_workflow(vo, root_account, jdoe_account, rse_factory,
                           mock_scope, did_factory):
    """BB8: Test the rebalance rule method"""
    config_core.set(section='bb8', option='allowed_accounts', value='jdoe')
    tot_rses = 4
    rses = [rse_factory.make_posix_rse() for _ in range(tot_rses)]
    rse1, rse1_id = rses[0]
    rse2, rse2_id = rses[1]
    rse3, rse3_id = rses[2]
    rse4, rse4_id = rses[3]

    # Add Tags
    # RSE 1 and 2 match expression T1=true
    # RSE 3 and 4 match expression T2=true
    T1 = tag_generator()
    T2 = tag_generator()
    add_rse_attribute(rse1_id, T1, True)
    add_rse_attribute(rse2_id, T1, True)
    add_rse_attribute(rse3_id, T2, True)
    add_rse_attribute(rse4_id, T2, True)

    # Add fake weights
    add_rse_attribute(rse1_id, "fakeweight", 10)
    add_rse_attribute(rse2_id, "fakeweight", 0)
    add_rse_attribute(rse3_id, "fakeweight", 0)
    add_rse_attribute(rse4_id, "fakeweight", 0)
    add_rse_attribute(rse1_id, "freespace", 1)
    add_rse_attribute(rse2_id, "freespace", 1)
    add_rse_attribute(rse3_id, "freespace", 1)
    add_rse_attribute(rse4_id, "freespace", 1)

    # Add quota
    set_local_account_limit(jdoe_account, rse1_id, -1)
    set_local_account_limit(jdoe_account, rse2_id, -1)
    set_local_account_limit(jdoe_account, rse3_id, -1)
    set_local_account_limit(jdoe_account, rse4_id, -1)

    set_local_account_limit(root_account, rse1_id, -1)
    set_local_account_limit(root_account, rse2_id, -1)
    set_local_account_limit(root_account, rse3_id, -1)
    set_local_account_limit(root_account, rse4_id, -1)

    # Invalidate the cache because the result of parse_expression is cached
    REGION.invalidate()

    tot_datasets = 4
    # Create a list of datasets
    datasets = [did_factory.make_dataset() for _ in range(tot_datasets)]
    dsn = [dataset['name'] for dataset in datasets]

    rules = list()

    base_unit = 100000000000  # 100 GB
    nb_files1 = 7
    nb_files2 = 5
    nb_files3 = 3
    nb_files4 = 2
    file_size = 1 * base_unit
    rule_to_rebalance = None

    # Add one secondary file
    files = create_files(1, mock_scope, rse1_id, bytes_=1)
    add_rule(dids=[{
        'scope': mock_scope,
        'name': files[0]['name']
    }],
             account=jdoe_account,
             copies=1,
             rse_expression=rse1,
             grouping='DATASET',
             weight=None,
             lifetime=-86400,
             locked=False,
             subscription_id=None)[0]
    for cnt in range(3, tot_rses):
        add_replicas(rses[cnt][1], files, jdoe_account)
        add_rule(dids=[{
            'scope': mock_scope,
            'name': files[0]['name']
        }],
                 account=jdoe_account,
                 copies=1,
                 rse_expression=rses[cnt][0],
                 grouping='DATASET',
                 weight=None,
                 lifetime=-86400,
                 locked=False,
                 subscription_id=None)[0]
    rule_cleaner(once=True)

    # Create dataset 1 of 700 GB and create a rule on RSE 1 and RSE 3
    files = create_files(nb_files1, mock_scope, rse1_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[0], files, jdoe_account)

    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[0]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse1,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rules.append(rule_id)

    add_replicas(rse3_id, files, jdoe_account)
    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[0]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse3,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rules.append(rule_id)

    # Create dataset 2 of 500 GB and create a rule on RSE 1 and RSE 2
    files = create_files(nb_files2, mock_scope, rse1_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[1], files, jdoe_account)

    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[1]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse1,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rules.append(rule_id)

    add_replicas(rse2_id, files, jdoe_account)
    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[1]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse2,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rules.append(rule_id)

    # Create dataset 3 of 300 GB and create a rule on RSE 1. The copy on RSE 3 is secondary
    files = create_files(nb_files3, mock_scope, rse1_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[2], files, jdoe_account)

    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[2]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse1,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rule_to_rebalance = rule_id
    rules.append(rule_id)

    add_replicas(rse3_id, files, jdoe_account)
    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[2]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse3,
                       grouping='DATASET',
                       weight=None,
                       lifetime=-86400,
                       locked=False,
                       subscription_id=None)[0]
    rule_cleaner(once=True)
    try:
        rule = get_rule(rule_id)
    except RuleNotFound:
        # the rule had a negative lifetime and may already have been cleaned up
        pass

    # Create dataset 4 of 200 GB and create a rule on RSE 3. The copy on RSE 2 is secondary
    files = create_files(nb_files4, mock_scope, rse3_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[3], files, jdoe_account)

    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[3]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse3,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]
    rules.append(rule_id)

    add_replicas(rse2_id, files, jdoe_account)
    rule_id = add_rule(dids=[{
        'scope': mock_scope,
        'name': dsn[3]
    }],
                       account=jdoe_account,
                       copies=1,
                       rse_expression=rse2,
                       grouping='DATASET',
                       weight=None,
                       lifetime=-86400,
                       locked=False,
                       subscription_id=None)[0]
    rule_cleaner(once=True)
    try:
        rule = get_rule(rule_id)
    except RuleNotFound:
        # the rule had a negative lifetime and may already have been cleaned up
        pass

    for dataset in dsn:
        set_status(mock_scope, dataset, open=False)

    for rse in rses:
        fill_rse_expired(rse[1])
        set_rse_usage(rse_id=rse[1],
                      source='min_free_space',
                      used=2 * base_unit,
                      free=2 * base_unit,
                      session=None)
        set_rse_usage(rse_id=rse[1],
                      source='storage',
                      used=15 * base_unit,
                      free=2 * base_unit,
                      session=None)
    set_rse_usage(rse_id=rse2_id,
                  source='min_free_space',
                  used=1 * base_unit,
                  free=1 * base_unit,
                  session=None)
    set_rse_usage(rse_id=rse2_id,
                  source='storage',
                  used=6 * base_unit,
                  free=5 * base_unit,
                  session=None)

    run_abacus(once=True, threads=1, fill_history_table=False, sleep_time=10)
    # Summary :
    # RSE 1 : 1500 GB primary + 1 B secondary
    tot_space = [
        src for src in get_rse_usage(rse1_id) if src['source'] == 'rucio'
    ][0]
    expired = [
        src for src in get_rse_usage(rse1_id) if src['source'] == 'expired'
    ][0]
    assert tot_space['used'] == (nb_files1 + nb_files2 +
                                 nb_files3) * file_size + 1
    assert expired['used'] == 1
    # RSE 2 : 500 GB primary + 200 GB secondary
    tot_space = [
        src for src in get_rse_usage(rse2_id) if src['source'] == 'rucio'
    ][0]
    expired = [
        src for src in get_rse_usage(rse2_id) if src['source'] == 'expired'
    ][0]
    assert tot_space['used'] == (nb_files2 + nb_files4) * file_size
    assert expired['used'] == nb_files4 * file_size
    # Total primary on T1=true : 2000 GB
    # Total secondary on T1=true : 200 GB
    # Ratio secondary / primary = 10  %
    # Ratio on RSE 1 : 0 %
    # Ratio on RSE 2 : 40 %

    # Now run BB8

    re_evaluator(once=True, sleep_time=30, did_limit=100)
    bb8_run(once=True,
            rse_expression='%s=true' % str(T1),
            move_subscriptions=False,
            use_dump=False,
            sleep_time=300,
            threads=1,
            dry_run=False)

    for rule_id in rules:
        rule = get_rule(rule_id)
        if rule_id != rule_to_rebalance:
            assert (rule['child_rule_id'] is None)
        else:
            assert (rule['child_rule_id'] is not None)
            assert (
                rule['expires_at'] <= datetime.utcnow() + timedelta(seconds=1)
            )  # timedelta needed to prevent failure due to rounding effects
            child_rule_id = rule['child_rule_id']
            child_rule = get_rule(child_rule_id)
            assert (child_rule['rse_expression'] == rse2)
            # For teardown, delete child rule
            update_rule(child_rule_id, {'lifetime': -86400})
    rule_cleaner(once=True)

    for dataset in dsn:
        set_metadata(mock_scope, dataset, 'lifetime', -86400)
    undertaker.run(once=True)
Example #12
        assert_equal(now, get_did_atime(scope=tmp_scope, name=tmp_dsn1))
        assert_equal(None, get_did_atime(scope=tmp_scope, name=tmp_dsn2))

    def test_update_dids(self):
        """ DATA IDENTIFIERS (CORE): Update file size and checksum"""
        tmp_scope = 'mock'
        dsn = 'dsn_%s' % generate_uuid()
        lfn = 'lfn.%s' % str(generate_uuid())
        add_did(scope=tmp_scope, name=dsn, type=DIDType.DATASET, account='root')

        files = [{'scope': tmp_scope, 'name': lfn,
                  'bytes': 724963570L, 'adler32': '0cc737eb',
                  'meta': {'guid': str(generate_uuid()), 'events': 100}}]
        attach_dids(scope=tmp_scope, name=dsn, rse='MOCK', dids=files, account='root')

        set_metadata(scope=tmp_scope, name=lfn, key='adler32', value='0cc737ee')
        assert_equal(get_metadata(scope=tmp_scope, name=lfn)['adler32'], '0cc737ee')

        with assert_raises(UnsupportedOperation):
            set_metadata(scope=tmp_scope, name='Nimportnawak', key='adler32', value='0cc737ee')

        set_metadata(scope=tmp_scope, name=lfn, key='bytes', value=724963577L)
        assert_equal(get_metadata(scope=tmp_scope, name=lfn)['bytes'], 724963577L)

    def test_get_did_with_dynamic(self):
        """ DATA IDENTIFIERS (CORE): Get did with dynamic resolve of size"""
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        tmp_dsn4 = 'dsn_%s' % generate_uuid()
Example #13
def test_lifetime_creation_core(root_account, rse_factory, mock_scope,
                                did_factory):
    """
    Test the creation of a lifetime exception on the core side
    """
    nb_datatype = 3
    nb_datasets = 2 * nb_datatype
    yesterday = datetime.now() - timedelta(days=1)
    tomorrow = datetime.now() + timedelta(days=1)
    rse, rse_id = rse_factory.make_posix_rse()
    datasets = [did_factory.make_dataset() for _ in range(nb_datasets)]
    metadata = [str(uuid()) for _ in range(nb_datatype)]
    list_dids = []
    for cnt, meta in enumerate(metadata):
        dids = []
        for dataset in datasets[2 * cnt:2 * (cnt + 1)]:
            set_metadata(dataset['scope'], dataset['name'], 'datatype', meta)
            if cnt < nb_datatype - 1:
                set_metadata(dataset['scope'], dataset['name'], 'eol_at',
                             yesterday)
            dids.append((dataset['scope'], dataset['name']))
        dids.sort()
        list_dids.append(dids)
    datasets.extend([{
        'scope': mock_scope,
        'name': 'dataset_%s' % str(uuid()),
        'did_type': DIDType.DATASET
    } for _ in range(2)])

    # Test with cutoff_date not defined
    try:
        config_core.remove_option('lifetime_model', 'cutoff_date')
    except (ConfigNotFound, NoSectionError):
        pass

    with pytest.raises(UnsupportedOperation):
        add_exception(datasets,
                      root_account,
                      pattern='wekhewfk',
                      comments='This is a comment',
                      expires_at=datetime.now())

    # Test with cutoff_date wrongly defined
    config_core.set(section='lifetime_model',
                    option='cutoff_date',
                    value='wrong_value')
    config_core.get(section='lifetime_model',
                    option='cutoff_date',
                    default=None,
                    use_cache=False)
    with pytest.raises(UnsupportedOperation):
        add_exception(datasets,
                      root_account,
                      pattern='wekhewfk',
                      comments='This is a comment',
                      expires_at=datetime.now())

    # Test with cutoff_date properly defined
    tomorrow = tomorrow.strftime('%Y-%m-%d')
    config_core.set(section='lifetime_model',
                    option='cutoff_date',
                    value=tomorrow)
    config_core.get(section='lifetime_model',
                    option='cutoff_date',
                    default=None,
                    use_cache=False)
    result = add_exception(datasets,
                           root_account,
                           pattern='wekhewfk',
                           comments='This is a comment',
                           expires_at=datetime.now())

    # Check if the Not Existing DIDs are identified
    result_unknown = [(entry['scope'], entry['name'])
                      for entry in result['unknown']]
    result_unknown.sort()
    unknown = [(entry['scope'], entry['name'])
               for entry in datasets[nb_datasets:nb_datasets + 2]]
    unknown.sort()
    assert result_unknown == unknown

    # Check if the DIDs not affected by the Lifetime Model are identified
    result_not_affected = [(entry['scope'], entry['name'])
                           for entry in result['not_affected']]
    result_not_affected.sort()
    not_affected = list_dids[-1]
    assert result_not_affected == not_affected

    # Check if an exception was done for each datatype
    list_exceptions = list()
    for exception_id in result['exceptions']:
        dids = [(entry['scope'], entry['name'])
                for entry in result['exceptions'][exception_id]]
        dids.sort()
        list_exceptions.append(dids)

    for did in list_dids[:nb_datatype - 1]:
        assert did in list_exceptions
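
A sketch of the result layout add_exception returns, as exercised by the assertions above (illustrative scope/name values, not taken from the test):

result = {
    'exceptions': {'<exception_id>': [{'scope': 'mock', 'name': 'dataset_a'}]},  # one exception per datatype
    'unknown': [{'scope': 'mock', 'name': 'dataset_x'}],        # DIDs that do not exist
    'not_affected': [{'scope': 'mock', 'name': 'dataset_y'}],   # DIDs not covered by the lifetime model
}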
Example #15
def lfn2pfn_DUNE(scope, name, rse, rse_attrs, protocol_attrs):

    from rucio.common import config
    from rucio.common.types import InternalScope
    from rucio.rse import rsemanager
    from metacat.webapi import MetaCatClient

    # current URL: https://metacat.fnal.gov:9443/dune_meta_demo/app
    metacat_url = config.config_get('policy', 'metacat_base_url',
                                    raise_exception=False, default=None) or os.environ.get("METACAT_SERVER_URL")
    if metacat_url is None:
        raise ValueError("MetaCat client URL is not configured")

    metacat_client = MetaCatClient(metacat_url)

    def get_metadata_field(metadata, field):
        # fall back to the literal string 'None' when the field is missing
        return metadata.get(field, 'None')

    # check to see if PFN is already cached in Rucio's metadata system
    didclient = None
    didmd = {}
    internal_scope = InternalScope(scope)
    if getattr(rsemanager, 'CLIENT_MODE', None):
        from rucio.client.didclient import DIDClient
        didclient = DIDClient()
        didmd = didclient.get_metadata(internal_scope, name)
    if getattr(rsemanager, 'SERVER_MODE', None):
        from rucio.core.did import get_metadata
        didmd = get_metadata(internal_scope, name)

    # if it is, just return it
    md_key = 'PFN_' + rse
    if md_key in didmd:
        return didmd[md_key]

    lfn = scope + ':' + name
    jsondata = metacat_client.get_file(name=lfn)
    metadata = jsondata["metadata"]

    # determine year from timestamps
    timestamp = None
    if 'core.start_time' in metadata:
        timestamp = metadata['core.start_time']
    elif 'core.end_time' in metadata:
        timestamp = metadata['core.end_time']
    elif 'created_timestamp' in jsondata:
        timestamp = jsondata['created_timestamp']
    if timestamp is None:
        year = 'None'
    else:
        dt = datetime.utcfromtimestamp(timestamp)
        year = str(dt.year)

    # determine hashes from run number
    run_number = 0
    if 'core.runs' in metadata:
        run_number = int(metadata['core.runs'][0])

    hash1 = "%02d" % ((run_number // 1000000) % 100)
    hash2 = "%02d" % ((run_number // 10000) % 100)
    hash3 = "%02d" % ((run_number // 100) % 100)
    hash4 = "%02d" % (run_number % 100)

    run_type = get_metadata_field(metadata, 'core.run_type')
    data_tier = get_metadata_field(metadata, 'core.data_tier')
    file_type = get_metadata_field(metadata, 'core.file_type')
    data_stream = get_metadata_field(metadata, 'core.data_stream')
    data_campaign = get_metadata_field(metadata, 'DUNE.campaign')
    filename = name

    pfn = '/'.join(['pnfs/dune/tape_backed/dunepro', run_type, data_tier, year,
                    file_type, data_stream, data_campaign,
                    hash1, hash2, hash3, hash4, filename])

    # store the PFN in Rucio metadata for next time
    if getattr(rsemanager, 'CLIENT_MODE', None):
        didclient.set_metadata(internal_scope, name, md_key, pfn)
    if getattr(rsemanager, 'SERVER_MODE', None):
        from rucio.core.did import set_metadata
        set_metadata(internal_scope, name, md_key, pfn)

    return pfn
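
A sketch of how such an algorithm is typically wired into Rucio's deterministic PFN machinery; the registration hook shown here and the algorithm name 'DUNE' are assumptions, not taken from the snippet above:

from rucio.rse.protocols.protocol import RSEDeterministicTranslation

# register lfn2pfn_DUNE under a policy-specific algorithm name
RSEDeterministicTranslation.register(lfn2pfn_DUNE, name='DUNE')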