Example #1
    def test_get_did_with_dynamic(self):
        """ DATA IDENTIFIERS (CORE): Get did with dynamic resolve of size"""
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        tmp_dsn4 = 'dsn_%s' % generate_uuid()

        add_did(scope=tmp_scope,
                name=tmp_dsn1,
                type=DIDType.DATASET,
                account='root')
        add_replica(rse='MOCK',
                    scope=tmp_scope,
                    name=tmp_dsn2,
                    bytes=10,
                    account='root')
        add_replica(rse='MOCK',
                    scope=tmp_scope,
                    name=tmp_dsn3,
                    bytes=10,
                    account='root')
        attach_dids(scope=tmp_scope,
                    name=tmp_dsn1,
                    dids=[{
                        'scope': tmp_scope,
                        'name': tmp_dsn2
                    }, {
                        'scope': tmp_scope,
                        'name': tmp_dsn3
                    }],
                    account='root')

        add_did(scope=tmp_scope,
                name=tmp_dsn4,
                type=DIDType.CONTAINER,
                account='root')
        attach_dids(scope=tmp_scope,
                    name=tmp_dsn4,
                    dids=[{
                        'scope': tmp_scope,
                        'name': tmp_dsn1
                    }],
                    account='root')

        assert_equal(
            get_did(scope=tmp_scope, name=tmp_dsn1, dynamic=True)['bytes'], 20)
        assert_equal(
            get_did(scope=tmp_scope, name=tmp_dsn4, dynamic=True)['bytes'], 20)
Example #2
    def check_did(self, did):
        decision = {'did': ':'.join(did)}
        if (self._added_cache.check_dataset(':'.join(did))):
            decision['error_reason'] = 'already added replica for this did in the last 24h'
            return decision

        if (not did[0].startswith('data')) and (not did[0].startswith('mc')):
            decision['error_reason'] = 'not a data or mc dataset'
            return decision

        datatype = did[1].split('.')[4].split('_')[0]

        if datatype not in self._datatypes:
            decision['error_reason'] = 'wrong datatype'
            return decision

        try:
            meta = get_did(did[0], did[1])
        except DataIdentifierNotFound:
            decision['error_reason'] = 'did does not exist'
            return decision
        if meta['length'] is None:
            meta['length'] = 0
        if meta['bytes'] is None:
            meta['bytes'] = 0
        logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))

        decision['length'] = meta['length']
        decision['bytes'] = meta['bytes']

        total_added_bytes = sum(self._added_bytes.get_series('total'))
        total_added_files = sum(self._added_files.get_series('total'))

        logging.debug("total_added_bytes: %d" % total_added_bytes)
        logging.debug("total_added_files: %d" % total_added_files)

        if ((total_added_bytes + meta['bytes']) > self._max_bytes_hour):
            decision['error_reason'] = 'above bytes limit of %d bytes' % self._max_bytes_hour
            return decision
        if ((total_added_files + meta['length']) > self._max_files_hour):
            decision['error_reason'] = 'above files limit of %d files' % self._max_files_hour
            return decision

        last_accesses = self._dc.get_did(did)
        self._dc.add_did(did)

        decision['last_accesses'] = last_accesses

        try:
            pop = get_popularity(did) or 0.0  # guard against None, which would break the comparison below
            decision['popularity'] = pop
        except Exception:
            decision['error_reason'] = 'problems connecting to ES'
            return decision

        if (last_accesses < self._min_recent_requests) and (pop < self._min_popularity):
            decision['error_reason'] = 'did not popular enough'
            return decision

        return decision
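check_did returns a decision dict that either carries an 'error_reason' (rejected) or the metadata needed for placement. A minimal sketch of a driver loop consuming it (filter_candidates and its dids argument are invented for illustration, not part of the original class):

    def filter_candidates(self, dids):
        # Hypothetical helper: keep only the dids whose decision
        # carries no 'error_reason'.
        accepted = []
        for did in dids:  # each did is a (scope, name) pair
            decision = self.check_did(did)
            if 'error_reason' in decision:
                logging.debug('skipping %s: %s', decision['did'], decision['error_reason'])
                continue
            accepted.append(decision)
        return accepted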
Example #3
    def test_delete_replicas_from_datasets(self):
        """ REPLICA (CORE): Delete replicas from dataset """
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        nbfiles = 5
        files1 = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]

        add_did(scope=tmp_scope,
                name=tmp_dsn1,
                type=DIDType.DATASET,
                account='root')
        add_did(scope=tmp_scope,
                name=tmp_dsn2,
                type=DIDType.DATASET,
                account='root')

        attach_dids(scope=tmp_scope,
                    name=tmp_dsn1,
                    rse='MOCK',
                    dids=files1,
                    account='root')
        attach_dids(scope=tmp_scope,
                    name=tmp_dsn2,
                    dids=files1,
                    account='root')

        set_status(scope=tmp_scope, name=tmp_dsn1, open=False)

        delete_replicas(rse='MOCK', files=files1)

        with assert_raises(DataIdentifierNotFound):
            get_did(scope=tmp_scope, name=tmp_dsn1)

        get_did(scope=tmp_scope, name=tmp_dsn2)

        assert_equal(list(list_files(scope=tmp_scope, name=tmp_dsn2)), [])
Example #4
    def test_delete_replicas(self):
        """ REPLICA (CORE): Delete replicas """
        tmp_scope = 'mock'
        nbfiles = 5
        files1 = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        add_replicas(rse='MOCK',
                     files=files1,
                     account='root',
                     ignore_availability=True)

        files2 = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        add_replicas(rse='MOCK',
                     files=files2,
                     account='root',
                     ignore_availability=True)
        add_replicas(rse='MOCK3',
                     files=files2,
                     account='root',
                     ignore_availability=True)

        delete_replicas(rse='MOCK', files=files1 + files2)

        for file in files1:
            with assert_raises(DataIdentifierNotFound):
                print(get_did(scope=file['scope'], name=file['name']))

        for file in files2:
            get_did(scope=file['scope'], name=file['name'])
Example #5
File: did.py Project: kbg/rucio
def get_did(scope, name, dynamic=False):
    """
    Retrieve a single data did.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param dynamic: Dynamically resolve the bytes and length of the did.
    :return did: Dictionary containing {'name', 'scope', 'type'}, Exception otherwise
    """

    return did.get_did(scope=scope, name=name, dynamic=dynamic)
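A minimal usage sketch for this wrapper, assuming a dataset mock:dsn_test already exists (scope and name here are hypothetical):

# dynamic=True resolves 'bytes' and 'length' from the currently attached files
meta = get_did(scope='mock', name='dsn_test', dynamic=True)
print(meta['scope'], meta['name'], meta['type'], meta['bytes'])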
Example #6
def get_did(scope, name):
    """
    Retrieve a single data did.

    :param scope: The scope name.
    :param name: The data identifier name.
    :return did: Dictionary containing {'name', 'scope', 'type'}, Exception otherwise
    """

    return did.get_did(scope=scope, name=name)
Example #7
    def test_get_did_with_dynamic(self):
        """ DATA IDENTIFIERS (CORE): Get did with dynamic resolve of size"""
        tmp_scope = InternalScope('mock', **self.vo)
        root = InternalAccount('root', **self.vo)
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        tmp_dsn4 = 'dsn_%s' % generate_uuid()

        rse_id = get_rse_id(rse='MOCK', **self.vo)

        add_did(scope=tmp_scope, name=tmp_dsn1, type=DIDType.DATASET, account=root)
        add_replica(rse_id=rse_id, scope=tmp_scope, name=tmp_dsn2, bytes=10, account=root)
        add_replica(rse_id=rse_id, scope=tmp_scope, name=tmp_dsn3, bytes=10, account=root)
        attach_dids(scope=tmp_scope, name=tmp_dsn1, dids=[{'scope': tmp_scope, 'name': tmp_dsn2}, {'scope': tmp_scope, 'name': tmp_dsn3}], account=root)

        add_did(scope=tmp_scope, name=tmp_dsn4, type=DIDType.CONTAINER, account=root)
        attach_dids(scope=tmp_scope, name=tmp_dsn4, dids=[{'scope': tmp_scope, 'name': tmp_dsn1}], account=root)

        assert get_did(scope=tmp_scope, name=tmp_dsn1, dynamic=True)['bytes'] == 20
        assert get_did(scope=tmp_scope, name=tmp_dsn4, dynamic=True)['bytes'] == 20
Example #8
def get_did(scope, name, dynamic=False, vo='def'):
    """
    Retrieve a single data did.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param dynamic: Dynamically resolve the bytes and length of the did
    :param vo: The VO to act on.
    :return did: Dictionary containing {'name', 'scope', 'type'}, Exception otherwise
    """

    scope = InternalScope(scope, vo=vo)

    d = did.get_did(scope=scope, name=name, dynamic=dynamic)
    return api_update_return_dict(d)
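A hedged usage sketch (names hypothetical): callers pass plain strings, the function wraps the scope into an InternalScope for the given VO, and api_update_return_dict converts internal types in the result back for the caller.

meta = get_did('mock', 'dsn_test', dynamic=True, vo='def')
print(meta['name'], meta['type'], meta['bytes'], meta['length'])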
Example #9
    def test_atlas_archival_policy(self):
        """ UNDERTAKER (CORE): Test the atlas archival policy. """
        tmp_scope = 'mock'
        nbdatasets = 5
        nbfiles = 5

        rse = 'LOCALGROUPDISK_%s' % rse_name_generator()
        add_rse(rse)

        set_account_limit('jdoe', get_rse_id(rse), -1)

        dsns2 = [{'name': 'dsn_%s' % generate_uuid(),
                  'scope': tmp_scope,
                  'type': 'DATASET',
                  'lifetime': -1,
                  'rules': [{'account': 'jdoe', 'copies': 1,
                             'rse_expression': rse,
                             'grouping': 'DATASET'}]} for i in range(nbdatasets)]

        add_dids(dids=dsns2, account='root')

        replicas = list()
        for dsn in dsns2:
            files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                      'adler32': '0cc737eb', 'tombstone': datetime.utcnow() + timedelta(weeks=2), 'meta': {'events': 10}} for i in range(nbfiles)]
            attach_dids(scope=tmp_scope, name=dsn['name'], rse=rse, dids=files, account='root')
            replicas += files

        undertaker(worker_number=1, total_workers=1, once=True)

        for replica in replicas:
            assert(get_replica(scope=replica['scope'], name=replica['name'], rse=rse)['tombstone'] is None)

        for dsn in dsns2:
            assert(get_did(scope='archive', name=dsn['name'])['name'] == dsn['name'])
            assert(len([x for x in list_rules(filters={'scope': 'archive', 'name': dsn['name']})]) == 1)
Example #10
    def place(self, did):
        self.__update_penalties()
        decision = {'did': ':'.join(did)}
        if (not did[0].startswith('data')) and (not did[0].startswith('mc')):
            decision['error_reason'] = 'not a data or mc dataset'
            return decision

        try:
            meta = get_did(did[0], did[1])
        except DataIdentifierNotFound:
            decision['error_reason'] = 'did does not exist'
            return decision
        if meta['length'] is None:
            meta['length'] = 0
        if meta['bytes'] is None:
            meta['bytes'] = 0
        logging.debug('got %s:%s, num_files: %d, bytes: %d' %
                      (did[0], did[1], meta['length'], meta['bytes']))

        decision['length'] = meta['length']
        decision['bytes'] = meta['bytes']

        last_accesses = self._dc.get_did(did)
        self._dc.add_did(did)

        decision['last_accesses'] = last_accesses

        pop = get_popularity(did) or 0.0  # guard against None, which would break the comparison below
        decision['popularity'] = pop

        if (last_accesses < 5) and (pop < 10.0):
            decision['error_reason'] = 'did not popular enough'
            return decision

        free_rses = list(self._rses)  # copy, so remove() below does not mutate the instance list
        available_reps = []
        reps = list_dataset_replicas(did[0], did[1])
        num_reps = 0
        for rep in reps:
            rse_attr = list_rse_attributes(rep['rse'])
            if 'type' not in rse_attr:
                continue
            if rse_attr['type'] != 'DATADISK':
                continue
            if rep['state'] == ReplicaState.AVAILABLE:
                if rep['rse'] in free_rses:
                    free_rses.remove(rep['rse'])
                available_reps.append(rep['rse'])
                num_reps += 1

        decision['replica_rses'] = available_reps
        decision['num_replicas'] = num_reps
        if num_reps >= 5:
            decision['error_reason'] = 'more than 4 replicas already exist'
            return decision

        rse_ratios = {}
        space_info = self._fsc.get_rse_space()
        for rse in free_rses:
            rse_space = space_info[rse]
            penalty = self._penalties[rse]
            rse_ratios[rse] = float(rse_space['free']) / float(
                rse_space['total']) * 100.0 / penalty

        sorted_rses = sorted(rse_ratios.items(),
                             key=itemgetter(1),
                             reverse=True)
        decision['destination_rse'] = sorted_rses[0][0]
        decision['rse_ratios'] = sorted_rses
        self._penalties[sorted_rses[0][0]] = 10.0

        return decision
Example #11
    def test_atlas_archival_policy(self):
        """ UNDERTAKER (CORE): Test the atlas archival policy. """
        if get_policy() != 'atlas':
            LOG.info("Skipping atlas-specific test")
            return

        tmp_scope = InternalScope('mock', **self.vo)
        jdoe = InternalAccount('jdoe', **self.vo)
        root = InternalAccount('root', **self.vo)

        nbdatasets = 5
        nbfiles = 5

        rse = 'LOCALGROUPDISK_%s' % rse_name_generator()
        rse_id = add_rse(rse, **self.vo)

        set_local_account_limit(jdoe, rse_id, -1)

        dsns2 = [{
            'name': 'dsn_%s' % generate_uuid(),
            'scope': tmp_scope,
            'type': 'DATASET',
            'lifetime': -1,
            'rules': [{
                'account': jdoe,
                'copies': 1,
                'rse_expression': rse,
                'grouping': 'DATASET'
            }]
        } for _ in range(nbdatasets)]

        add_dids(dids=dsns2, account=root)

        replicas = list()
        for dsn in dsns2:
            files = [{
                'scope': tmp_scope,
                'name': 'file_%s' % generate_uuid(),
                'bytes': 1,
                'adler32': '0cc737eb',
                'tombstone': datetime.utcnow() + timedelta(weeks=2),
                'meta': {
                    'events': 10
                }
            } for _ in range(nbfiles)]
            attach_dids(scope=tmp_scope,
                        name=dsn['name'],
                        rse_id=rse_id,
                        dids=files,
                        account=root)
            replicas += files

        undertaker(worker_number=1, total_workers=1, once=True)

        for replica in replicas:
            assert get_replica(scope=replica['scope'], name=replica['name'],
                               rse_id=rse_id)['tombstone'] is None

        for dsn in dsns2:
            archive_scope = InternalScope('archive', **self.vo)
            assert get_did(scope=archive_scope, name=dsn['name'])['name'] == dsn['name']
            assert len(list(list_rules(filters={'scope': archive_scope, 'name': dsn['name']}))) == 1
Example #12
def test_archive_removal_impact_on_constituents(rse_factory, did_factory, mock_scope, root_account, caches_mock, file_config_mock):
    [cache_region] = caches_mock
    rse_name, rse_id = rse_factory.make_mock_rse()
    scope = mock_scope
    account = root_account

    # Create 2 archives and 4 files:
    # - One only exists in the first archive
    # - One in both, plus another replica, which is not in an archive
    # - One in both, plus another replica, which is not in an archive; and this replica has expired
    # - One in both, plus another replica, which is not in an archive; and this replica has expired; but a replication rule exists on this second replica
    # Also add these files to datasets, one of which will be removed at the end
    nb_constituents = 4
    nb_c_outside_archive = nb_constituents - 1
    constituent_size = 2000
    archive_size = 1000
    uuid = str(generate_uuid())
    constituents = [{'scope': scope, 'name': 'lfn.%s.%d' % (uuid, i)} for i in range(nb_constituents)]
    did_factory.register_dids(constituents)
    c_first_archive_only, c_with_replica, c_with_expired_replica, c_with_replica_and_rule = constituents

    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size, **c_with_replica)

    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_expired_replica)

    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_replica_and_rule)
    rule_core.add_rule(dids=[c_with_replica_and_rule], account=account, copies=1, rse_expression=rse_name, grouping='NONE',
                       weight=None, lifetime=None, locked=False, subscription_id=None)

    archive1, archive2 = [{'scope': scope, 'name': 'archive_%s.%d.zip' % (uuid, i)} for i in range(2)]
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive1)
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive2)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size} for c in constituents],
                         account=account, **archive1)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size} for c in [c_with_replica, c_with_expired_replica, c_with_replica_and_rule]],
                         account=account, **archive2)

    dataset1, dataset2 = [{'scope': scope, 'name': 'dataset_%s.%i' % (uuid, i)} for i in range(2)]
    did_core.add_did(did_type='DATASET', account=account, **dataset1)
    did_core.attach_dids(dids=constituents, account=account, **dataset1)
    did_core.add_did(did_type='DATASET', account=account, **dataset2)
    did_core.attach_dids(dids=[c_first_archive_only, c_with_expired_replica], account=account, **dataset2)

    @read_session
    def __get_archive_contents_history_count(archive, session=None):
        return session.query(ConstituentAssociationHistory).filter_by(**archive).count()

    # Run reaper the first time.
    # the expired non-archive replica of c_with_expired_replica must be removed,
    # but the did must not be removed, and it must still remain in the dataset because
    # it still has the replica from inside the archive
    assert replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    for did in constituents + [archive1, archive2]:
        assert did_core.get_did(**did)
    for did in [archive1, archive2, c_with_replica, c_with_replica_and_rule]:
        assert replica_core.get_replica(rse_id=rse_id, **did)
    with pytest.raises(ReplicaNotFound):
        # The replica is only on the archive, not on the constituent
        replica_core.get_replica(rse_id=rse_id, **c_first_archive_only)
    with pytest.raises(ReplicaNotFound):
        # The replica outside the archive was removed by reaper
        nb_c_outside_archive -= 1
        replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    # Compared to get_replica, list_replicas resolves archives, must return replicas for all files
    assert len(list(replica_core.list_replicas(dids=constituents))) == 4
    assert len(list(did_core.list_content(**dataset1))) == 4
    assert len(list(did_core.list_archive_content(**archive1))) == 4
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 0
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the first archive and run reaper again
    # the archive will be removed; and c_first_archive_only must be removed from datasets
    # and from the did table.
    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive1)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive1)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    assert len(list(replica_core.list_replicas(dids=constituents))) == 3
    assert len(list(did_core.list_content(**dataset1))) == 3
    assert len(list(did_core.list_archive_content(**archive1))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the second archive replica and run reaper another time
    # c_with_expired_replica is removed because its external replica got removed at previous step
    # and it exists only inside the archive now.
    # If not open, Dataset2 will be removed because it will be empty.
    did_core.set_status(open=False, **dataset2)
    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive2)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    # The archive must be removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive2)
    # The DIDs which only existed in the archive are also removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_with_expired_replica)
    # If the DID has a non-expired replica outside the archive without rules on it, the DID is not removed
    assert did_core.get_did(**c_with_replica)
    # If the DID has an expired replica outside the archive, but has rules on that replica, the DID is not removed
    assert did_core.get_did(**c_with_replica_and_rule)
    assert len(list(replica_core.list_replicas(dids=constituents))) == 2
    assert len(list(did_core.list_content(**dataset1))) == 2
    with pytest.raises(DataIdentifierNotFound):
        did_core.get_did(**dataset2)
    assert len(list(did_core.list_content(**dataset2))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 0
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 3
Example #13
def test_abacus_collection_replica_new(vo, rse_factory, rucio_client,
                                       did_factory, core_config_mock,
                                       caches_mock):
    """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
    file_sizes = 2
    nfiles = 2
    dataset_scope = 'mock'
    rse, rse_id = rse_factory.make_posix_rse()
    dids = did_factory.upload_test_dataset(rse_name=rse,
                                           scope=dataset_scope,
                                           size=file_sizes,
                                           nb_files=nfiles)
    files = [{
        'scope': did['did_scope'],
        'name': did['did_name']
    } for did in dids]
    dataset = dids[0]['dataset_name']
    rucio_client.set_metadata(dataset_scope, dataset, 'lifetime', -1)
    rucio_client.add_replication_rule([{'scope': dataset_scope, 'name': dataset}], 1, rse, lifetime=-1)

    # Check dataset replica after rule creation - initial data
    dataset_replica = list(rucio_client.list_dataset_replicas(dataset_scope, dataset))[0]
    assert dataset_replica['bytes'] == 0
    assert dataset_replica['length'] == 0
    assert dataset_replica['available_bytes'] == 0
    assert dataset_replica['available_length'] == 0
    assert str(dataset_replica['state']) == 'UNAVAILABLE'

    # Run Abacus
    collection_replica.run(once=True)

    # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
    dataset_replica = list(rucio_client.list_dataset_replicas(dataset_scope, dataset))[0]
    assert dataset_replica['bytes'] == len(files) * file_sizes
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == len(files) * file_sizes
    assert dataset_replica['available_length'] == len(files)
    assert str(dataset_replica['state']) == 'AVAILABLE'

    # Delete one file -> collection replica should be unavailable
    cleaner.run(once=True)
    delete_replicas(rse_id=rse_id, files=[{'name': files[0]['name'], 'scope': InternalScope(dataset_scope, vo)}])
    rucio_client.add_replication_rule([{'scope': dataset_scope, 'name': dataset}], 1, rse, lifetime=-1)
    collection_replica.run(once=True)
    dataset_replica = list(rucio_client.list_dataset_replicas(dataset_scope, dataset))[0]
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['bytes'] == len(files) * file_sizes
    assert dataset_replica['available_length'] == len(files) - 1
    assert dataset_replica['available_bytes'] == (len(files) - 1) * file_sizes
    assert str(dataset_replica['state']) == 'UNAVAILABLE'

    # Delete all files -> collection replica should be deleted
    # New behaviour (dataset should be deleted)
    cleaner.run(once=True)
    delete_replicas(rse_id=rse_id, files=[{'name': files[1]['name'], 'scope': InternalScope(dataset_scope, vo)}])
    with pytest.raises(DataIdentifierNotFound):
        get_did(scope=InternalScope(dataset_scope, vo), name=dataset)
Example #14
                'bytes': 1,
                'adler32': '0cc737eb',
                'tombstone': datetime.utcnow() + timedelta(weeks=2),
                'meta': {
                    'events': 10
                }
            } for _ in range(nbfiles)]
            attach_dids(scope=tmp_scope,
                        name=dsn['name'],
                        rse=rse,
                        dids=files,
                        account='root')
            replicas += files

        undertaker(worker_number=1, total_workers=1, once=True)

        for replica in replicas:
            assert get_replica(scope=replica['scope'], name=replica['name'],
                               rse=rse)['tombstone'] is None

        for dsn in dsns2:
            assert get_did(scope='archive', name=dsn['name'])['name'] == dsn['name']
            assert len(list(list_rules(filters={'scope': 'archive', 'name': dsn['name']}))) == 1
Example #15
    def test_delete_replicas(self):
        """ REPLICA (CORE): Delete replicas """
        tmp_scope = 'mock'
        nbfiles = 5
        files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        add_replicas(rse='MOCK', files=files1, account='root', ignore_availability=True)

        files2 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        add_replicas(rse='MOCK', files=files2, account='root', ignore_availability=True)
        add_replicas(rse='MOCK3', files=files2, account='root', ignore_availability=True)

        delete_replicas(rse='MOCK', files=files1 + files2)

        for file in files1:
            with assert_raises(DataIdentifierNotFound):
                get_did(scope=file['scope'], name=file['name'])

        for file in files2:
            get_did(scope=file['scope'], name=file['name'])

    def test_delete_replicas_from_datasets(self):
        """ REPLICA (CORE): Delete replicas from dataset """
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        nbfiles = 5
        files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]

        add_did(scope=tmp_scope, name=tmp_dsn1, type=DIDType.DATASET, account='root')
        add_did(scope=tmp_scope, name=tmp_dsn2, type=DIDType.DATASET, account='root')
Example #16
            }
        } for _ in range(nbfiles)]
        add_replicas(rse='MOCK',
                     files=files2,
                     account='root',
                     ignore_availability=True)
        add_replicas(rse='MOCK3',
                     files=files2,
                     account='root',
                     ignore_availability=True)

        delete_replicas(rse='MOCK', files=files1 + files2)

        for file in files1:
            with assert_raises(DataIdentifierNotFound):
                print(get_did(scope=file['scope'], name=file['name']))

        for file in files2:
            get_did(scope=file['scope'], name=file['name'])

    def test_delete_replicas_from_datasets(self):
        """ REPLICA (CORE): Delete replicas from dataset """
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        nbfiles = 5
        files1 = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
Example #17
    def place(self, did):
        self.__update_penalties()
        decision = {'did': ':'.join(did)}
        try:
            meta = get_did(did[0], did[1])
        except DataIdentifierNotFound:
            decision['error_reason'] = 'did does not exist'
            return decision
        if meta['length'] is None:
            meta['length'] = 0
        if meta['bytes'] is None:
            meta['bytes'] = 0
        logging.debug('got %s:%s, num_files: %d, bytes: %d' %
                      (did[0], did[1], meta['length'], meta['bytes']))

        decision['length'] = meta['length']
        decision['bytes'] = meta['bytes']

        available_rses = []
        available_sites = []
        reps = list_dataset_replicas(did[0], did[1])

        num_reps = 0
        for rep in reps:
            if rep['state'] == ReplicaState.AVAILABLE:
                available_rses.append(rep['rse'])
                available_sites.append(self._mc.ddm_to_site(rep['rse']))
                num_reps += 1

        decision['replica_rses'] = available_rses
        decision['num_replicas'] = num_reps
        if num_reps >= 5:
            decision['error_reason'] = 'more than 4 replicas already exist'
            return decision

        site_ratios = {}
        site_job_info = {}
        for panda_site in self._wc.get_sites():
            site = self._mc.panda_to_site(panda_site)
            job_info = self._wc.get_job_info(panda_site)
            ratio = float(
                job_info[0]) / (float(job_info[1]) + float(job_info[2]) / 2)
            penalty = self._penalties[site]
            site_ratios[site] = ratio * penalty
            site_job_info[site] = (job_info, penalty)

        decision['site_ratios'] = site_ratios
        decision['site_job_info'] = site_job_info
        picked_site = None
        picked_rse = None

        for site, _ in sorted(site_ratios.items(), key=itemgetter(1)):
            if site in available_sites:
                continue
            rses_for_site = self._mc.site_to_ddm(site)
            if rses_for_site is None:
                continue

            for rse in rses_for_site:
                if 'DATADISK' in rse:
                    picked_rse = rse
                    picked_site = site
                    break
            if picked_rse:
                break

        if picked_rse is None:
            decision['error_reason'] = 'could not pick RSE'
            return decision

        decision['destination_rse'] = picked_rse
        if picked_site:
            self._penalties[picked_site] = 1

        picked_source = None
        shuffle(available_rses)
        for rse in available_rses:
            if 'TAPE' in rse:
                continue
            picked_source = rse
            break

        if picked_source is None:
            picked_source = available_rses[0]

        decision['source_rse'] = picked_source
        logging.debug("Picked %s as source and %s as destination RSE" %
                      (picked_source, picked_rse))

        return decision
Example #18
    def place(self, did):
        self.__update_penalties()
        self._added_bytes.trim()
        self._added_files.trim()

        decision = self.check_did(did)

        if 'error_reason' in decision:
            return decision

        meta = get_did(did[0], did[1])
        available_reps = {}
        reps = list_dataset_replicas(did[0], did[1])
        num_reps = 0
        space_info = self._fsc.get_rse_space()
        max_mbps = 0.0
        for rep in reps:
            rse_attr = list_rse_attributes(rep['rse'])
            src_rse = rep['rse']
            if 'site' not in rse_attr:
                continue

            src_site = rse_attr['site']
            src_rse_info = get_rse(src_rse)

            if 'type' not in rse_attr:
                continue
            if rse_attr['type'] != 'DATADISK':
                continue
            if src_rse_info['availability'] & 4 == 0:  # read bit unset: RSE cannot serve as source
                continue

            if rep['state'] == ReplicaState.AVAILABLE:
                if rep['available_length'] == 0:
                    continue
                net_metrics = {}
                net_metrics_type = None
                for metric_type in ('fts', 'fax', 'perfsonar', 'dashb'):
                    net_metrics_type = metric_type
                    net_metrics = self._nmc.getMbps(src_site, metric_type)
                    if net_metrics:
                        break
                if len(net_metrics) == 0:
                    continue
                available_reps[src_rse] = {}
                for dst_site, mbps in net_metrics.items():
                    if src_site == dst_site:
                        continue
                    if dst_site in self._sites:
                        if mbps > max_mbps:
                            max_mbps = mbps
                        dst_rse = self._sites[dst_site]['rse']
                        dst_rse_info = get_rse(dst_rse)

                        if dst_rse_info['availability'] & 2 == 0:  # write bit unset: RSE cannot be destination
                            continue

                        site_added_bytes = sum(self._added_bytes.get_series(dst_rse))
                        site_added_files = sum(self._added_files.get_series(dst_rse))

                        if ((site_added_bytes + meta['bytes']) > self._max_bytes_hour_rse):
                            continue
                        if ((site_added_files + meta['length']) > self._max_files_hour_rse):
                            continue

                        queued = self._nmc.getQueuedFiles(src_site, dst_site)

                        # logging.debug('queued %s -> %s: %d' % (src_site, dst_site, queued))
                        if queued > 0:
                            continue
                        rse_space = space_info.get(dst_rse, {'free': 0, 'total': 1})
                        if src_rse not in self._src_penalties:
                            self._src_penalties[src_rse] = 100.0
                        src_penalty = self._src_penalties[src_rse]
                        if dst_rse not in self._dst_penalties:
                            self._dst_penalties[dst_rse] = 100.0
                        dst_penalty = self._dst_penalties[dst_rse]

                        free_space = float(rse_space['free']) / float(rse_space['total']) * 100.0
                        available_reps[src_rse][dst_rse] = {'free_space': free_space, 'src_penalty': src_penalty, 'dst_penalty': dst_penalty, 'mbps': float(mbps), 'metrics_type': net_metrics_type}

                num_reps += 1

        # decision['replica_rses'] = available_reps
        decision['num_replicas'] = num_reps

        if num_reps >= 5:
            decision['error_reason'] = 'more than 4 replicas already exist'
            return decision

        src_dst_ratios = []

        if max_mbps == 0.0:
            decision['error_reason'] = 'could not find enough network metrics'
            return decision

        for src, dsts in available_reps.items():
            for dst, metrics in dsts.items():
                if dst in available_reps:
                    continue
                bdw = (metrics['mbps'] / max_mbps) * 100.0
                src_penalty = self._src_penalties[src]
                dst_penalty = self._dst_penalties[dst]

                ratio = ((metrics['free_space'] / 4.0) + bdw) * src_penalty * dst_penalty
                src_dst_ratios.append((src, dst, ratio))

        if len(src_dst_ratios) == 0:
            decision['error_reason'] = 'found no suitable src/dst for replication'
            return decision

        sorted_ratios = sorted(src_dst_ratios, key=itemgetter(2), reverse=True)
        logging.debug(sorted_ratios)
        destination_rse = sorted_ratios[0][1]
        source_rse = sorted_ratios[0][0]
        decision['destination_rse'] = destination_rse
        decision['source_rse'] = source_rse
        # decision['rse_ratios'] = src_dst_ratios
        self._dst_penalties[destination_rse] = 10.0
        self._src_penalties[source_rse] = 10.0

        self._added_cache.add_dataset(':'.join(did))

        self._added_bytes.add_point(destination_rse, meta['bytes'])
        self._added_files.add_point(destination_rse, meta['length'])

        self._added_bytes.add_point('total', meta['bytes'])
        self._added_files.add_point('total', meta['length'])

        return decision
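A hedged sketch of how a daemon loop might drive this place() method (run_once is invented for illustration, not the project's actual daemon code):

    def run_once(self, dids):
        # Hypothetical driver: ask place() for a decision per did and log
        # the transfer it suggests; a real daemon would create a
        # replication rule at decision['destination_rse'].
        for did in dids:
            decision = self.place(did)
            if 'error_reason' in decision:
                logging.debug('skipping %s: %s', decision['did'], decision['error_reason'])
                continue
            logging.info('replicate %s from %s to %s', decision['did'],
                         decision['source_rse'], decision['destination_rse'])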