コード例 #1
0
ファイル: test_reaper.py プロジェクト: pradeepjasal/rucio
def test_reaper_multi_vo(vo, second_vo, scope_factory, caches_mock, file_config_mock):
    """ REAPER (DAEMON): Test the reaper daemon with multiple vo."""
    [cache_region] = caches_mock
    new_vo = second_vo
    _, [scope_tst, scope_new] = scope_factory(vos=[vo, new_vo])

    nb_files = 250
    file_size = 200  # 2G
    rse1_name, rse1_id, dids1 = __add_test_rse_and_replicas(vo=vo, scope=scope_tst, rse_name=rse_name_generator(),
                                                            names=['lfn' + generate_uuid() for _ in range(nb_files)], file_size=file_size)
    rse2_name, rse2_id, dids2 = __add_test_rse_and_replicas(vo=new_vo, scope=scope_new, rse_name=rse_name_generator(),
                                                            names=['lfn' + generate_uuid() for _ in range(nb_files)], file_size=file_size)

    rse_core.set_rse_limits(rse_id=rse1_id, name='MinFreeSpace', value=50 * file_size)
    rse_core.set_rse_limits(rse_id=rse2_id, name='MinFreeSpace', value=50 * file_size)

    # Check we reap all VOs by default
    cache_region.invalidate()
    rse_core.set_rse_usage(rse_id=rse1_id, source='storage', used=nb_files * file_size, free=1)
    rse_core.set_rse_usage(rse_id=rse2_id, source='storage', used=nb_files * file_size, free=1)
    both_rses = '%s|%s' % (rse1_name, rse2_name)
    reaper(once=True, rses=[], include_rses=both_rses, exclude_rses=None)
    reaper(once=True, rses=[], include_rses=both_rses, exclude_rses=None)
    assert len(list(replica_core.list_replicas(dids=dids1, rse_expression=both_rses))) == 200
    assert len(list(replica_core.list_replicas(dids=dids2, rse_expression=both_rses))) == 200
コード例 #2
0
ファイル: test_reaper.py プロジェクト: tbeerman/rucio
def test_reaper():
    """ REAPER (DAEMON): Test the reaper daemon."""
    nb_files = 30
    file_size = 2147483648L  # 2G
    for i in xrange(nb_files):
        replica_core.add_replica(rse='MOCK',
                                 scope='data13_hip',
                                 name='lfn' + generate_uuid(),
                                 bytes=file_size,
                                 account='root',
                                 adler32=None,
                                 md5=None)

    rse_core.set_rse_usage(rse='MOCK',
                           source='srm',
                           used=nb_files * file_size,
                           free=800L)
    rse_core.set_rse_limits(rse='MOCK',
                            name='MinFreeSpace',
                            value=10737418240L)
    rse_core.set_rse_limits(rse='MOCK', name='MaxBeingDeletedFiles', value=10)

    rses = [
        rse_core.get_rse('MOCK'),
    ]
    reaper(once=True, rses=rses)
    reaper(once=True, rses=rses)
コード例 #3
0
ファイル: test_reaper.py プロジェクト: pradeepjasal/rucio
def test_archive_of_deleted_dids(vo, did_factory, root_account, core_config_mock, caches_mock, file_config_mock):
    """ REAPER (DAEMON): Test that the options to keep the did and content history work."""
    [reaper_cache_region, _config_cache_region, _replica_cache_region] = caches_mock
    scope = InternalScope('data13_hip', vo=vo)
    account = root_account

    nb_files = 10
    file_size = 200  # 2G
    rse_name, rse_id, dids = __add_test_rse_and_replicas(vo=vo, scope=scope, rse_name=rse_name_generator(),
                                                         names=['lfn' + generate_uuid() for _ in range(nb_files)], file_size=file_size, epoch_tombstone=True)
    dataset = did_factory.make_dataset()
    did_core.attach_dids(dids=dids, account=account, **dataset)

    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=50 * file_size)
    assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == nb_files

    # Check first if the reaper does not delete anything if no space is needed
    reaper_cache_region.invalidate()
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=323000000000)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None, greedy=True)
    assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == 0

    file_clause = []
    for did in dids:
        file_clause.append(and_(models.DeletedDataIdentifier.scope == did['scope'], models.DeletedDataIdentifier.name == did['name']))

    session = get_session()
    query = session.query(models.DeletedDataIdentifier.scope,
                          models.DeletedDataIdentifier.name,
                          models.DeletedDataIdentifier.did_type).\
        filter(or_(*file_clause))

    deleted_dids = list()
    for did in query.all():
        print(did)
        deleted_dids.append(did)
    assert len(deleted_dids) == len(dids)

    query = session.query(models.DataIdentifierAssociationHistory.child_scope,
                          models.DataIdentifierAssociationHistory.child_name,
                          models.DataIdentifierAssociationHistory.child_type).\
        filter(and_(models.DataIdentifierAssociationHistory.scope == dataset['scope'], models.DataIdentifierAssociationHistory.name == dataset['name']))

    deleted_dids = list()
    for did in query.all():
        print(did)
        deleted_dids.append(did)
    assert len(deleted_dids) == len(dids)
コード例 #4
0
def test_reaper():
    """ REAPER (DAEMON): Test the reaper daemon."""
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
    else:
        vo = {}

    rse_name = rse_name_generator()
    rse_id = rse_core.add_rse(rse_name, **vo)

    mock_protocol = {'scheme': 'MOCK',
                     'hostname': 'localhost',
                     'port': 123,
                     'prefix': '/test/reaper',
                     'impl': 'rucio.rse.protocols.mock.Default',
                     'domains': {
                         'lan': {'read': 1,
                                 'write': 1,
                                 'delete': 1},
                         'wan': {'read': 1,
                                 'write': 1,
                                 'delete': 1}}}
    rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol)

    nb_files = 30
    file_size = 2147483648  # 2G

    file_names = []
    for i in range(nb_files):
        file_name = 'lfn' + generate_uuid()
        file_names.append(file_name)
        replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip', **vo),
                                 name=file_name, bytes=file_size,
                                 tombstone=datetime.utcnow() - timedelta(days=1),
                                 account=InternalAccount('root', **vo), adler32=None, md5=None)

    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=800)
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10737418240)
    rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10)

    rses = [rse_core.get_rse(rse_id), ]
    reaper(once=True, rses=rses)
    reaper(once=True, rses=rses)
    assert_equal(len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in file_names], rse_expression=rse_name))), nb_files - 10)
コード例 #5
0
ファイル: test_reaper.py プロジェクト: pradeepjasal/rucio
def test_reaper(vo, caches_mock, file_config_mock):
    """ REAPER (DAEMON): Test the reaper daemon."""
    [cache_region] = caches_mock
    scope = InternalScope('data13_hip', vo=vo)

    nb_files = 250
    file_size = 200  # 2G
    rse_name, rse_id, dids = __add_test_rse_and_replicas(vo=vo, scope=scope, rse_name=rse_name_generator(),
                                                         names=['lfn' + generate_uuid() for _ in range(nb_files)], file_size=file_size)

    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=50 * file_size)
    assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == nb_files

    # Check first if the reaper does not delete anything if no space is needed
    cache_region.invalidate()
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=323000000000)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == nb_files

    # Now put it over threshold and delete
    cache_region.invalidate()
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    assert len(list(replica_core.list_replicas(dids, rse_expression=rse_name))) == 200
コード例 #6
0
ファイル: test_reaper.py プロジェクト: pradeepjasal/rucio
def test_archive_removal_impact_on_constituents(rse_factory, did_factory, mock_scope, root_account, caches_mock, file_config_mock):
    [cache_region] = caches_mock
    rse_name, rse_id = rse_factory.make_mock_rse()
    scope = mock_scope
    account = root_account

    # Create 2 archives and 4 files:
    # - One only exists in the first archive
    # - One in both, plus another replica, which is not in an archive
    # - One in both, plus another replica, which is not in an archive; and this replica has expired
    # - One in both, plus another replica, which is not in an archive; and this replica has expired; but a replication rule exists on this second replica
    # Also add these files to datasets, one of which will be removed at the end
    nb_constituents = 4
    nb_c_outside_archive = nb_constituents - 1
    constituent_size = 2000
    archive_size = 1000
    uuid = str(generate_uuid())
    constituents = [{'scope': scope, 'name': 'lfn.%s.%d' % (uuid, i)} for i in range(nb_constituents)]
    did_factory.register_dids(constituents)
    c_first_archive_only, c_with_replica, c_with_expired_replica, c_with_replica_and_rule = constituents

    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size, **c_with_replica)

    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_expired_replica)

    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_replica_and_rule)
    rule_core.add_rule(dids=[c_with_replica_and_rule], account=account, copies=1, rse_expression=rse_name, grouping='NONE',
                       weight=None, lifetime=None, locked=False, subscription_id=None)

    archive1, archive2 = [{'scope': scope, 'name': 'archive_%s.%d.zip' % (uuid, i)} for i in range(2)]
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive1)
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive2)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size} for c in constituents],
                         account=account, **archive1)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size} for c in [c_with_replica, c_with_expired_replica, c_with_replica_and_rule]],
                         account=account, **archive2)

    dataset1, dataset2 = [{'scope': scope, 'name': 'dataset_%s.%i' % (uuid, i)} for i in range(2)]
    did_core.add_did(did_type='DATASET', account=account, **dataset1)
    did_core.attach_dids(dids=constituents, account=account, **dataset1)
    did_core.add_did(did_type='DATASET', account=account, **dataset2)
    did_core.attach_dids(dids=[c_first_archive_only, c_with_expired_replica], account=account, **dataset2)

    @read_session
    def __get_archive_contents_history_count(archive, session=None):
        return session.query(ConstituentAssociationHistory).filter_by(**archive).count()

    # Run reaper the first time.
    # the expired non-archive replica of c_with_expired_replica must be removed,
    # but the did must not be removed, and it must still remain in the dataset because
    # it still has the replica from inside the archive
    assert replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    for did in constituents + [archive1, archive2]:
        assert did_core.get_did(**did)
    for did in [archive1, archive2, c_with_replica, c_with_replica_and_rule]:
        assert replica_core.get_replica(rse_id=rse_id, **did)
    with pytest.raises(ReplicaNotFound):
        # The replica is only on the archive, not on the constituent
        replica_core.get_replica(rse_id=rse_id, **c_first_archive_only)
    with pytest.raises(ReplicaNotFound):
        # The replica outside the archive was removed by reaper
        nb_c_outside_archive -= 1
        replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    # Compared to get_replica, list_replicas resolves archives, must return replicas for all files
    assert len(list(replica_core.list_replicas(dids=constituents))) == 4
    assert len(list(did_core.list_content(**dataset1))) == 4
    assert len(list(did_core.list_archive_content(**archive1))) == 4
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 0
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the first archive and run reaper again
    # the archive will be removed; and c_first_archive_only must be removed from datasets
    # and from the did table.
    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive1)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive1)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    assert len(list(replica_core.list_replicas(dids=constituents))) == 3
    assert len(list(did_core.list_content(**dataset1))) == 3
    assert len(list(did_core.list_archive_content(**archive1))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the second archive replica and run reaper another time
    # c_with_expired_replica is removed because its external replica got removed at previous step
    # and it exists only inside the archive now.
    # If not open, Dataset2 will be removed because it will be empty.
    did_core.set_status(open=False, **dataset2)
    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive2)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    # The archive must be removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive2)
    # The DIDs which only existed in the archive are also removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_with_expired_replica)
    # If the DID has a non-expired replica outside the archive without rules on it, the DID is not removed
    assert did_core.get_did(**c_with_replica)
    # If the DID has an expired replica outside the archive, but has rules on that replica, the DID is not removed
    assert did_core.get_did(**c_with_replica_and_rule)
    assert len(list(replica_core.list_replicas(dids=constituents))) == 2
    assert len(list(did_core.list_content(**dataset1))) == 2
    with pytest.raises(DataIdentifierNotFound):
        did_core.get_did(**dataset2)
    assert len(list(did_core.list_content(**dataset2))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 0
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 3
コード例 #7
0
def test_source_avoid_deletion(vo, caches_mock, core_config_mock, rse_factory,
                               did_factory, root_account, file_factory):
    """ Test that sources on a file block it from deletion """

    _, reaper_region = caches_mock
    src_rse1, src_rse1_id = rse_factory.make_mock_rse()
    src_rse2, src_rse2_id = rse_factory.make_mock_rse()
    dst_rse, dst_rse_id = rse_factory.make_mock_rse()
    all_rses = [src_rse1_id, src_rse2_id, dst_rse_id]
    any_source = f'{src_rse1}|{src_rse2}'

    for rse_id in [src_rse1_id, src_rse2_id]:
        rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=1)
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=1, free=0)
    distance_core.add_distance(src_rse1_id, dst_rse_id, ranking=20)
    distance_core.add_distance(src_rse2_id, dst_rse_id, ranking=10)

    # Upload a test file to both rses without registering
    did = did_factory.random_did()

    # Register replica on one source RSE
    replica_core.add_replica(rse_id=src_rse1_id,
                             account=root_account,
                             bytes_=1,
                             tombstone=datetime(year=1970, month=1, day=1),
                             **did)
    rule_core.add_rule(dids=[did],
                       account=root_account,
                       copies=1,
                       rse_expression=dst_rse,
                       grouping='ALL',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)

    # Reaper will not delete a file which only has one replica if there is any pending transfer for it
    reaper_region.invalidate()
    reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None)
    replica = next(
        iter(replica_core.list_replicas(dids=[did],
                                        rse_expression=any_source)))
    assert len(replica['pfns']) == 1

    # Register replica on second source rse
    replica_core.add_replica(rse_id=src_rse2_id,
                             account=root_account,
                             bytes_=1,
                             tombstone=datetime(year=1970, month=1, day=1),
                             **did)
    replica = next(
        iter(replica_core.list_replicas(dids=[did],
                                        rse_expression=any_source)))
    assert len(replica['pfns']) == 2

    # Submit the transfer. This will create the sources.
    submitter(once=True,
              rses=[{
                  'id': rse_id
              } for rse_id in all_rses],
              partition_wait_time=None,
              transfertool='mock',
              transfertype='single',
              filter_transfertool=None)

    # None of the replicas will be removed. They are protected by an entry in the sources table
    reaper_region.invalidate()
    reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None)
    replica = next(
        iter(replica_core.list_replicas(dids=[did],
                                        rse_expression=any_source)))
    assert len(replica['pfns']) == 2

    @transactional_session
    def __delete_sources(rse_id, scope, name, session=None):
        session.execute(
            delete(Source).where(Source.rse_id == rse_id,
                                 Source.scope == scope, Source.name == name))

    # Deletion succeeds for one replica (second still protected by existing request)
    __delete_sources(src_rse1_id, **did)
    __delete_sources(src_rse2_id, **did)
    reaper_region.invalidate()
    reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None)
    replica = next(
        iter(replica_core.list_replicas(dids=[did],
                                        rse_expression=any_source)))
    assert len(replica['pfns']) == 1