Python file_generator Examples, rucio.tests.common.file_generator Python Examples

Example #1

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_detach_files_dataset(self):
     """CLIENT(USER): Rucio detach files to dataset"""
     # Attach files to a dataset using the attach method
     tmp_file1 = file_generator()
     tmp_file2 = file_generator()
     tmp_file3 = file_generator()
     tmp_dsn = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
     # Adding files to a new dataset
     cmd = 'rucio upload --rse {0} --scope {1} --files {2} {3} {4} --did {5}'.format(self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3, tmp_dsn)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     remove(tmp_file1)
     remove(tmp_file2)
     remove(tmp_file3)
     # detach the files to the dataset
     cmd = 'rucio detach --from {0} {1}:{2} {1}:{3}'.format(tmp_dsn, self.user, tmp_file2[5:], tmp_file3[5:])  # triming '/tmp/' from filenames
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     # searching for the file in the new dataset
     cmd = 'rucio list-files {0}'.format(tmp_dsn)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     # tmp_file1 must be in the dataset
     nose.tools.assert_not_equal(re.search("{0}:{1}".format(self.user, tmp_file1[5:]), out), None)
     # tmp_file3 must be in the dataset
     nose.tools.assert_equal(re.search("{0}:{1}".format(self.user, tmp_file3[5:]), out), None)

Example #2

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_upload_repeated_file_dataset(self):
     """CLIENT(USER): Rucio upload repeated files to dataset"""
     # One of the files to upload is already in the dataset
     tmp_file1 = file_generator()
     tmp_file2 = file_generator()
     tmp_file3 = file_generator()
     tmp_dsn = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
     # Adding files to a new dataset
     cmd = 'rucio upload --rse {0} --scope {1} --files {2} --did {3}'.format(self.def_rse, self.user, tmp_file1, tmp_dsn)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     # upload the files to the dataset
     cmd = 'rucio upload --rse {0} --scope {1} --files {2} {3} {4} --did {5}'.format(self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3, tmp_dsn)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     remove(tmp_file1)
     remove(tmp_file2)
     remove(tmp_file3)
     # searching for the file in the new dataset
     cmd = 'rucio list-files {0}'.format(tmp_dsn)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     # tmp_file1 must be in the dataset
     nose.tools.assert_not_equal(re.search("{0}:{1}".format(self.user, tmp_file1[5:]), out), None)
     # tmp_file3 must be in the dataset
     nose.tools.assert_not_equal(re.search("{0}:{1}".format(self.user, tmp_file3[5:]), out), None)

Example #3

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_add_files_to_dataset(self):
     """CLIENT(USER): Rucio add files to dataset"""
     tmp_file1 = file_generator()
     tmp_file2 = file_generator()
     tmp_dataset = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
     # add files
     cmd = 'rucio upload --rse {0} --scope {1} --files {2} {3}'.format(self.def_rse, self.user, tmp_file1, tmp_file2)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # create dataset
     cmd = 'rucio add-dataset ' + tmp_dataset
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add files to dataset
     cmd = 'rucio add-files-to-dataset --to {0} mock:{1} mock:{2}'.format(tmp_dataset, tmp_file1[5:], tmp_file2[5:])  # triming '/tmp/' from filename
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # find the added files
     cmd = 'rucio list-files ' + tmp_dataset
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)

Example #4

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_upload_file(self):
     """CLIENT(USER): Rucio upload files"""
     tmp_file1 = file_generator()
     tmp_file2 = file_generator()
     tmp_file3 = file_generator()
     cmd = 'rucio upload --rse {0} --scope {1} --files {2} {3} {4}'.format(self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     remove(tmp_file1)
     remove(tmp_file2)
     remove(tmp_file3)
     nose.tools.assert_not_equal(re.search("File {0}:{1} successfully uploaded on the storage".format(self.user, tmp_file1[5:]), out), None)

Example #5

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_add_delete_add_file(self):
     """CLIENT(USER): Add/Delete/Add"""
     tmp_file1 = file_generator()
     # add file
     cmd = 'rucio upload --rse {0} --scope {1} --files {2}'.format(self.def_rse, self.user, tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # get the rule for the file
     cmd = "rucio list-rules --did {0}:{1} | grep {0}:{1} | cut -f1 -d\ ".format(self.user, tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     rule = out
     # delete the file from the catalog
     cmd = "rucio delete-rule {0}".format(rule)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # delete the fisical file
     cmd = "find /tmp/rucio_rse/ -name {0} |xargs rm".format(tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # modify the file to avoid same checksum
     cmd = "echo 'delta' >> {0}".format(tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add the same file
     cmd = 'rucio upload --rse {0} --scope {1} --files {2}'.format(self.def_rse, self.user, tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     nose.tools.assert_equal(re.search("File {0}:{1} successfully uploaded on the storage".format(self.user, tmp_file1[5:]), out), None)

Example #6

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_download_dataset(self):
     """CLIENT(USER): Rucio download dataset"""
     tmp_file1 = file_generator()
     tmp_dataset = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
     # add files
     cmd = 'rucio upload --rse {0} --scope {1} --files {2}'.format(self.def_rse, self.user, tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # create dataset
     cmd = 'rucio add-dataset ' + tmp_dataset
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add files to dataset
     cmd = 'rucio add-files-to-dataset --to {0} {1}:{2}'.format(tmp_dataset, self.user, tmp_file1[5:])  # triming '/tmp/' from filename
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # download dataset
     cmd = 'rucio download --dir /tmp {0}'.format(tmp_dataset)  # triming '/tmp/' from filename
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     search = 'Getting file {0}:{1}'.format(self.user, tmp_file1[5:])
     nose.tools.assert_not_equal(re.search(search, err), None)
     search = 'File validated'
     nose.tools.assert_not_equal(re.search(search, out), None)
     search = 'DID ' + tmp_dataset
     nose.tools.assert_not_equal(re.search(search, out), None)

Example #7

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_add_file_twice(self):
     """CLIENT(USER): Add file twice"""
     tmp_file1 = file_generator()
     # add file twice
     cmd = 'rucio upload --rse {0} --scope {1} --files {2}'.format(self.def_rse, self.user, tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     cmd = 'rucio upload --rse {0} --scope {1} --files {2}'.format(self.def_rse, self.user, tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     nose.tools.assert_equal(re.search("File {0}:{1} successfully uploaded on the storage".format(self.user, tmp_file1[5:]), out), None)

Example #8

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_create_rule(self):
     """CLIENT(USER): Rucio add rule"""
     tmp_file1 = file_generator()
     # add files
     cmd = 'rucio upload --rse {0} --scope {1} --files {2}'.format(self.def_rse, self.user, tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rse
     tmp_rse = rse_name_generator()
     cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     # add rse atributes
     cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rse
     tmp_rse = rse_name_generator()
     cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rse atributes
     cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rse
     tmp_rse = rse_name_generator()
     cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rse atributes
     cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rules
     cmd = "rucio add-rule {0}:{1} 3 'spacetoken=ATLASSCRATCHDISK'".format(self.user, tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     rule = out[:-1]  # triming new line character
     # check if rule exist for the file
     cmd = "rucio list-rules --did {0}:{1}".format(self.user, tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     nose.tools.assert_not_equal(re.search(rule, out), None)

Example #9

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_upload_repeated_file(self):
     """CLIENT(USER): Rucio upload repeated files"""
     # One of the files to upload is already catalogued but was removed
     tmp_file1 = file_generator()
     tmp_file2 = file_generator()
     tmp_file3 = file_generator()
     cmd = 'rucio upload --rse {0} --scope {1} --files {2}'.format(self.def_rse, self.user, tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     # get the rule for the file
     cmd = "rucio list-rules --did {0}:{1} | grep {0}:{1} | cut -f1 -d\ ".format(self.user, tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     rule = out
     # delete the file from the catalog
     cmd = "rucio delete-rule {0}".format(rule)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # delete the fisical file
     cmd = "find /tmp/rucio_rse/ -name {0} |xargs rm".format(tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     cmd = 'rucio upload --rse {0} --scope {1} --files {2} {3} {4}'.format(self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     remove(tmp_file1)
     remove(tmp_file2)
     remove(tmp_file3)
     nose.tools.assert_not_equal(re.search("File {0}:{1} successfully uploaded on the storage".format(self.user, tmp_file2[5:]), out), None)

Example #10

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_delete_rule(self):
     """CLIENT(USER): rule deletion"""
     tmp_file1 = file_generator()
     # add files
     cmd = 'rucio upload --rse {0} --scope {1} --files {2}'.format(self.def_rse, self.user, tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rse
     tmp_rse = rse_name_generator()
     cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     # add rse atributes
     cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rules
     cmd = "rucio add-rule {0}:{1} 1 'spacetoken=ATLASSCRATCHDISK'".format(self.user, tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     # get the rules for the file
     cmd = "rucio list-rules --did {0}:{1} | grep {0}:{1} | cut -f1 -d\ ".format(self.user, tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     (rule1, rule2) = out.split()
     # delete the rules for the file
     cmd = "rucio delete-rule {0}".format(rule1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     cmd = "rucio delete-rule {0}".format(rule2)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # search for the file
     cmd = "rucio list-dids {0}:{1}".format(self.user, tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     nose.tools.assert_equal('', out)

Example #11

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_detach_non_existing_file(self):
     """CLIENT(USER): Rucio detach a non existing file"""
     tmp_file1 = file_generator()
     tmp_dsn = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
     # Adding files to a new dataset
     cmd = 'rucio upload --rse {0} --scope {1} --files {2} --did {3}'.format(self.def_rse, self.user, tmp_file1, tmp_dsn)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     remove(tmp_file1)
     # attach the files to the dataset
     cmd = 'rucio detach --from {0} {1}:{2}'.format(tmp_dsn, self.user, 'file_ghost')  # triming '/tmp/' from filenames
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     print err
     nose.tools.assert_not_equal(re.search("Failed to detach data identifier", err), None)

Example #12

0

Show file

File: test_bin_rucio.py Project: pombredanne/rucio

 def test_download_file(self):
     """CLIENT(USER): Rucio download files"""
     tmp_file1 = file_generator()
     # add files
     cmd = 'rucio upload --rse {0} --scope {1} --files {2}'.format(self.def_rse, self.user, tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # download files
     cmd = 'rucio download --dir /tmp {0}:{1}'.format(self.user, tmp_file1[5:])  # triming '/tmp/' from filename
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # search for the files with ls
     cmd = 'ls /tmp/{0}'.format(self.user)   # search in /tmp/mock
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)

Example #13

0

Show file

 def test_create_rule(self):
     """CLIENT(USER): Rucio add rule"""
     tmp_file1 = file_generator()
     # add files
     cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
         self.def_rse, self.user, tmp_file1)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rse
     tmp_rse = rse_name_generator()
     cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     # add quota
     set_account_limit('root', get_rse_id(tmp_rse), -1)
     # add rse atributes
     cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(
         tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rse
     tmp_rse = rse_name_generator()
     cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add quota
     set_account_limit('root', get_rse_id(tmp_rse), -1)
     # add rse atributes
     cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(
         tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rse
     tmp_rse = rse_name_generator()
     cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add quota
     set_account_limit('root', get_rse_id(tmp_rse), -1)
     # add rse atributes
     cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(
         tmp_rse)
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     # add rules
     cmd = "rucio add-rule {0}:{1} 3 'spacetoken=ATLASSCRATCHDISK'".format(
         self.user, tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out
     rule = out[:-1]  # triming new line character
     # check if rule exist for the file
     cmd = "rucio list-rules {0}:{1}".format(self.user, tmp_file1[5:])
     print self.marker + cmd
     exitcode, out, err = execute(cmd)
     print out, err
     nose.tools.assert_not_equal(re.search(rule, out), None)

Example #14

0

Show file

    def setUp(self):

        self.replica_client = ReplicaClient()

        # Using two test RSEs
        self.rse4suspicious = 'MOCK_SUSPICIOUS'
        self.rse4recovery = 'MOCK_RECOVERY'
        self.scope = 'mock'

        # For testing, we create 3 files and upload them to Rucio to two test RSEs.
        self.tmp_file1 = file_generator()
        self.tmp_file2 = file_generator()
        self.tmp_file3 = file_generator()

        self.listdids = [{
            'scope': self.scope,
            'name': path.basename(self.tmp_file1),
            'type': DIDType.FILE
        }, {
            'scope': self.scope,
            'name': path.basename(self.tmp_file2),
            'type': DIDType.FILE
        }, {
            'scope': self.scope,
            'name': path.basename(self.tmp_file3),
            'type': DIDType.FILE
        }]

        for rse in [self.rse4suspicious, self.rse4recovery]:
            cmd = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4}'.format(
                rse, self.scope, self.tmp_file1, self.tmp_file2,
                self.tmp_file3)
            exitcode, out, err = execute(cmd)

            # checking if Rucio upload went OK
            assert_true(exitcode == 0)

        # removing physical files from /tmp location - keeping only their DB info
        remove(self.tmp_file1)
        remove(self.tmp_file2)
        remove(self.tmp_file3)

        # Gather replica info
        replicalist = list_replicas(dids=self.listdids)

        # Changing the replica statuses as follows:
        # --------------------------------------------------------------------------------------------
        # Name         State(s) declared on MOCK_RECOVERY       State(s) declared on MOCK_SUSPICIOUS
        # --------------------------------------------------------------------------------------------
        # tmp_file1    available                                suspicious (available)
        # tmp_file2    available                                suspicious + bad (unavailable)
        # tmp_file3    unavailable                              suspicious (available)
        # --------------------------------------------------------------------------------------------

        for replica in replicalist:
            for i in range(3):
                print("Declaring suspicious file replica: " +
                      replica['rses'][self.rse4suspicious][0])
                self.replica_client.declare_suspicious_file_replicas([
                    replica['rses'][self.rse4suspicious][0],
                ], 'This is a good reason.')
                sleep(1)
            if replica['name'] == path.basename(self.tmp_file2):
                print("Declaring bad file replica: " +
                      replica['rses'][self.rse4suspicious][0])
                self.replica_client.declare_bad_file_replicas([
                    replica['rses'][self.rse4suspicious][0],
                ], 'This is a good reason')
            if replica['name'] == path.basename(self.tmp_file3):
                print("Updating replica state as unavailable: " +
                      replica['rses'][self.rse4recovery][0])
                update_replica_state(self.rse4recovery, self.scope,
                                     path.basename(self.tmp_file3),
                                     ReplicaState.UNAVAILABLE)

        # Gather replica info after setting initial replica statuses
        replicalist = list_replicas(dids=self.listdids)

        # Checking if the status changes were effective
        for replica in replicalist:
            if replica['name'] == path.basename(self.tmp_file1):
                assert_true(
                    replica['states'][self.rse4suspicious] == 'AVAILABLE')
                assert_true(
                    replica['states'][self.rse4recovery] == 'AVAILABLE')
            if replica['name'] == path.basename(self.tmp_file2):
                assert_true(
                    (self.rse4suspicious in replica['states']) is False)
                assert_true(
                    replica['states'][self.rse4recovery] == 'AVAILABLE')
            if replica['name'] == path.basename(self.tmp_file3):
                assert_true(
                    replica['states'][self.rse4suspicious] == 'AVAILABLE')
                assert_true((self.rse4recovery in replica['states']) is False)

        # Checking if only self.tmp_file2 is declared as 'BAD'
        self.from_date = datetime.now() - timedelta(days=1)
        bad_replicas_list = list_bad_replicas_status(
            rse=self.rse4suspicious, younger_than=self.from_date)
        bad_checklist = [(badf['name'], badf['rse'], badf['state'])
                         for badf in bad_replicas_list]

        assert_true((path.basename(self.tmp_file2), self.rse4suspicious,
                     BadFilesStatus.BAD) in bad_checklist)
        assert_true((path.basename(self.tmp_file1), self.rse4suspicious,
                     BadFilesStatus.BAD) not in bad_checklist)
        assert_true((path.basename(self.tmp_file3), self.rse4suspicious,
                     BadFilesStatus.BAD) not in bad_checklist)

        bad_replicas_list = list_bad_replicas_status(
            rse=self.rse4recovery, younger_than=self.from_date)
        bad_checklist = [(badf['name'], badf['rse'], badf['state'])
                         for badf in bad_replicas_list]

        assert_true((path.basename(self.tmp_file1), self.rse4recovery,
                     BadFilesStatus.BAD) not in bad_checklist)
        assert_true((path.basename(self.tmp_file2), self.rse4recovery,
                     BadFilesStatus.BAD) not in bad_checklist)
        assert_true((path.basename(self.tmp_file3), self.rse4recovery,
                     BadFilesStatus.BAD) not in bad_checklist)

Example #15

0

Show file

File: test_replica_recoverer.py Project: abhijeetsharma200/rucio

    def setUp(self):
        if config_get_bool('common',
                           'multi_vo',
                           raise_exception=False,
                           default=False):
            self.vo = {'vo': get_vo()}
        else:
            self.vo = {}

        self.replica_client = ReplicaClient()
        assert hasattr(self, "rse_factory")
        rse_factory = self.rse_factory

        # Using two test RSEs
        self.rse4suspicious, self.rse4suspicious_id = rse_factory.make_posix_rse(
            deterministic=True, **self.vo)
        self.rse4recovery, self.rse4recovery_id = rse_factory.make_posix_rse(
            deterministic=True, **self.vo)
        self.scope = 'mock'
        self.internal_scope = InternalScope(self.scope, **self.vo)

        # For testing, we create 5 files and upload them to Rucio to two test RSEs.
        self.tmp_file1 = file_generator()
        self.tmp_file2 = file_generator()
        self.tmp_file3 = file_generator()
        self.tmp_file4 = file_generator()
        self.tmp_file5 = file_generator()

        self.listdids = [{
            'scope': self.internal_scope,
            'name': path.basename(f),
            'type': DIDType.FILE
        } for f in [
            self.tmp_file1, self.tmp_file2, self.tmp_file3, self.tmp_file4,
            self.tmp_file5
        ]]

        for rse in [self.rse4suspicious, self.rse4recovery]:
            cmd = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4} {5} {6}'.format(
                rse, self.scope, self.tmp_file1, self.tmp_file2,
                self.tmp_file3, self.tmp_file4, self.tmp_file5)
            exitcode, out, err = execute(cmd)

            print(exitcode, out, err)
            # checking if Rucio upload went OK
            assert exitcode == 0

        # Set fictional datatypes
        set_metadata(self.internal_scope, path.basename(self.tmp_file4),
                     'datatype', 'testtypedeclarebad')
        set_metadata(self.internal_scope, path.basename(self.tmp_file5),
                     'datatype', 'testtypenopolicy')

        # Allow for the RSEs to be affected by the suspicious file recovery daemon
        add_rse_attribute(self.rse4suspicious_id,
                          "enable_suspicious_file_recovery", True)
        add_rse_attribute(self.rse4recovery_id,
                          "enable_suspicious_file_recovery", True)

        # removing physical files from /tmp location - keeping only their DB info
        remove(self.tmp_file1)
        remove(self.tmp_file2)
        remove(self.tmp_file3)
        remove(self.tmp_file4)
        remove(self.tmp_file5)

        # Gather replica info
        replicalist = list_replicas(dids=self.listdids)

        # Changing the replica statuses as follows:
        # ----------------------------------------------------------------------------------------------------------------------------------
        # Name         State(s) declared on rse4recovery       State(s) declared on rse4suspicious        Metadata "datatype"
        # ----------------------------------------------------------------------------------------------------------------------------------
        # tmp_file1    available                                suspicious (available)
        # tmp_file2    available                                suspicious + bad (unavailable)
        # tmp_file3    unavailable                              suspicious (available)                      RAW
        # tmp_file4    unavailable                              suspicious (available)                      testtypedeclarebad
        # tmp_file5    unavailable                              suspicious (available)                      testtypenopolicy
        # ----------------------------------------------------------------------------------------------------------------------------------

        for replica in replicalist:
            suspicious_pfns = replica['rses'][self.rse4suspicious_id]
            for i in range(3):
                print("Declaring suspicious file replica: " +
                      suspicious_pfns[0])
                # The reason must contain the word "checksum", so that the replica can be declared bad.
                self.replica_client.declare_suspicious_file_replicas([
                    suspicious_pfns[0],
                ], 'checksum')
                sleep(1)
            if replica['name'] == path.basename(self.tmp_file2):
                print("Declaring bad file replica: " + suspicious_pfns[0])
                self.replica_client.declare_bad_file_replicas([
                    suspicious_pfns[0],
                ], 'checksum')
            if replica['name'] == path.basename(self.tmp_file3):
                print("Updating replica state as unavailable: " +
                      replica['rses'][self.rse4recovery_id][0])
                update_replica_state(self.rse4recovery_id, self.internal_scope,
                                     path.basename(self.tmp_file3),
                                     ReplicaState.UNAVAILABLE)
            if replica['name'] == path.basename(self.tmp_file4):
                print("Updating replica state as unavailable: " +
                      replica['rses'][self.rse4recovery_id][0])
                update_replica_state(self.rse4recovery_id, self.internal_scope,
                                     path.basename(self.tmp_file4),
                                     ReplicaState.UNAVAILABLE)
            if replica['name'] == path.basename(self.tmp_file5):
                print("Updating replica state as unavailable: " +
                      replica['rses'][self.rse4recovery_id][0])
                update_replica_state(self.rse4recovery_id, self.internal_scope,
                                     path.basename(self.tmp_file5),
                                     ReplicaState.UNAVAILABLE)

        # Gather replica info after setting initial replica statuses
        replicalist = list_replicas(dids=self.listdids)

        # Checking if the status changes were effective
        for replica in replicalist:
            if replica['name'] == path.basename(self.tmp_file1):
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert replica['states'][self.rse4recovery_id] == 'AVAILABLE'
            if replica['name'] == path.basename(self.tmp_file2):
                assert (self.rse4suspicious_id in replica['states']) is False
                assert replica['states'][self.rse4recovery_id] == 'AVAILABLE'
            if replica['name'] == path.basename(self.tmp_file3):
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert (self.rse4recovery_id in replica['states']) is False
            if replica['name'] == path.basename(self.tmp_file4):
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert (self.rse4recovery_id in replica['states']) is False
            if replica['name'] == path.basename(self.tmp_file5):
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert (self.rse4recovery_id in replica['states']) is False

        # Checking if only self.tmp_file2 is declared as 'BAD'
        self.from_date = datetime.now() - timedelta(days=1)
        bad_replicas_list = list_bad_replicas_status(
            rse_id=self.rse4suspicious_id,
            younger_than=self.from_date,
            **self.vo)
        bad_checklist = [(badf['name'], badf['rse_id'], badf['state'])
                         for badf in bad_replicas_list]

        assert (path.basename(self.tmp_file1), self.rse4suspicious_id,
                BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file2), self.rse4suspicious_id,
                BadFilesStatus.BAD) in bad_checklist
        assert (path.basename(self.tmp_file3), self.rse4suspicious_id,
                BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file4), self.rse4suspicious_id,
                BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file5), self.rse4suspicious_id,
                BadFilesStatus.BAD) not in bad_checklist

        bad_replicas_list = list_bad_replicas_status(
            rse_id=self.rse4recovery_id,
            younger_than=self.from_date,
            **self.vo)
        bad_checklist = [(badf['name'], badf['rse_id'], badf['state'])
                         for badf in bad_replicas_list]

        assert (path.basename(self.tmp_file1), self.rse4recovery_id,
                BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file2), self.rse4recovery_id,
                BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file3), self.rse4recovery_id,
                BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file4), self.rse4recovery_id,
                BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file5), self.rse4recovery_id,
                BadFilesStatus.BAD) not in bad_checklist

Example #16

0

Show file

    def test_abacus_rse(self):
        """ ABACUS (RSE): Test update of RSE usage. """
        # Get RSE usage of all sources
        self.session.query(models.UpdatedRSECounter).delete()  # pylint: disable=no-member
        self.session.query(models.RSEUsage).delete()  # pylint: disable=no-member
        self.session.commit()  # pylint: disable=no-member

        # Upload files -> RSE usage should increase
        self.files = [{'did_scope': self.scope, 'did_name': 'file_' + generate_uuid(), 'path': file_generator(size=self.file_sizes), 'rse': self.rse, 'lifetime': -1} for i in range(0, 2)]
        self.upload_client.upload(self.files)
        [os.remove(file['path']) for file in self.files]
        rse.run(once=True)
        rse_usage = get_rse_usage(rse_id=self.rse_id)[0]
        assert rse_usage['used'] == len(self.files) * self.file_sizes
        rse_usage_from_rucio = get_rse_usage(rse_id=self.rse_id, source='rucio')[0]
        assert rse_usage_from_rucio['used'] == len(self.files) * self.file_sizes
        rse_usage_from_unavailable = get_rse_usage(rse_id=self.rse_id, source='unavailable')
        assert len(rse_usage_from_unavailable) == 0

        # Delete files -> rse usage should decrease
        cleaner.run(once=True)
        if self.vo:
            reaper.run(once=True, include_rses='vo=%s&(%s)' % (self.vo['vo'], self.rse), greedy=True)
        else:
            reaper.run(once=True, include_rses=self.rse, greedy=True)
        rse.run(once=True)
        rse_usage = get_rse_usage(rse_id=self.rse_id)[0]
        assert rse_usage['used'] == 0
        rse_usage_from_rucio = get_rse_usage(rse_id=self.rse_id, source='rucio')[0]
        assert rse_usage_from_rucio['used'] == 0
        rse_usage_from_unavailable = get_rse_usage(rse_id=self.rse_id, source='unavailable')
        assert len(rse_usage_from_unavailable) == 0

Example #17

0

Show file

File: test_abacus_collection_replica.py Project: pradeepjasal/rucio

    def test_abacus_collection_replica(self):
        """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
        self.files = [{
            'did_scope': self.scope,
            'did_name': 'file_' + generate_uuid(),
            'path': file_generator(size=self.file_sizes),
            'rse': self.rse,
            'lifetime': -1
        } for _ in range(0, 2)]
        self.did_client.add_did(self.scope,
                                self.dataset,
                                DIDType.DATASET,
                                lifetime=-1)
        self.upload_client.upload(self.files)
        self.did_client.attach_dids(scope=self.scope,
                                    name=self.dataset,
                                    dids=[{
                                        'name': file['did_name'],
                                        'scope': file['did_scope']
                                    } for file in self.files])
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                              1,
                                              self.rse,
                                              lifetime=-1)
        [os.remove(file['path']) for file in self.files]

        # Check dataset replica after rule creation - initial data
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert dataset_replica['bytes'] == 0
        assert dataset_replica['length'] == 0
        assert dataset_replica['available_bytes'] == 0
        assert dataset_replica['available_length'] == 0
        assert str(dataset_replica['state']) == 'UNAVAILABLE'

        # Run Abacus
        collection_replica.run(once=True)

        # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert dataset_replica['bytes'] == len(self.files) * self.file_sizes
        assert dataset_replica['length'] == len(self.files)
        assert dataset_replica['available_bytes'] == len(
            self.files) * self.file_sizes
        assert dataset_replica['available_length'] == len(self.files)
        assert str(dataset_replica['state']) == 'AVAILABLE'

        # Delete one file -> collection replica should be unavailable
        cleaner.run(once=True)
        delete_replicas(rse_id=self.rse_id,
                        files=[{
                            'name':
                            self.files[0]['did_name'],
                            'scope':
                            InternalScope(self.files[0]['did_scope'],
                                          **self.vo)
                        }])
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                              1,
                                              self.rse,
                                              lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert dataset_replica['length'] == len(self.files)
        assert dataset_replica['bytes'] == len(self.files) * self.file_sizes
        assert dataset_replica['available_length'] == len(self.files) - 1
        assert dataset_replica['available_bytes'] == (len(self.files) -
                                                      1) * self.file_sizes
        assert str(dataset_replica['state']) == 'UNAVAILABLE'

        # Delete all files -> collection replica should be deleted
        # Old behaviour (doesn't delete the DID)
        cleaner.run(once=True)
        reaper.REGION.invalidate()
        if self.vo:
            reaper.run(once=True,
                       include_rses='vo=%s&(%s)' % (self.vo['vo'], self.rse),
                       greedy=True)
        else:
            reaper.run(once=True, include_rses=self.rse, greedy=True)
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                              1,
                                              self.rse,
                                              lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ]
        assert dataset_replica[0]['length'] == 0
        assert dataset_replica[0]['available_length'] == 0

Example #18

0

Show file

    def setUp(self):
        if config_get_bool('common',
                           'multi_vo',
                           raise_exception=False,
                           default=False):
            self.vo = {
                'vo':
                config_get('client',
                           'vo',
                           raise_exception=False,
                           default='tst')
            }
        else:
            self.vo = {}

        self.replica_client = ReplicaClient()

        # Using two test RSEs
        self.rse4suspicious = 'MOCK_SUSPICIOUS'
        self.rse4suspicious_id = get_rse_id(self.rse4suspicious, **self.vo)
        self.rse4recovery = 'MOCK_RECOVERY'
        self.rse4recovery_id = get_rse_id(self.rse4recovery, **self.vo)
        self.scope = 'mock'
        self.internal_scope = InternalScope(self.scope, **self.vo)

        # For testing, we create 3 files and upload them to Rucio to two test RSEs.
        self.tmp_file1 = file_generator()
        self.tmp_file2 = file_generator()
        self.tmp_file3 = file_generator()

        self.listdids = [{
            'scope': self.internal_scope,
            'name': path.basename(f),
            'type': DIDType.FILE
        } for f in [self.tmp_file1, self.tmp_file2, self.tmp_file3]]

        for rse in [self.rse4suspicious, self.rse4recovery]:
            cmd = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4}'.format(
                rse, self.scope, self.tmp_file1, self.tmp_file2,
                self.tmp_file3)
            exitcode, out, err = execute(cmd)

            # checking if Rucio upload went OK
            assert exitcode == 0

        # removing physical files from /tmp location - keeping only their DB info
        remove(self.tmp_file1)
        remove(self.tmp_file2)
        remove(self.tmp_file3)

        # Gather replica info
        replicalist = list_replicas(dids=self.listdids)

        # Changing the replica statuses as follows:
        # --------------------------------------------------------------------------------------------
        # Name         State(s) declared on MOCK_RECOVERY       State(s) declared on MOCK_SUSPICIOUS
        # --------------------------------------------------------------------------------------------
        # tmp_file1    available                                suspicious (available)
        # tmp_file2    available                                suspicious + bad (unavailable)
        # tmp_file3    unavailable                              suspicious (available)
        # --------------------------------------------------------------------------------------------

        for replica in replicalist:
            suspicious_pfns = replica['rses'][self.rse4suspicious_id]
            for i in range(3):
                print("Declaring suspicious file replica: " +
                      suspicious_pfns[0])
                self.replica_client.declare_suspicious_file_replicas([
                    suspicious_pfns[0],
                ], 'This is a good reason.')
                sleep(1)
            if replica['name'] == path.basename(self.tmp_file2):
                print("Declaring bad file replica: " + suspicious_pfns[0])
                self.replica_client.declare_bad_file_replicas([
                    suspicious_pfns[0],
                ], 'This is a good reason')
            if replica['name'] == path.basename(self.tmp_file3):
                print("Updating replica state as unavailable: " +
                      replica['rses'][self.rse4recovery_id][0])
                update_replica_state(self.rse4recovery_id, self.internal_scope,
                                     path.basename(self.tmp_file3),
                                     ReplicaState.UNAVAILABLE)

        # Gather replica info after setting initial replica statuses
        replicalist = list_replicas(dids=self.listdids)

        # Checking if the status changes were effective
        for replica in replicalist:
            if replica['name'] == path.basename(self.tmp_file1):
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert replica['states'][self.rse4recovery_id] == 'AVAILABLE'
            if replica['name'] == path.basename(self.tmp_file2):
                assert (self.rse4suspicious_id in replica['states']) is False
                assert replica['states'][self.rse4recovery_id] == 'AVAILABLE'
            if replica['name'] == path.basename(self.tmp_file3):
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert (self.rse4recovery_id in replica['states']) is False

        # Checking if only self.tmp_file2 is declared as 'BAD'
        self.from_date = datetime.now() - timedelta(days=1)
        bad_replicas_list = list_bad_replicas_status(
            rse_id=self.rse4suspicious_id,
            younger_than=self.from_date,
            **self.vo)
        bad_checklist = [(badf['name'], badf['rse_id'], badf['state'])
                         for badf in bad_replicas_list]

        assert (path.basename(self.tmp_file2), self.rse4suspicious_id,
                BadFilesStatus.BAD) in bad_checklist
        assert (path.basename(self.tmp_file1), self.rse4suspicious_id,
                BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file3), self.rse4suspicious_id,
                BadFilesStatus.BAD) not in bad_checklist

        bad_replicas_list = list_bad_replicas_status(
            rse_id=self.rse4recovery_id,
            younger_than=self.from_date,
            **self.vo)
        bad_checklist = [(badf['name'], badf['rse_id'], badf['state'])
                         for badf in bad_replicas_list]

        assert (path.basename(self.tmp_file1), self.rse4recovery_id,
                BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file2), self.rse4recovery_id,
                BadFilesStatus.BAD) not in bad_checklist
        assert (path.basename(self.tmp_file3), self.rse4recovery_id,
                BadFilesStatus.BAD) not in bad_checklist

Example #19

0

Show file

File: test_download.py Project: pradeepjasal/rucio

def test_download_file_with_supported_protocol_from_config(
        rse_factory, did_factory, download_client, mock_scope):
    """ Download (CLIENT): Ensure the module associated to the first protocol supported by both the remote and local config read from rucio.cfg is called """

    rse, rse_id = rse_factory.make_rse()

    # FIXME:
    # The correct order to test should actually be scp,file,root
    # However the preferred_impl is not working correctly.
    # Once preferred_impl is fixed, this should be changed back
    add_protocol(
        rse_id, {
            'scheme': 'scp',
            'hostname': '%s.cern.ch' % rse_id,
            'port': 0,
            'prefix': '/test/',
            'impl': 'rucio.rse.protocols.posix.Default',
            'domains': {
                'lan': {
                    'read': 1,
                    'write': 1,
                    'delete': 1
                },
                'wan': {
                    'read': 1,
                    'write': 1,
                    'delete': 1
                }
            }
        })
    add_protocol(
        rse_id, {
            'scheme': 'file',
            'hostname': '%s.cern.ch' % rse_id,
            'port': 0,
            'prefix': '/test/',
            'impl': 'rucio.rse.protocols.scp.Default',
            'domains': {
                'lan': {
                    'read': 2,
                    'write': 2,
                    'delete': 2
                },
                'wan': {
                    'read': 2,
                    'write': 2,
                    'delete': 2
                }
            }
        })
    add_protocol(
        rse_id, {
            'scheme': 'root',
            'hostname': '%s.cern.ch' % rse_id,
            'port': 0,
            'prefix': '/test/',
            'impl': 'rucio.rse.protocols.xrootd.Default',
            'domains': {
                'lan': {
                    'read': 3,
                    'write': 3,
                    'delete': 3
                },
                'wan': {
                    'read': 3,
                    'write': 3,
                    'delete': 3
                }
            }
        })

    config_add_section('download')
    config_set('download', 'preferred_impl', 'rclone, xrootd')

    supported_impl = 'xrootd'

    path = file_generator()
    name = os.path.basename(path)
    item = {
        'path': path,
        'rse': rse,
        'did_scope': str(mock_scope),
        'did_name': name,
        'guid': generate_uuid(),
    }
    did_factory.upload_client.upload([item])
    did_str = '%s:%s' % (mock_scope, name)

    with patch('rucio.rse.protocols.%s.Default.get' % supported_impl, side_effect=lambda pfn, dest, **kw: shutil.copy(path, dest)) as mock_get, \
            patch('rucio.rse.protocols.%s.Default.connect' % supported_impl),\
            patch('rucio.rse.protocols.%s.Default.close' % supported_impl):
        download_client.download_dids([{
            'did': did_str,
            'impl': supported_impl
        }])
        mock_get.assert_called()

Example #20

0

Show file

File: test_download.py Project: pradeepjasal/rucio

def test_download_file_with_impl(rse_factory, did_factory, download_client,
                                 mock_scope):
    """ Download (CLIENT): Ensure the module associated to the impl value is called """

    impl = 'xrootd'
    rse, rse_id = rse_factory.make_rse()
    add_protocol(
        rse_id, {
            'scheme': 'file',
            'hostname': '%s.cern.ch' % rse_id,
            'port': 0,
            'prefix': '/test/',
            'impl': 'rucio.rse.protocols.posix.Default',
            'domains': {
                'lan': {
                    'read': 1,
                    'write': 1,
                    'delete': 1
                },
                'wan': {
                    'read': 1,
                    'write': 1,
                    'delete': 1
                }
            }
        })
    add_protocol(
        rse_id, {
            'scheme': 'root',
            'hostname': '%s.cern.ch' % rse_id,
            'port': 0,
            'prefix': '/test/',
            'impl': 'rucio.rse.protocols.xrootd.Default',
            'domains': {
                'lan': {
                    'read': 2,
                    'write': 2,
                    'delete': 2
                },
                'wan': {
                    'read': 2,
                    'write': 2,
                    'delete': 2
                }
            }
        })
    path = file_generator()
    name = os.path.basename(path)
    item = {
        'path': path,
        'rse': rse,
        'did_scope': str(mock_scope),
        'did_name': name,
        'guid': generate_uuid(),
    }
    did_factory.upload_client.upload([item])
    did_str = '%s:%s' % (mock_scope, name)
    with patch('rucio.rse.protocols.%s.Default.get' % impl, side_effect=lambda pfn, dest, **kw: shutil.copy(path, dest)) as mock_get, \
            patch('rucio.rse.protocols.%s.Default.connect' % impl),\
            patch('rucio.rse.protocols.%s.Default.close' % impl):
        download_client.download_dids([{'did': did_str, 'impl': impl}])
        mock_get.assert_called()

Example #21

0

Show file

File: test_abacus_collection_replica.py Project: ijjorama/rucio

    def test_abacus_collection_replica(self):
        """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
        self.files = [{
            'did_scope': self.scope,
            'did_name': 'file_' + generate_uuid(),
            'path': file_generator(size=self.file_sizes),
            'rse': self.rse,
            'lifetime': -1
        } for i in range(0, 2)]
        self.did_client.add_did(self.scope,
                                self.dataset,
                                DIDType.DATASET,
                                lifetime=-1)
        self.upload_client.upload(self.files)
        self.did_client.attach_dids(scope=self.scope,
                                    name=self.dataset,
                                    dids=[{
                                        'name': file['did_name'],
                                        'scope': file['did_scope']
                                    } for file in self.files])
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                              1,
                                              self.rse,
                                              lifetime=-1)
        [os.remove(file['path']) for file in self.files]

        # Check dataset replica after rule creation - initial data
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert_equal(dataset_replica['bytes'], 0)
        assert_equal(dataset_replica['length'], 0)
        assert_equal(dataset_replica['available_bytes'], 0)
        assert_equal(dataset_replica['available_length'], 0)
        assert_equal(str(dataset_replica['state']), 'UNAVAILABLE')

        # Run Abacus
        collection_replica.run(once=True)

        # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert_equal(dataset_replica['bytes'],
                     len(self.files) * self.file_sizes)
        assert_equal(dataset_replica['length'], len(self.files))
        assert_equal(dataset_replica['available_bytes'],
                     len(self.files) * self.file_sizes)
        assert_equal(dataset_replica['available_length'], len(self.files))
        assert_equal(str(dataset_replica['state']), 'AVAILABLE')

        # Delete one file -> collection replica should be unavailable
        cleaner.run(once=True)
        delete_replicas(rse_id=self.rse_id,
                        files=[{
                            'name':
                            self.files[0]['did_name'],
                            'scope':
                            InternalScope(self.files[0]['did_scope'],
                                          **self.vo)
                        }])
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                              1,
                                              self.rse,
                                              lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert_equal(dataset_replica['length'], len(self.files))
        assert_equal(dataset_replica['bytes'],
                     len(self.files) * self.file_sizes)
        assert_equal(dataset_replica['available_length'], len(self.files) - 1)
        assert_equal(dataset_replica['available_bytes'],
                     (len(self.files) - 1) * self.file_sizes)
        assert_equal(str(dataset_replica['state']), 'UNAVAILABLE')

        # Delete all files -> collection replica should be deleted
        cleaner.run(once=True)
        reaper.run(once=True, rses=[self.rse], greedy=True)
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                              1,
                                              self.rse,
                                              lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ]
        assert_equal(len(dataset_replica), 0)