Example #1
    def getReplicaInfoForBlocks(self,
                                scope='cms',
                                dataset=None,
                                block=None,
                                node=None):  # Mirroring PhEDEx service
        """
        _blockreplicas_
        Get replicas for given blocks

        dataset        dataset name, can be multiple (*)
        block          block name, can be multiple (*)
        node           node name, can be multiple (*)
        se             storage element name, can be multiple (*)
        update_since   unix timestamp, only return replicas updated since this
                time
        create_since   unix timestamp, only return replicas created since this
                time
        complete       y or n, whether to require complete or incomplete
                blocks. Default is to return either
        subscribed     y or n, filter for subscription. Default is to return either
        custodial      y or n, filter for custodial responsibility. Default is
                to return either
        group          group name. Default is to return replicas for any group
        """

        block_names = []
        result = {'block': []}

        rc = ReplicaClient(account=self.account, auth_type=self.auth_type)

        if isinstance(block, (list, set)):
            block_names = block
        elif block:
            block_names = [block]

        if isinstance(dataset, (list, set)):
            for dataset_name in dataset:
                block_names.extend(
                    self.cmsBlocksInContainer(dataset_name, scope=scope))
        elif dataset:
            block_names.extend(self.cmsBlocksInContainer(dataset, scope=scope))

        for block_name in block_names:
            dids = [{'scope': scope, 'name': block_name}]

            response = rc.list_replicas(dids=dids)
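            # Each returned item carries a 'states' mapping of RSE (node) name
            # to replica state, which is filtered for AVAILABLE below.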
            nodes = set()
            for item in response:
                for node, state in item['states'].items():
                    if state.upper() == 'AVAILABLE':
                        nodes.add(node)
            result['block'].append({block_name: list(nodes)})
        return result
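
A hedged usage sketch for the method above; CMSRucio is a hypothetical wrapper class holding the account and auth_type the method expects, and the block name is illustrative:

    # Hypothetical call site: CMSRucio and the block name are illustrative.
    api = CMSRucio(account='transfer_ops', auth_type='x509')
    info = api.getReplicaInfoForBlocks(block='/PrimaryDS/Processed/RAW#abc123')
    # Result shape mirrors PhEDEx blockreplicas:
    # {'block': [{'<block name>': ['<node>', ...]}]}
    for entry in info['block']:
        for block_name, nodes in entry.items():
            print(block_name, '->', nodes)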
Example #2
    def __lfns2pfns_client(self, lfns):
        """ Provides the path of a replica for non-detemernisic sites. Will be assigned to get path by the __init__ method if neccessary.

            :param scope: list of DIDs

            :returns: dict with scope:name as keys and PFN as value (in case of errors the Rucio exception si assigned to the key)
        """
        client = ReplicaClient()
        pfns = {}

        lfns = [lfns] if isinstance(lfns, dict) else lfns
        for lfn in lfns:
            scope = lfn['scope']
            name = lfn['name']
            replicas = [r for r in client.list_replicas([{'scope': scope, 'name': name}, ], schemes=[self.attributes['scheme'], ])]  # schemes is used to narrow down the response message.
            if len(replicas) > 1:
                pfns['%s:%s' % (scope, name)] = exception.RSEOperationNotSupported('This operation can only be performed for files.')
                continue
            if not replicas:
                pfns['%s:%s' % (scope, name)] = exception.RSEOperationNotSupported('File not found.')
                continue
            pfns['%s:%s' % (scope, name)] = replicas[0]['rses'][self.rse['rse']][0] if (self.rse['rse'] in replicas[0]['rses']) else exception.RSEOperationNotSupported('Replica not found on given RSE.')
        return pfns
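
A hedged sketch of how the mapping above is typically consumed; protocol stands for an already-constructed RSE protocol object whose __init__ bound __lfns2pfns_client to lfns2pfns (the binding and the DID are assumptions for illustration):

    # Hypothetical call site; errors come back as exception objects in the
    # result dict rather than being raised, so each value must be checked.
    lfns = [{'scope': 'user.jdoe', 'name': 'file.root'}]
    for key, value in protocol.lfns2pfns(lfns).items():
        if isinstance(value, Exception):
            print('lookup failed for %s: %s' % (key, value))
        else:
            print('%s -> %s' % (key, value))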
Example #3
class ShowRun():
    def __init__(self):

        # Load all data type categories from the host configuration
        hostconfig = helper.get_hostconfig()
        self.RAW_RECORDS_TPC_TYPES = hostconfig['raw_records_tpc_types']
        self.RAW_RECORDS_MV_TYPES = hostconfig['raw_records_mv_types']
        self.RAW_RECORDS_NV_TYPES = hostconfig['raw_records_nv_types']
        self.LIGHT_RAW_RECORDS_TPC_TYPES = hostconfig['light_raw_records_tpc_types']
        self.LIGHT_RAW_RECORDS_MV_TYPES = hostconfig['light_raw_records_mv_types']
        self.LIGHT_RAW_RECORDS_NV_TYPES = hostconfig['light_raw_records_nv_types']
        self.HIGH_LEVEL_TYPES = hostconfig['high_level_types']
        self.RECORDS_TYPES = hostconfig['records_types']

        # All data types handled by this class
        self.DTYPES = (self.RAW_RECORDS_TPC_TYPES + self.RAW_RECORDS_MV_TYPES +
                       self.RAW_RECORDS_NV_TYPES + self.LIGHT_RAW_RECORDS_TPC_TYPES +
                       self.LIGHT_RAW_RECORDS_MV_TYPES + self.LIGHT_RAW_RECORDS_NV_TYPES +
                       self.HIGH_LEVEL_TYPES + self.RECORDS_TYPES)

        # List of all XENON RSEs
        self.RSES = hostconfig['rses']

        # RSE used to perform the upload
        self.UPLOAD_TO = hostconfig['upload_to']

        # Directory from which the data manager uploads data
        self.DATADIR = hostconfig['path_data_to_upload']

        # Sequence of rules to be created according to the data type
        self.RAW_RECORDS_TPC_RSES = hostconfig["raw_records_tpc_rses"]
        self.RAW_RECORDS_MV_RSES = hostconfig["raw_records_mv_rses"]
        self.RAW_RECORDS_NV_RSES = hostconfig["raw_records_nv_rses"]
        self.LIGHT_RAW_RECORDS_TPC_RSES = hostconfig["light_raw_records_tpc_rses"]
        self.LIGHT_RAW_RECORDS_MV_RSES = hostconfig["light_raw_records_mv_rses"]
        self.LIGHT_RAW_RECORDS_NV_RSES = hostconfig["light_raw_records_nv_rses"]
        self.HIGH_LEVEL_RSES = hostconfig["high_level_rses"]
        self.RECORDS_RSES = hostconfig["records_rses"]

        # Init the runDB
        self.db = ConnectMongoDB()

        # Init Rucio for later uploads and handling
        self.rc = RucioSummoner()
        self.didclient = DIDClient()
        self.replicaclient = ReplicaClient()

        # Rucio rule assignment priority
        self.priority = 3

        # Thresholds used to emit warnings
        self.minimum_number_acceptable_rses = 2
        self.minimum_deltadays_allowed = 3

    def showrun(self, arg_number, arg_to, arg_dtypes, arg_compact,
                arg_dumpjson, arg_status, arg_latest, arg_pending):

        # Define data types
        hostconfig = helper.get_hostconfig()
        RAW_RECORDS_TPC_TYPES = hostconfig['raw_records_tpc_types']
        RAW_RECORDS_MV_TYPES = hostconfig['raw_records_mv_types']
        RAW_RECORDS_NV_TYPES = hostconfig['raw_records_nv_types']
        LIGHT_RAW_RECORDS_TPC_TYPES = hostconfig['light_raw_records_tpc_types']
        LIGHT_RAW_RECORDS_MV_TYPES = hostconfig['light_raw_records_mv_types']
        LIGHT_RAW_RECORDS_NV_TYPES = hostconfig['light_raw_records_nv_types']
        HIGH_LEVEL_TYPES = hostconfig['high_level_types']
        RECORDS_TYPES = hostconfig['records_types']

        # Get other parameters
        DATADIR = hostconfig['path_data_to_upload']
        RSES = hostconfig['rses']

        minimum_number_acceptable_rses = 2
        minimum_deltadays_allowed = 3

        # Storing some backup hashes in case DID information is not available
        bkp_hashes = {
            'raw_records': 'rfzvpzj4mf',
            'raw_records_he': 'rfzvpzj4mf',
            'raw_records_mv': 'rfzvpzj4mf',
            'raw_records_aqmon': 'rfzvpzj4mf',
            'records': '56ausr64s7',
            'lone_hits': 'b7dgmtzaef'
        }

        context = 'xenonnt_online'

        # Init the runDB
        db = ConnectMongoDB()

        # Init Rucio for later uploads and handling
        rc = RucioSummoner(helper.get_hostconfig("rucio_backend"))
        rc.SetRucioAccount(helper.get_hostconfig('rucio_account'))
        rc.SetConfigPath(helper.get_hostconfig("rucio_cli"))
        rc.SetProxyTicket(helper.get_hostconfig('rucio_x509'))
        rc.SetHost(helper.get_hostconfig('host'))
        rc.ConfigHost()

        data_types = (RAW_RECORDS_TPC_TYPES + RAW_RECORDS_MV_TYPES +
                      RAW_RECORDS_NV_TYPES + LIGHT_RAW_RECORDS_TPC_TYPES +
                      LIGHT_RAW_RECORDS_MV_TYPES + LIGHT_RAW_RECORDS_NV_TYPES +
                      HIGH_LEVEL_TYPES + RECORDS_TYPES)

        # if arg_number has been given
        if arg_number != "":

            # if the "number" argument is a number, it is converted as integer
            if arg_number.isdigit():
                arg_number = int(arg_number)
            # otherwise it is assumed that a DID has been given and run number and other parameters are extracted from the DID
            else:
                arg_number, dtype, hash = get_did(arg_number)
                arg_dtypes = [dtype]

        # if no arg_number has been given, then the "latest" option is activated (with 5 run numbers by default) in compact mode
        else:
            if arg_latest == 0:
                arg_latest = 5
                arg_compact = True

        if arg_latest > 0:
            cursor = db.db.find({}).sort('number', pymongo.DESCENDING).limit(1)
            cursor = list(cursor)
            arg_to = cursor[0]['number']
            arg_number = arg_to - arg_latest + 1
            print('Processing latest {0} runs'.format(arg_latest))

        if arg_to > arg_number:
            cursor = db.db.find({
                'number': {
                    '$gte': arg_number,
                    '$lte': arg_to
                }
            }).sort('number', pymongo.ASCENDING)
            print('Runs that will be processed are from {0} to {1}'.format(
                arg_number, arg_to))
        else:
            cursor = db.db.find({'number': arg_number})
            print('Run that will be processed is {0}'.format(arg_number))

        cursor = list(cursor)

        # Runs over all listed runs
        for run in cursor:

            print("")

            # Gets run number
            number = run['number']
            print('Run: {0}'.format(number))

            # Gets the status
            if 'status' in run:
                print('Status: {0}'.format(run['status']))
            else:
                print('Status: {0}'.format('Not available'))

            if arg_status:
                continue

            # Extracts the Event Builder machine that processed this run
            # Then also the bootstrax state and, in case it was abandoned, the reason
            if 'bootstrax' in run:
                bootstrax = run['bootstrax']
                eb = bootstrax['host'].split('.')[0]
                print('Processed by: {0}'.format(eb))
                if 'state' in bootstrax:
                    print('Bootstrax state: {0}'.format(bootstrax['state']))
                    if bootstrax['state'] == 'abandoned':
                        if 'reason' in bootstrax:
                            print('Reason: {0}'.format(bootstrax['reason']))
            else:
                print('Not processed')

            # Gets the date
            if 'start' in run:
                start_time = run['start'].replace(tzinfo=timezone.utc)
                print("Date: ", start_time.astimezone(tz=None))

                # Calculates the duration
                if 'end' in run:
                    if run['end'] is not None:
                        end_time = run['end'].replace(tzinfo=timezone.utc)
                        duration = end_time - start_time
                        print("Duration: ", duration)
                    else:
                        print("Duration: ", "unknown")

                # Prints if the run is recent enough (less than three days old)
                now_time = datetime.now().replace(tzinfo=timezone.utc)
                delta_time = now_time - start_time
                if delta_time < timedelta(days=minimum_deltadays_allowed):
                    print("Less than {0} days old".format(
                        minimum_deltadays_allowed))
            else:
                print("Warning : no time info available")

            # Gets the comments
            if 'comments' in run:
                if len(run['comments']) > 0:
                    last_comment = run['comments'][-1]
                    print("Latest comment ({0}): {1}".format(
                        last_comment['user'], last_comment['comment']))

            # Dumps the entire rundoc under json format
            if arg_dumpjson:
                print(dumps(run, indent=4))

            if arg_compact:
                continue

            # Merges data and deleted_data
            # if 'deleted_data' in run:
            #     data = run['data'] + run['deleted_data']
            # else:
            data = run['data']

            # Check if there are more instances in other EventBuilders
            extra_ebs = set()
            for d in data:
                if 'eb' in d['host'] and eb not in d['host']:
                    extra_ebs.add(d['host'].split('.')[0])
            if len(extra_ebs) > 0:
                print(
                    '\t\t Warning : The run has been processed by more than one EventBuilder: {0}'
                    .format(extra_ebs))

            # Runs over all data types to be monitored
            for dtype in data_types:

                if len(arg_dtypes) > 0:
                    if dtype not in arg_dtypes:
                        continue

                # Take the official number of files according to the run DB
                # and the eb status
                Nfiles = -1
                ebstatus = ""
                for d in data:
                    if d['type'] == dtype and eb in d['host']:
                        if 'file_count' in d:
                            Nfiles = d['file_count']
                        if 'status' in d:
                            ebstatus = d['status']

                if arg_pending:
                    if ebstatus in ["", "transferred"]:
                        continue

                # Data type name
                print('{0}'.format(dtype))

                if Nfiles == -1:
                    print('\t Number of files: missing in DB')
                else:
                    print('\t Number of files: {0}'.format(Nfiles))

                if ebstatus != "":
                    print('\t EB status: {0}'.format(ebstatus))
                else:
                    print('\t EB status: not available')

                # Check if data are still in the data list and not in deleted_data
                DB_InEB = False
                for d in run['data']:
                    if d['type'] == dtype and eb in d['host']:
                        DB_InEB = True
                DB_NotInEB = False
                if 'deleted_data' in run:
                    for d in run['deleted_data']:
                        if d['type'] == dtype and eb in d['host']:
                            DB_NotInEB = True
                if DB_InEB and not DB_NotInEB:
                    print('\t DB : still in EB')
                if not DB_InEB and DB_NotInEB:
                    print('\t DB : deleted from EB')
                if DB_InEB and DB_NotInEB:
                    print(
                        '\t\t Incoherency in DB: it is both in data list and in deleted_data list'
                    )
                #if (DB_InEB and DB_NotInEB) or (not DB_InEB and not DB_NotInEB):
                #  print('\t\t incoherency in DB: it is neither in data list nor in deleted_data list')

                # Check if data are still in the EB disks without using the DB
                upload_path = ""
                for d in run['data']:
                    if d['type'] == dtype and eb in d['host']:
                        file = d['location'].split('/')[-1]
                        upload_path = os.path.join(DATADIR, eb, file)
                path_exists = os.path.exists(upload_path)
                if upload_path != "" and path_exists:
                    path, dirs, files = next(os.walk(upload_path))
                    print('\t Disk: still in EB disk and with', len(files),
                          'files')
                else:
                    print('\t Disk: not in EB disk')
                if DB_InEB and not path_exists:
                    print(
                        '\t\t Incoherency in DB and disk: it is in DB data list but it is not in the disk'
                    )
                if DB_NotInEB and path_exists:
                    print(
                        '\t\t Incoherency in DB and disk: it is in DB deleted_data list but it is still in the disk'
                    )

                # The list of DIDs (usually just one)
                dids = set()
                for d in data:
                    if d['type'] == dtype and d['host'] == 'rucio-catalogue':
                        if 'did' in d:
                            dids.add(d['did'])
                print('\t DID:', dids)

                # Check the presence in each available RSE
                Nrses = 0
                for rse in RSES:
                    is_in_rse = False
                    for d in run['data']:
                        if d['type'] == dtype and rse in d['location']:
                            if 'status' in d:
                                status = d['status']
                            else:
                                status = 'Not available'
                            if 'did' in d:
                                hash = d['did'].split('-')[-1]
                                did = d['did']
                            else:
                                print(
                                    '\t\t Warning : DID information is absent in DB data list (old admix version). Using standard hashes for RSEs'
                                )
                                #hash = bkp_hashes.get(dtype)
                                #hash = utilix.db.get_hash(context, dtype)
                                hash = db.GetHashByContext(context, dtype)
                                did = make_did(number, dtype, hash)
                            rucio_rule = rc.GetRule(upload_structure=did,
                                                    rse=rse)
                            files = list_file_replicas(number, dtype, hash,
                                                       rse)
                            if rucio_rule['exists']:
                                print('\t', rse + ': DB Yes, Status', status,
                                      ', Rucio Yes, State',
                                      rucio_rule['state'], ",", len(files),
                                      'files')
                                if len(files) < Nfiles and rucio_rule[
                                        'state'] != "REPLICATING":
                                    print(
                                        '\t\t Warning : Wrong number of files in Rucio!!!'
                                    )
                            else:
                                print('\t', rse + ': DB Yes, Status', status,
                                      ', Rucio No')
                            # print(files)
                            is_in_rse = True
                            Nrses += 1
                    if not is_in_rse:
                        # print('\t\t Warning : data information is absent in DB data list. Trying using standard hashes to query Rucio')
                        # hash = bkp_hashes.get(dtype)
                        # hash = utilix.db.get_hash(context, dtype)
                        hash = db.GetHashByContext(context, dtype)
                        did = make_did(number, dtype, hash)
                        print('\t Guessed DID:', did)
                        rucio_rule = rc.GetRule(upload_structure=did, rse=rse)
                        files = list_file_replicas(number, dtype, hash, rse)
                        if rucio_rule['exists']:
                            print('\t', rse + ': DB No, Rucio Yes, State',
                                  rucio_rule['state'], ",", len(files),
                                  'files')
                            if len(files) < Nfiles and rucio_rule[
                                    'state'] != "REPLICATING":
                                print(
                                    '\t\t Warning : Wrong number of files in Rucio!!!'
                                )
                        else:
                            print('\t', rse + ': DB No, Rucio No')
                print('\t Number of sites: ', Nrses)

    def showrun_new(self, arg_number, arg_to, arg_dtypes, arg_compact,
                    arg_dumpjson, arg_status, arg_latest, arg_pending):

        # if arg_number has been given
        if arg_number != "":

            # if the "number" argument is a number, it is converted as integer
            if arg_number.isdigit():
                arg_number = int(arg_number)
            # otherwise it is assumed that a DID has been given and run number and other parameters are extracted from the DID
            else:
                arg_number, dtype, hash = get_did(arg_number)
                arg_dtypes = [dtype]

        # if no arg_number has been given, then the "latest" option is activated (with 5 run numbers by default) in compact mode
        else:
            if arg_latest == 0:
                arg_latest = 5
                arg_compact = True

        if arg_latest > 0:
            cursor = self.db.db.find({}).sort('number',
                                              pymongo.DESCENDING).limit(1)
            cursor = list(cursor)
            arg_to = cursor[0]['number']
            arg_number = arg_to - arg_latest + 1
            print('Processing latest {0} runs'.format(arg_latest))

        if arg_to > arg_number:
            cursor = self.db.db.find({
                'number': {
                    '$gte': arg_number,
                    '$lte': arg_to
                }
            }).sort('number', pymongo.ASCENDING)
            print('Runs that will be processed are from {0} to {1}'.format(
                arg_number, arg_to))
        else:
            cursor = self.db.db.find({'number': arg_number})
            print('Run that will be processed is {0}'.format(arg_number))

        cursor = list(cursor)

        # Runs over all listed runs
        for run in cursor:

            print("")

            # Gets run number
            number = run['number']
            print('Run: {0}'.format(number))

            # Gets the status
            if 'status' in run:
                print('Status: {0}'.format(run['status']))
            else:
                print('Status: {0}'.format('Not available'))

            if arg_status:
                continue

            # Extracts the Event Builder machine that processed this run
            # Then also the bootstrax state and, in case it was abandoned, the reason
            if 'bootstrax' in run:
                bootstrax = run['bootstrax']
                eb = bootstrax['host'].split('.')[0]
                print('Processed by: {0}'.format(eb))
                if 'state' in bootstrax:
                    print('Bootstrax state: {0}'.format(bootstrax['state']))
                    if bootstrax['state'] == 'abandoned':
                        if 'reason' in bootstrax:
                            print('Reason: {0}'.format(bootstrax['reason']))
            else:
                print('Not processed by EB')

            # Gets the date
            if 'start' in run:
                start_time = run['start'].replace(tzinfo=timezone.utc)
                print("Date: ", start_time.astimezone(tz=None))

                # Calculates the duration
                if 'end' in run:
                    if run['end'] is not None:
                        end_time = run['end'].replace(tzinfo=timezone.utc)
                        duration = end_time - start_time
                        print("Duration: ", duration)
                    else:
                        print("Duration: ", "unknown")

                # Prints if the run is recent enough (less than three days old)
                now_time = datetime.now().replace(tzinfo=timezone.utc)
                delta_time = now_time - start_time
                if delta_time < timedelta(days=self.minimum_deltadays_allowed):
                    print("Less than {0} days old".format(
                        self.minimum_deltadays_allowed))
            else:
                print("Warning : no time info available")

            # Gets the comments
            if 'comments' in run:
                if len(run['comments']) > 0:
                    last_comment = run['comments'][-1]
                    print("Latest comment ({0}): {1}".format(
                        last_comment['user'], last_comment['comment']))

            # Dumps the entire rundoc under json format
            if arg_dumpjson:
                print(dumps(run, indent=4))

            if arg_compact:
                continue

            # Runs over all data types stored in data and deleted_data fields
            alldata = run['data']
            if 'deleted_data' in run:
                alldata = alldata + run['deleted_data']

            for datum in alldata:

                if arg_pending:
                    ebstatus = ""
                    if 'status' in datum:
                        ebstatus = datum['status']
                    if ebstatus in ["", "transferred"]:
                        continue

                if len(arg_dtypes) > 0:
                    if datum['type'] not in arg_dtypes:
                        continue

                if eb in datum['host']:
                    self.showdataset(run, datum)

    def showdataset(self, run, datum):

        #print(dumps(datum, indent=4))

        # skip dataset if it does not have location
        if 'location' not in datum:
            print('Dataset: type {0} without location. Skipping'.format(
                datum['type']))
            return

        # Dataset name
        number = run['number']
        dtype = datum['type']
        hash = datum['location'].split('-')[-1]
        did = make_did(number, dtype, hash)
        print('Dataset: {0}'.format(did))

        # Event Builder that processed it
        eb = datum['host'].split('.')[0]

        # Directory name
        directory = datum['location'].split('/')[-1]

        # Take the official number of files according to run DB
        Nfiles = -1
        if 'file_count' in datum:
            Nfiles = datum['file_count']
        if Nfiles == -1:
            print('\t Number of files: missing in DB')
        else:
            print('\t Number of files: {0}'.format(Nfiles))

        # Take the status of the EB dataset according to run DB
        ebstatus = ""
        if 'status' in datum:
            ebstatus = datum['status']
        if ebstatus != "":
            print('\t EB status: {0}'.format(ebstatus))
        else:
            print('\t EB status: not available')

        # Check if there are double entries in the DB
        Copies = 0
        for d in run['data']:
            if d['type'] == dtype and eb in d['host'] and hash in d['location']:
                Copies = Copies + 1
        if Copies > 1:
            print('\t\t Warning {0}: EB datum has a double entry in the DB'.
                  format(did))

        # Check if there are other entries in the deleted_data (even with different EBs)
        #DeletedCopies = []
        #for d in run['deleted_data']:
        #    if d['type'] == dtype and hash in d['location']:
        #        DeletedCopies.append(d['host'].split('.')[0])
        #if len(DeletedCopies)>0:
        #    print('\t Previously deleted data processed with those EBs: {0}'.format(DeletedCopies))

        # Read the real number of files present in EB disks
        upload_path = os.path.join(self.DATADIR, eb, directory)
        path_exists = os.path.exists(upload_path)
        Nfiles_disk = 0
        if path_exists:
            path, dirs, files = next(os.walk(upload_path))
            Nfiles_disk = len(files)

        # If data are supposed to be (according to DB) still present in EB, check if they are there
        if datum in run['data']:
            print('\t Still in EB')
            if Nfiles_disk != Nfiles:
                print(
                    '\t\t Warning {0}: number of files in EB disk ({1}) does not match with the DB info ({2})'
                    .format(did, Nfiles_disk, Nfiles))

        # Otherwise, if data are supposed to be (according to DB) deleted, check if they are really absent
        elif datum in run['deleted_data']:
            print('\t Deleted from EB')
            if Nfiles_disk > 0:
                print(
                    '\t\t Warning {0}: files are still in EB disk (nfiles={1}) while DB says they are deleted '
                    .format(did, Nfiles_disk))

        # Query Rucio to see how many RSEs hold these data
        rules = list(
            self.didclient.list_did_rules(
                did.split(':')[0],
                did.split(':')[1]))
        rses_with_data = []
        for rule in rules:
            rses_with_data.append(rule['rse_expression'])
        if len(rses_with_data) > 0:
            print('\t Rucio replicas in {0} RSEs : {1}'.format(
                len(rses_with_data), rses_with_data))
        else:
            print('\t No replicas in Rucio')

        # Check the presence of data in each available RSE and compatibility with DB

        # Step 1: prepare the list of per-RSE dictionaries
        rses = []
        for rse in self.RSES:
            r = {}
            r['name'] = rse
            rses.append(r)

        Nrses = 0

        # Step 2: fill each RSE entry with info from the DB and from Rucio
        for rse in rses:
            is_in_rse = False

            # Get info available in the DB
            rse['DBentries'] = 0
            rse['DBStatus'] = ""
            for d in run['data']:
                if 'rucio' in d['host']:
                    if d['did'] == did and rse['name'] in d['location']:
                        if 'status' in d:
                            rse['DBStatus'] = d['status']
                        rse['DBentries'] = rse['DBentries'] + 1

            # Get info available in Rucio
            rucio_rule = self.rc.GetRule(upload_structure=did, rse=rse['name'])
            # files = list_file_replicas(number, dtype, hash, rse['name'])
            # files = list(self.rc.ListFileReplicas(did, rse['name'], localpath=True).values())
            did_dictionary = [{
                'scope': did.split(':')[0],
                'name': did.split(':')[1]
            }]
            replicas = list(
                self.replicaclient.list_replicas(did_dictionary,
                                                 rse_expression=rse['name']))
            #print(dumps(replicas, indent=4))
            rse['RucioExists'] = rucio_rule['exists']
            rse['RucioNFiles'] = len(replicas)

        # Step 3: analysis of data
        for rse in rses:

            #print(rse)

            # Analysis specific to the upload RSE
            if rse['name'] == self.UPLOAD_TO:

                # Case 1 : loss of Rucio connection at the end of the upload before creating the rule
                if rse['RucioNFiles'] == Nfiles and not rse[
                        'RucioExists'] and rse['DBStatus'] == "" and rse[
                            'DBentries'] == 0 and len(rses_with_data) == 0:
                    print(
                        '\t\t Warning: files have been uploaded but the rule has not been created'
                    )
                    print(
                        '\t\t Hint: create the rule manually, then continue uploading, using the following three commands:'
                    )
                    print('\t\t\t rucio add-rule {0} 1 {1}'.format(
                        did, rse['name']))
                    print('\t\t\t admix-fix --fix_upload_db {0}'.format(did))
                    print('\t\t\t admix-fix --create_upload_rules {0}'.format(
                        did))
                    # os.system('rucio add-rule {0} 1 {1}'.format(did, rse['name']))
                    # os.system('~/.local/bin/admix-fix --fix_upload_db {0}'.format(did))
                    # os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))

                # Case 2 : loss of Rucio connection at the end of the upload before updating the DB
                if rse['RucioNFiles'] == Nfiles and rse['RucioExists'] and rse[
                        'DBStatus'] == "" and rse['DBentries'] == 0 and len(
                            rses_with_data) == 1:
                    print(
                        '\t\t Warning: the upload is completed, but DB needs to be updated and rules have to be created abroad'
                    )
                    print('\t\t Hint: fix it manually with the two commands:')
                    print('\t\t\t admix-fix --fix_upload_db {0}'.format(did))
                    print('\t\t\t admix-fix --create_upload_rules {0}'.format(
                        did))
                    # os.system('~/.local/bin/admix-fix --fix_upload_db {0}'.format(did))
                    # os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))

                # Case 3 : loss of Rucio connection at the end of the upload before creating the rules abroad
                if rse['RucioNFiles'] == Nfiles and rse['RucioExists'] and rse[
                        'DBStatus'] == "transferred" and rse[
                            'DBentries'] == 1 and len(rses_with_data) == 1:
                    print(
                        '\t\t Warning: the upload is completed and the DB updated, but rules have to be created abroad'
                    )
                    print('\t\t Hint: fix it manually with the command:')
                    print('\t\t\t admix-fix --create_upload_rules {0}'.format(
                        did))
                    # os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))

                # Case 4 : data still to be uploaded, but the EB status is not empty, so admix cannot upload it
                if rse['RucioNFiles'] == 0 and not rse['RucioExists'] and rse[
                        'DBStatus'] == "" and rse['DBentries'] == 0 and len(
                            rses_with_data) == 0 and ebstatus not in [
                                "", "transferred"
                            ]:
                    print(
                        '\t\t Warning: the upload never started but the EB status is not empty, hence admix cannot upload it'
                    )
                    print(
                        '\t\t Hint: fix it manually with the following command to allow admix upload manager to take care of it:'
                    )
                    print(
                        '\t\t\t admix-fix --set_eb_status {0} eb_ready_to_upload'
                        .format(did))
                    # os.system('~/.local/bin/admix-fix --set_eb_status {0} eb_ready_to_upload'.format(did))

                # Case 5 : the upload is completed and copies exist abroad, but the EB status was never set to transferred
                if rse['RucioNFiles'] == Nfiles and rse['RucioExists'] and rse[
                        'DBStatus'] == "transferred" and rse[
                            'DBentries'] == 1 and len(
                                rses_with_data) > 0 and ebstatus not in [
                                    "", "transferred"
                                ]:
                    print(
                        '\t\t Warning: the upload is completed and there are also copies abroad'
                    )
                    print(
                        '\t\t Hint: fix it manually with the command below to flag the EB datum as transferred:'
                    )
                    print('\t\t\t admix-fix --set_eb_status {0} transferred'.
                          format(did))
                    # os.system('~/.local/bin/admix-fix --set_eb_status {0} transferred'.format(did))

                # Case 6 : the upload has been interrupted during the copy
                if rse['RucioNFiles'] != Nfiles and rse['RucioExists'] and rse[
                        'DBStatus'] == "" and rse['DBentries'] == 0 and len(
                            rses_with_data
                        ) == 1 and ebstatus == "transferring":
                    print(
                        '\t\t Warning: the upload has been interrupted during the copy'
                    )
                    print(
                        '\t\t Hint: fix it manually with the command below to resume the upload:'
                    )
                    print('\t\t\t admix-fix --fix_upload {0}'.format(did))

            # Analysis for all RSEs other than the upload RSE (datamanager)
            else:

                if not ((rse['RucioNFiles'] == Nfiles and rse['RucioExists']
                         and rse['DBentries'] == 1
                         and rse['DBStatus'] == 'transferred') or
                        (rse['RucioNFiles'] == 0 and not rse['RucioExists']
                         and rse['DBentries'] == 0
                         and rse['DBStatus'] != 'transferred')):
                    print(
                        '\t\t Warning {0}: data in RSE {1} are inconsistent:'.
                        format(did, rse['name']))
                    print('\t\t ', rse)
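
The DB-versus-Rucio cross-check in steps 2 and 3 above reduces to two client calls per RSE. A standalone sketch of that core query, with illustrative DID and RSE names and the stock Rucio client imports:

    # Minimal sketch of the per-RSE presence query used above.
    from rucio.client.didclient import DIDClient
    from rucio.client.replicaclient import ReplicaClient

    def rucio_presence(did, rse):
        """Return (#file replicas of did on rse, RSE expressions with a rule)."""
        scope, name = did.split(':')
        replicas = list(ReplicaClient().list_replicas(
            [{'scope': scope, 'name': name}], rse_expression=rse))
        rules = DIDClient().list_did_rules(scope, name)
        return len(replicas), [r['rse_expression'] for r in rules]

    # e.g. nfiles, rses_with_rule = rucio_presence('xnt_007155:raw_records-rfzvpzj4mf', 'UC_OSG_USERDISK')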
Example #4
class TestReplicaClients:
    def setup(self):
        self.replica_client = ReplicaClient()
        self.did_client = DIDClient()

    def test_add_list_bad_replicas(self):
        """ REPLICA (CLIENT): Add bad replicas"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK')
        rse_id1 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(
                dids=[{'scope': f['scope'], 'name': f['name']} for f in files],
                schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_bad_file_replicas(
            replicas, 'This is a good reason')
        assert_equal(r, {})
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id1:
                    if badrep['scope'] == rep['scope'] and badrep[
                            'name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)

        # Run necromancer once
        necromancer_run(threads=1, bulk=10000, once=True)

        # Try to attach a lost file
        tmp_dsn = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)
        with assert_raises(UnsupportedOperation):
            self.did_client.add_files_to_dataset(tmp_scope,
                                                 name=tmp_dsn,
                                                 files=files,
                                                 rse='MOCK')

        # Adding replicas to non-deterministic RSE
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()),
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK2')
        rse_id2 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK2', files=files)

        # Listing replicas on non-deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(
                dids=[{'scope': f['scope'], 'name': f['name']} for f in files],
                schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        print(replicas, list_rep)
        r = self.replica_client.declare_bad_file_replicas(
            replicas, 'This is a good reason')
        print(r)
        assert_equal(r, {})
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id2:
                    if badrep['scope'] == rep['scope'] and badrep[
                            'name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)

        # Now adding non-existing bad replicas
        files = [
            'srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' %
            (tmp_scope, generate_uuid()),
        ]
        r = self.replica_client.declare_bad_file_replicas(
            files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_add_suspicious_replicas(self):
        """ REPLICA (CLIENT): Add suspicious replicas"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(
                dids=[{'scope': f['scope'], 'name': f['name']} for f in files],
                schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(
            replicas, 'This is a good reason')
        assert_equal(r, {})

        # Adding replicas to non-deterministic RSE
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()),
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK2', files=files)

        # Listing replicas on non-deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(
                dids=[{'scope': f['scope'], 'name': f['name']} for f in files],
                schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(
            replicas, 'This is a good reason')
        assert_equal(r, {})

        # Now adding non-existing bad replicas
        files = [
            'srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' %
            (tmp_scope, generate_uuid()),
        ]
        r = self.replica_client.declare_suspicious_file_replicas(
            files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_bad_replica_methods_for_UI(self):
        """ REPLICA (REST): Test the listing of bad and suspicious replicas """
        mw = []
        headers1 = {
            'X-Rucio-Account': 'root',
            'X-Rucio-Username': '******',
            'X-Rucio-Password': '******'
        }
        result = TestApp(auth_app.wsgifunc(*mw)).get('/userpass',
                                                     headers=headers1,
                                                     expect_errors=True)
        assert_equal(result.status, 200)
        token = str(result.header('X-Rucio-Auth-Token'))
        headers2 = {'X-Rucio-Auth-Token': str(token)}

        data = dumps({})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        tot_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_files.append(dumps(line))
        nb_tot_files = len(tot_files)

        data = dumps({'state': 'B'})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        tot_bad_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files1 = len(tot_bad_files)

        data = dumps({'state': 'S', 'list_pfns': 'True'})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        tot_suspicious_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_suspicious_files.append(dumps(line))
        nb_tot_suspicious_files = len(tot_suspicious_files)

        data = dumps({'state': 'T', 'list_pfns': 'True'})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        tot_temporary_unavailable_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_temporary_unavailable_files.append(dumps(line))
        nb_tot_temporary_unavailable_files = len(
            tot_temporary_unavailable_files)

        assert_equal(
            nb_tot_files, nb_tot_bad_files1 + nb_tot_suspicious_files +
            nb_tot_temporary_unavailable_files)

        tomorrow = datetime.utcnow() + timedelta(days=1)
        data = dumps({'state': 'B', 'younger_than': tomorrow.isoformat()})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        tot_bad_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files = len(tot_bad_files)
        assert_equal(nb_tot_bad_files, 0)

        data = dumps({})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/summary',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        nb_tot_bad_files2 = 0
        for line in result.body.split('\n'):
            if line != '':
                line = loads(line)
                nb_tot_bad_files2 += int(line.get('BAD', 0))
        assert_equal(nb_tot_bad_files1, nb_tot_bad_files2)

    def test_add_list_replicas(self):
        """ REPLICA (CLIENT): Add, change state and list file replicas """
        tmp_scope = 'mock'
        nbfiles = 5

        files1 = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files1)

        files2 = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files2)

        replicas = [
            r for r in self.replica_client.list_replicas(
                dids=[{
                    'scope': i['scope'],
                    'name': i['name']
                } for i in files1])
        ]
        assert_equal(len(replicas), len(files1))

        replicas = [
            r for r in self.replica_client.list_replicas(
                dids=[{
                    'scope': i['scope'],
                    'name': i['name']
                } for i in files2],
                schemes=['file'])
        ]
        assert_equal(len(replicas), 5)

        replicas = [
            r for r in self.replica_client.list_replicas(
                dids=[{
                    'scope': i['scope'],
                    'name': i['name']
                } for i in files2],
                schemes=['srm'])
        ]
        assert_equal(len(replicas), 5)

        files3 = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'state': 'U',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files3)
        replicas = [
            r for r in self.replica_client.list_replicas(
                dids=[{
                    'scope': i['scope'],
                    'name': i['name']
                } for i in files3],
                schemes=['file'])
        ]
        for i in range(nbfiles):
            assert_equal(replicas[i]['rses'], {})
        files4 = []
        for file in files3:
            file['state'] = 'A'
            files4.append(file)
        self.replica_client.update_replicas_states('MOCK3', files=files4)
        replicas = [
            r for r in self.replica_client.list_replicas(
                dids=[{
                    'scope': i['scope'],
                    'name': i['name']
                } for i in files3],
                schemes=['file'],
                unavailable=True)
        ]
        assert_equal(len(replicas), 5)
        for i in range(nbfiles):
            assert_in('MOCK3', replicas[i]['rses'])

    def test_delete_replicas(self):
        """ REPLICA (CLIENT): Add and delete file replicas """
        tmp_scope = 'mock'
        nbfiles = 5
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)
        with assert_raises(AccessDenied):
            self.replica_client.delete_replicas(rse='MOCK', files=files)

        # replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files])]
        # assert_equal(len(replicas), 0)

    def test_add_temporary_unavailable_pfns(self):
        """ REPLICA (CLIENT): Add temporary unavailable PFNs"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        list_rep = []
        for replica in self.replica_client.list_replicas(
                dids=[{'scope': f['scope'], 'name': f['name']} for f in files],
                schemes=['srm'], unavailable=True):
            pfn = list(replica['pfns'].keys())[0]  # list() needed on Python 3 dict views
            list_rep.append(pfn)

        # Submit bad PFNs
        now = datetime.utcnow()
        reason_str = generate_uuid()
        self.replica_client.add_bad_pfns(pfns=list_rep,
                                         reason=str(reason_str),
                                         state='TEMPORARY_UNAVAILABLE',
                                         expires_at=now.isoformat())
        result = get_bad_pfns(limit=10000,
                              thread=None,
                              total_threads=None,
                              session=None)
        bad_pfns = {}
        for res in result:
            bad_pfns[res['pfn']] = (res['state'], res['reason'],
                                    res['expires_at'])

        for pfn in list_rep:
            pfn = str(clean_surls([pfn])[0])
            assert_in(pfn, bad_pfns)
            assert_equal(str(bad_pfns[pfn][0]), 'TEMPORARY_UNAVAILABLE')
            assert_equal(bad_pfns[pfn][1], reason_str)

        # Submit with wrong state
        with assert_raises(RucioException):
            self.replica_client.add_bad_pfns(pfns=list_rep,
                                             reason=str(reason_str),
                                             state='BADSTATE',
                                             expires_at=now.isoformat())

        # Run minos once
        minos_run(threads=1, bulk=10000, once=True)
        result = get_bad_pfns(limit=10000,
                              thread=None,
                              total_threads=None,
                              session=None)
        pfns = [res['pfn'] for res in result]
        res_pfns = []
        for replica in list_rep:
            if replica in pfns:
                res_pfns.append(replica)
        assert_equal(res_pfns, [])

        # Check the state in the replica table
        for did in files:
            rep = get_replicas_state(scope=did['scope'], name=did['name'])
            assert_equal(str(list(rep.keys())[0]), 'TEMPORARY_UNAVAILABLE')

        rep = []
        for did in files:
            did['state'] = ReplicaState.from_sym('TEMPORARY_UNAVAILABLE')
            rep.append(did)

        # Run the minos expiration
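        # expires_at was set to "now" above, so the TEMPORARY_UNAVAILABLE
        # entries have already expired and this run should restore the
        # replicas to AVAILABLE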
        minos_temp_run(threads=1, once=True)
        # Check the state in the replica table
        for did in files:
            rep = get_replicas_state(scope=did['scope'], name=did['name'])
            assert_equal(str(list(rep.keys())[0]), 'AVAILABLE')

    def test_set_tombstone(self):
        """ REPLICA (CLIENT): set tombstone on replica """
        # Set tombstone on one replica
        rse = 'MOCK4'
        scope = 'mock'
        user = '******'
        name = generate_uuid()
        add_replica(rse, scope, name, 4, user)
        assert_equal(get_replica(rse, scope, name)['tombstone'], None)
        self.replica_client.set_tombstone([{
            'rse': rse,
            'scope': scope,
            'name': name
        }])
        assert_equal(get_replica(rse, scope, name)['tombstone'], OBSOLETE)

        # Set tombstone on locked replica
        name = generate_uuid()
        add_replica(rse, scope, name, 4, user)
        RuleClient().add_replication_rule([{'name': name, 'scope': scope}],
                                          1, rse, locked=True)
        with assert_raises(ReplicaIsLocked):
            self.replica_client.set_tombstone([{
                'rse': rse,
                'scope': scope,
                'name': name
            }])

        # Set tombstone on not found replica
        name = generate_uuid()
        with assert_raises(ReplicaNotFound):
            self.replica_client.set_tombstone([{
                'rse': rse,
                'scope': scope,
                'name': name
            }])
Example #6
    def test_replica_no_site(self):
        """ REPLICA (CORE): Test listing replicas without site attribute """

        rc = ReplicaClient()

        rse = 'APERTURE_%s' % rse_name_generator()
        add_rse(rse)

        add_protocol(
            rse, {
                'scheme': 'root',
                'hostname': 'root.aperture.com',
                'port': 1409,
                'prefix': '//test/chamber/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        add_rse_attribute(rse=rse, key='site', value='APERTURE')

        files = [{
            'scope': 'mock',
            'name': 'element_%s' % generate_uuid(),
            'bytes': 1234,
            'adler32': 'deadbeef'
        }]
        add_replicas(rse=rse, files=files, account='root')

        replicas = [
            r for r in rc.list_replicas(dids=[{
                'scope': 'mock',
                'name': f['name']
            } for f in files])
        ]
        assert_in('root://', list(replicas[0]['pfns'].keys())[0])

        replicas = [
            r for r in rc.list_replicas(dids=[{
                'scope': 'mock',
                'name': f['name']
            } for f in files],
                                        client_location={'site': 'SOMEWHERE'})
        ]
        assert_in('root://', list(replicas[0]['pfns'].keys())[0])

        del_rse_attribute(rse=rse, key='site')

        replicas = [
            r for r in rc.list_replicas(dids=[{
                'scope': 'mock',
                'name': f['name']
            } for f in files])
        ]
        assert_in('root://', list(replicas[0]['pfns'].keys())[0])

        replicas = [
            r for r in rc.list_replicas(dids=[{
                'scope': 'mock',
                'name': f['name']
            } for f in files],
                                        client_location={'site': 'SOMEWHERE'})
        ]
        assert_in('root://', list(replicas[0]['pfns'].keys())[0])
Example #7
class TestReplicaMetalink:
    def setup(self):
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.base_client = BaseClient(account='root',
                                      ca_cert=config_get('client', 'ca_cert'),
                                      auth_type='x509')
        self.token = self.base_client.headers['X-Rucio-Auth-Token']

        self.fname = generate_uuid()

        rses = ['MOCK', 'MOCK3', 'MOCK4']
        dsn = generate_uuid()
        self.files = [{
            'scope': 'mock',
            'name': self.fname,
            'bytes': 1,
            'adler32': '0cc737eb'
        }]

        self.did_client.add_dataset(scope='mock', name=dsn)
        self.did_client.add_files_to_dataset('mock',
                                             name=dsn,
                                             files=self.files,
                                             rse='MOCK')
        for r in rses:
            self.replica_client.add_replicas(r, self.files)

    def test_list_replicas_metalink_4(self):
        """ REPLICA (METALINK): List replicas as metalink version 4 """
        ml = xmltodict.parse(self.replica_client.list_replicas(
            self.files,
            metalink=4,
            unavailable=True,
            schemes=['https', 'sftp', 'file']),
                             xml_attribs=False)
        assert_equal(3, len(ml['metalink']['file']['url']))

    def test_get_did_from_pfns_nondeterministic(self):
        """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for non-deterministic sites"""
        rse = 'MOCK2'
        tmp_scope = 'mock'
        nbfiles = 3
        pfns = []
        input = {}
        rse_info = rsemgr.get_rse_info(rse)
        assert_equal(rse_info['deterministic'], False)
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s'
                   % (tmp_scope, generate_uuid()),
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        for f in files:
            input[f['pfn']] = {'scope': f['scope'], 'name': f['name']}
        add_replicas(rse=rse,
                     files=files,
                     account='root',
                     ignore_availability=True)
        dids = [{'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE} for f in files]
        for replica in list_replicas(dids=dids,
                                     schemes=['srm'],
                                     ignore_availability=True):
            for rse_key in replica['rses']:
                pfns.extend(replica['rses'][rse_key])
        for result in self.replica_client.get_did_from_pfns(pfns, rse):
            pfn = list(result.keys())[0]
            assert_equal(input[pfn], list(result.values())[0])

    def test_get_did_from_pfns_deterministic(self):
        """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for deterministic sites"""
        tmp_scope = 'mock'
        rse = 'MOCK3'
        nbfiles = 3
        pfns = []
        input = {}
        rse_info = rsemgr.get_rse_info(rse)
        assert_equal(rse_info['deterministic'], True)
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        p = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
        for f in files:
            pfn = list(p.lfns2pfns(lfns={
                'scope': f['scope'],
                'name': f['name']
            }).values())[0]
            pfns.append(pfn)
            input[pfn] = {'scope': f['scope'], 'name': f['name']}
        add_replicas(rse=rse,
                     files=files,
                     account='root',
                     ignore_availability=True)
        for result in self.replica_client.get_did_from_pfns(pfns, rse):
            pfn = list(result.keys())[0]
            assert_equal(input[pfn], list(result.values())[0])
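
A minimal standalone sketch of the deterministic LFN-to-PFN mapping exercised above, assuming (as in this example) that 'MOCK3' is a deterministic RSE; the file name is illustrative:

from rucio.rse import rsemanager as rsemgr

rse_info = rsemgr.get_rse_info('MOCK3')
p = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
# for a deterministic RSE the PFN is derived from scope and name alone
pfns = p.lfns2pfns(lfns={'scope': 'mock', 'name': 'file_abc'})
print(list(pfns.values())[0])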
Example #8
class TestReplicaSorting(unittest.TestCase):

    def setUp(self):
        if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
            self.vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
        else:
            self.vo = {}

    def test_replica_sorting(self):
        """ REPLICA (CORE): Test the correct sorting of the replicas across WAN and LAN """

        self.rc = ReplicaClient()

        self.rse1 = 'APERTURE_%s' % rse_name_generator()
        self.rse2 = 'BLACKMESA_%s' % rse_name_generator()
        self.rse1_id = add_rse(self.rse1, **self.vo)
        self.rse2_id = add_rse(self.rse2, **self.vo)
        add_rse_attribute(rse_id=self.rse1_id, key='site', value='APERTURE')
        add_rse_attribute(rse_id=self.rse2_id, key='site', value='BLACKMESA')

        self.files = [{'scope': InternalScope('mock', **self.vo), 'name': 'element_0',
                       'bytes': 1234, 'adler32': 'deadbeef'}]
        root = InternalAccount('root', **self.vo)
        add_replicas(rse_id=self.rse1_id, files=self.files, account=root)
        add_replicas(rse_id=self.rse2_id, files=self.files, account=root)

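        # Register three protocols per RSE. For each domain, a lower
        # read/write/delete number means higher priority, and 0 disables the
        # protocol for that domain (see the disabled-protocol check below).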
        add_protocol(self.rse1_id, {'scheme': 'root',
                                    'hostname': 'root.aperture.com',
                                    'port': 1409,
                                    'prefix': '//test/chamber/',
                                    'impl': 'rucio.rse.protocols.xrootd.Default',
                                    'domains': {
                                        'lan': {'read': 1, 'write': 1, 'delete': 1},
                                        'wan': {'read': 1, 'write': 1, 'delete': 1}}})
        add_protocol(self.rse1_id, {'scheme': 'davs',
                                    'hostname': 'davs.aperture.com',
                                    'port': 443,
                                    'prefix': '/test/chamber/',
                                    'impl': 'rucio.rse.protocols.gfal.Default',
                                    'domains': {
                                        'lan': {'read': 2, 'write': 2, 'delete': 2},
                                        'wan': {'read': 2, 'write': 2, 'delete': 2}}})
        add_protocol(self.rse1_id, {'scheme': 'gsiftp',
                                    'hostname': 'gsiftp.aperture.com',
                                    'port': 8446,
                                    'prefix': '/test/chamber/',
                                    'impl': 'rucio.rse.protocols.gfal.Default',
                                    'domains': {
                                        'lan': {'read': 0, 'write': 0, 'delete': 0},
                                        'wan': {'read': 3, 'write': 3, 'delete': 3}}})

        add_protocol(self.rse2_id, {'scheme': 'gsiftp',
                                    'hostname': 'gsiftp.blackmesa.com',
                                    'port': 8446,
                                    'prefix': '/lambda/complex/',
                                    'impl': 'rucio.rse.protocols.gfal.Default',
                                    'domains': {
                                        'lan': {'read': 2, 'write': 2, 'delete': 2},
                                        'wan': {'read': 1, 'write': 1, 'delete': 1}}})
        add_protocol(self.rse2_id, {'scheme': 'davs',
                                    'hostname': 'davs.blackmesa.com',
                                    'port': 443,
                                    'prefix': '/lambda/complex/',
                                    'impl': 'rucio.rse.protocols.gfal.Default',
                                    'domains': {
                                        'lan': {'read': 0, 'write': 0, 'delete': 0},
                                        'wan': {'read': 2, 'write': 2, 'delete': 2}}})
        add_protocol(self.rse2_id, {'scheme': 'root',
                                    'hostname': 'root.blackmesa.com',
                                    'port': 1409,
                                    'prefix': '//lambda/complex/',
                                    'impl': 'rucio.rse.protocols.xrootd.Default',
                                    'domains': {
                                        'lan': {'read': 1, 'write': 1, 'delete': 1},
                                        'wan': {'read': 3, 'write': 3, 'delete': 3}}})

        replicas = [r for r in self.rc.list_replicas(dids=[{'scope': 'mock',
                                                            'name': f['name'],
                                                            'type': 'FILE'} for f in self.files],
                                                     schemes=['root', 'gsiftp', 'davs'],
                                                     client_location={'site': 'APERTURE'})]
        pfns = [r['pfns'] for r in replicas][0]
        assert len(pfns.keys()) == 5
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['domain'] == 'lan'
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['priority'] == 1
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['domain'] == 'lan'
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['priority'] == 2
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['priority'] == 3
        assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['priority'] == 4
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['priority'] == 5
        replicas = [r for r in self.rc.list_replicas(dids=[{'scope': 'mock',
                                                            'name': f['name'],
                                                            'type': 'FILE'} for f in self.files],
                                                     schemes=['root', 'gsiftp', 'davs'],
                                                     client_location={'site': 'BLACKMESA'})]
        pfns = [r['pfns'] for r in replicas][0]
        assert len(pfns.keys()) == 5
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['domain'] == 'lan'
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['priority'] == 1
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['domain'] == 'lan'
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['priority'] == 2
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['priority'] == 3
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['priority'] == 4
        assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['priority'] == 5
        replicas = [r for r in self.rc.list_replicas(dids=[{'scope': 'mock',
                                                            'name': f['name'],
                                                            'type': 'FILE'} for f in self.files],
                                                     schemes=['root', 'gsiftp', 'davs'],
                                                     client_location={'site': 'XEN'})]
        pfns = [r['pfns'] for r in replicas][0]
        assert len(pfns.keys()) == 6
        # TODO: intractable until RSE sorting is enabled
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['priority'] in [1, 2]
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['priority'] in [1, 2]
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['priority'] in [3, 4]
        assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['priority'] in [3, 4]
        assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['priority'] in [5, 6]
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['priority'] in [5, 6]

        ml = self.rc.list_replicas(dids=[{'scope': 'mock',
                                          'name': f['name'],
                                          'type': 'FILE'} for f in self.files],
                                   schemes=['root', 'gsiftp', 'davs'],
                                   metalink=True,
                                   client_location={'site': 'APERTURE'})
        assert 'domain="lan" priority="1" client_extract="false">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="lan" priority="2" client_extract="false">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="3" client_extract="false">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="4" client_extract="false">davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="5" client_extract="false">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
        assert 'priority="6"' not in ml

        ml = self.rc.list_replicas(dids=[{'scope': 'mock',
                                          'name': f['name'],
                                          'type': 'FILE'} for f in self.files],
                                   schemes=['root', 'gsiftp', 'davs'],
                                   metalink=True,
                                   client_location={'site': 'BLACKMESA'})
        assert 'domain="lan" priority="1" client_extract="false">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="lan" priority="2" client_extract="false">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="3" client_extract="false">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="4" client_extract="false">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="5" client_extract="false">gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0' in ml
        assert 'priority="6"' not in ml

        # TODO: intractable until RSE sorting is enabled
        # ml = self.rc.list_replicas(dids=[{'scope': 'mock',
        #                                   'name': f['name'],
        #                                   'type': 'FILE'} for f in self.files],
        #                            schemes=['root', 'gsiftp', 'davs'],
        #                            metalink=True,
        #                            client_location={'site': 'XEN'})
        # assert 'domain="wan" priority="1">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
        # assert 'domain="wan" priority="2">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
        # assert 'domain="wan" priority="3">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
        # assert 'domain="wan" priority="4">davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0' in ml
        # assert 'domain="wan" priority="5">gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0' in ml
        # assert 'domain="wan" priority="6">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
        # assert 'priority="7"' not in ml

        # ensure correct handling of disabled protocols
        add_protocol(self.rse1_id, {'scheme': 'root',
                                    'hostname': 'root2.aperture.com',
                                    'port': 1409,
                                    'prefix': '//test/chamber/',
                                    'impl': 'rucio.rse.protocols.xrootd.Default',
                                    'domains': {
                                        'lan': {'read': 1, 'write': 1, 'delete': 1},
                                        'wan': {'read': 0, 'write': 0, 'delete': 0}}})

        ml = self.rc.list_replicas(dids=[{'scope': 'mock',
                                          'name': f['name'],
                                          'type': 'FILE'} for f in self.files],
                                   schemes=['root', 'gsiftp', 'davs'],
                                   metalink=True,
                                   client_location={'site': 'BLACKMESA'})
        assert 'domain="lan" priority="1" client_extract="false">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="lan" priority="2" client_extract="false">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="3" client_extract="false">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="4" client_extract="false">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="5" client_extract="false">gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0' in ml
        assert 'priority="6"' not in ml

        delete_replicas(rse_id=self.rse1_id, files=self.files)
        delete_replicas(rse_id=self.rse2_id, files=self.files)
        del_rse(self.rse1_id)
        del_rse(self.rse2_id)
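
A small sketch of how a caller might consume the sorted output above, picking the preferred (lowest priority number) PFN of the first replica; the DID and site are taken from the test:

rc = ReplicaClient()
dids = [{'scope': 'mock', 'name': 'element_0', 'type': 'FILE'}]
replicas = list(rc.list_replicas(dids=dids,
                                 schemes=['root', 'gsiftp', 'davs'],
                                 client_location={'site': 'APERTURE'}))
# 'pfns' maps each PFN to metadata such as 'domain' and 'priority'
best_pfn = min(replicas[0]['pfns'].items(), key=lambda kv: kv[1]['priority'])[0]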
Example #9
class TestROOTProxy(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        if config_get_bool('common',
                           'multi_vo',
                           raise_exception=False,
                           default=False):
            self.vo = {
                'vo': config_get('client', 'vo',
                                 raise_exception=False, default='tst')
            }
            self.vo_header = {'X-Rucio-VO': self.vo['vo']}
        else:
            self.vo = {}
            self.vo_header = {}

        self.rc = ReplicaClient()

        self.client_location_without_proxy = {
            'ip': '192.168.0.1',
            'fqdn': 'anomalous-materials.blackmesa.com',
            'site': 'BLACKMESA1'
        }
        self.rse_without_proxy = rse_name_generator()
        self.rse_without_proxy_id = add_rse(self.rse_without_proxy, **self.vo)
        add_rse_attribute(rse_id=self.rse_without_proxy_id,
                          key='site',
                          value='BLACKMESA1')

        self.client_location_with_proxy = {
            'ip': '10.0.1.1',
            'fqdn': 'test-chamber.aperture.com',
            'site': 'APERTURE1'
        }
        self.rse_with_proxy = rse_name_generator()
        self.rse_with_proxy_id = add_rse(self.rse_with_proxy, **self.vo)
        add_rse_attribute(rse_id=self.rse_with_proxy_id,
                          key='site',
                          value='APERTURE1')

        # APERTURE1 site has an internal proxy
        config_set('root-proxy-internal', 'APERTURE1',
                   'proxy.aperture.com:1094')
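        # For clients located at APERTURE1, list_replicas prepends this proxy
        # to root PFNs reached over the WAN (see test_client_list_replicas2)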

        self.files = [{
            'scope': InternalScope('mock', **self.vo),
            'name': 'half-life_%s' % i,
            'bytes': 1234,
            'adler32': 'deadbeef',
            'meta': {
                'events': 666
            }
        } for i in range(1, 4)]
        for rse_id in [self.rse_with_proxy_id, self.rse_without_proxy_id]:
            add_replicas(rse_id=rse_id,
                         files=self.files,
                         account=InternalAccount('root', **self.vo),
                         ignore_availability=True)

        add_protocol(
            self.rse_without_proxy_id, {
                'scheme': 'root',
                'hostname': 'root.blackmesa.com',
                'port': 1409,
                'prefix': '//training/facility/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        add_protocol(
            self.rse_with_proxy_id, {
                'scheme': 'root',
                'hostname': 'root.aperture.com',
                'port': 1409,
                'prefix': '//test/chamber/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

    @classmethod
    def tearDownClass(cls):
        for rse_id in [cls.rse_with_proxy_id, cls.rse_without_proxy_id]:
            delete_replicas(rse_id=rse_id, files=cls.files)
        del_rse(cls.rse_with_proxy_id)
        del_rse(cls.rse_without_proxy_id)

    def test_client_list_replicas1(self):
        """ ROOT (CLIENT): No proxy involved """

        replicas = [
            r for r in self.rc.list_replicas(
                dids=[{
                    'scope': 'mock',
                    'name': f['name'],
                    'type': 'FILE'
                } for f in self.files],
                rse_expression=self.rse_without_proxy,
                client_location=self.client_location_without_proxy)
        ]

        expected_pfns = [
            'root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1',
            'root://root.blackmesa.com:1409//training/facility/mock/c1/8d/half-life_2',
            'root://root.blackmesa.com:1409//training/facility/mock/16/30/half-life_3'
        ]
        found_pfns = [list(replica['pfns'].keys())[0] for replica in replicas]
        assert sorted(found_pfns) == sorted(expected_pfns)

    def test_client_list_replicas2(self):
        """ ROOT (CLIENT): Outgoing proxy needs to be prepended"""

        replicas = [
            r for r in self.rc.list_replicas(
                dids=[{
                    'scope': 'mock',
                    'name': f['name'],
                    'type': 'FILE'
                } for f in self.files],
                rse_expression=self.rse_without_proxy,
                client_location=self.client_location_with_proxy)
        ]

        expected_pfns = [
            'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1',
            'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/c1/8d/half-life_2',
            'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/16/30/half-life_3'
        ]
        found_pfns = [list(replica['pfns'].keys())[0] for replica in replicas]
        assert sorted(found_pfns) == sorted(expected_pfns)

    def test_client_list_replicas3(self):
        """ ROOT (CLIENT): Outgoing proxy at destination does not matter"""

        replicas = [
            r for r in self.rc.list_replicas(
                dids=[{
                    'scope': 'mock',
                    'name': f['name'],
                    'type': 'FILE'
                } for f in self.files],
                rse_expression=self.rse_with_proxy,
                client_location=self.client_location_without_proxy)
        ]

        expected_pfns = [
            'root://root.aperture.com:1409//test/chamber/mock/c9/df/half-life_1',
            'root://root.aperture.com:1409//test/chamber/mock/c1/8d/half-life_2',
            'root://root.aperture.com:1409//test/chamber/mock/16/30/half-life_3'
        ]
        found_pfns = [list(replica['pfns'].keys())[0] for replica in replicas]
        assert sorted(found_pfns) == sorted(expected_pfns)

    def test_client_list_replicas4(self):
        """ ROOT (CLIENT): Outgoing proxy does not matter when staying at site"""

        replicas = [
            r for r in self.rc.list_replicas(
                dids=[{
                    'scope': 'mock',
                    'name': f['name'],
                    'type': 'FILE'
                } for f in self.files],
                rse_expression=self.rse_with_proxy,
                client_location=self.client_location_with_proxy)
        ]
        expected_pfns = [
            'root://root.aperture.com:1409//test/chamber/mock/c9/df/half-life_1',
            'root://root.aperture.com:1409//test/chamber/mock/c1/8d/half-life_2',
            'root://root.aperture.com:1409//test/chamber/mock/16/30/half-life_3'
        ]
        found_pfns = [list(replica['pfns'].keys())[0] for replica in replicas]
        assert sorted(found_pfns) == sorted(expected_pfns)

    def test_redirect_metalink_list_replicas(self):
        """ ROOT (REDIRECT REST): Test internal proxy prepend with metalink"""
        mw = []

        # default behaviour - no location -> no proxy
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_1/metalink',
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c9/df/half-life_1' in body
        assert 'proxy' not in body
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_2/metalink',
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/c1/8d/half-life_2' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c1/8d/half-life_2' in body
        assert 'proxy' not in body
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_3/metalink',
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/16/30/half-life_3' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/16/30/half-life_3' in body
        assert 'proxy' not in body

        # site without proxy
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_1/metalink?%s' %
            urlencode(self.client_location_without_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c9/df/half-life_1' in body
        assert 'proxy' not in body
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_2/metalink?%s' %
            urlencode(self.client_location_without_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/c1/8d/half-life_2' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c1/8d/half-life_2' in body
        assert 'proxy' not in body
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_3/metalink?%s' %
            urlencode(self.client_location_without_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/16/30/half-life_3' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/16/30/half-life_3' in body
        assert 'proxy' not in body

        # at location with outgoing proxy, prepend for wan replica
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_1/metalink?%s' %
            urlencode(self.client_location_with_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c9/df/half-life_1' in body
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_2/metalink?%s' %
            urlencode(self.client_location_with_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/c1/8d/half-life_2' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c1/8d/half-life_2' in body
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_3/metalink?%s' %
            urlencode(self.client_location_with_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/16/30/half-life_3' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/16/30/half-life_3' in body
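
Judging from the expected PFNs above, the internal proxy is applied as a plain prefix on the WAN root PFN; a minimal sketch of that transformation (the string handling is an assumption that merely reproduces the fixtures):

proxy = 'proxy.aperture.com:1094'  # as set via config_set('root-proxy-internal', ...)
pfn = 'root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1'
proxied = 'root://%s//%s' % (proxy, pfn)
# -> 'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1'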
Example #10
class TestArchive(object):
    def __init__(self):
        self.dc = DIDClient()
        self.rc = ReplicaClient()

    def test_add_and_list_archive(self):
        """  ARCHIVE (CLIENT): Add files to archive and list the content """
        scope, rse = 'mock', 'MOCK'
        archive_files = ['file_' + generate_uuid() + '.zip' for _ in range(2)]
        files = []
        for i in range(10):
            files.append({
                'scope': scope,
                'name': 'lfn.%s' % str(generate_uuid()),
                'bytes': 724963570,
                'adler32': '0cc737eb',
                'type': 'FILE',
                'meta': {
                    'guid': str(generate_uuid())
                }
            })
        for archive_file in archive_files:

            self.rc.add_replicas(rse=rse,
                                 files=[{
                                     'scope': scope,
                                     'name': archive_file,
                                     'bytes': 1,
                                     'adler32': '0cc737eb'
                                 }])

            self.dc.add_files_to_archive(scope=scope,
                                         name=archive_file,
                                         files=files)

            content = [
                f for f in self.dc.list_archive_content(scope=scope,
                                                        name=archive_file)
            ]

            assert_equal(len(content), 10)

    def test_list_archive_contents_transparently(self):
        """ ARCHIVE (CORE): Transparent archive listing """

        scope = 'mock'
        rse = 'APERTURE_%s' % rse_name_generator()
        add_rse(rse)

        add_protocol(
            rse, {
                'scheme': 'root',
                'hostname': 'root.aperture.com',
                'port': 1409,
                'prefix': '//test/chamber/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        # register archive
        archive = {
            'scope': scope,
            'name': 'weighted.storage.cube.zip',
            'type': 'FILE',
            'bytes': 2596,
            'adler32': 'beefdead'
        }
        add_replicas(rse=rse, files=[archive], account='root')

        # archived files with replicas
        files_with_replicas = [{
            'scope': scope,
            'name': 'witrep-%i-%s' % (i, str(generate_uuid())),
            'type': 'FILE',
            'bytes': 1234,
            'adler32': 'deadbeef'
        } for i in range(2)]
        add_replicas(rse=rse, files=files_with_replicas, account='root')
        self.dc.add_files_to_archive(scope=scope,
                                     name=archive['name'],
                                     files=files_with_replicas)

        res = [
            r['pfns'] for r in self.rc.list_replicas(dids=[{
                'scope': scope,
                'name': f['name']
            } for f in files_with_replicas])
        ]
        assert_equal(len(res), 2)
        assert_equal(len(res[0]), 2)
        assert_equal(len(res[1]), 2)
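        # PFNs served through the archive carry domain 'zip' and use the
        # xrdcl.unzip query parameter to address the embedded file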
        for r in res:
            for p in r:
                if r[p]['domain'] == 'zip':
                    assert_in('weighted.storage.cube.zip?xrdcl.unzip=witrep-',
                              p)
                else:
                    assert_not_in(
                        'weighted.storage.cube.zip?xrdcl.unzip=witrep-', p)

        # archived files without replicas
        files = [{
            'scope': scope,
            'name': 'norep-%i-%s' % (i, str(generate_uuid())),
            'type': 'FILE',
            'bytes': 1234,
            'adler32': 'deadbeef'
        } for i in range(2)]
        self.dc.add_files_to_archive(scope=scope,
                                     name=archive['name'],
                                     files=files)
        res = [
            r['pfns'] for r in self.rc.list_replicas(dids=[{
                'scope': scope,
                'name': f['name']
            } for f in files])
        ]
        assert_equal(len(res), 2)
        for r in res:
            assert_in('weighted.storage.cube.zip?xrdcl.unzip=norep-',
                      list(r.keys())[0])

        del_rse(rse)
Example #11
class Fix():
    def __init__(self):

        # Take all data type categories
        self.RAW_RECORDS_TPC_TYPES = helper.get_hostconfig()['raw_records_tpc_types']
        self.RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['raw_records_mv_types']
        self.RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['raw_records_nv_types']
        self.LIGHT_RAW_RECORDS_TPC_TYPES = helper.get_hostconfig()['light_raw_records_tpc_types']
        self.LIGHT_RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['light_raw_records_mv_types']
        self.LIGHT_RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['light_raw_records_nv_types']
        self.HIGH_LEVEL_TYPES = helper.get_hostconfig()['high_level_types']
        self.RECORDS_TYPES = helper.get_hostconfig()['records_types']

        # Choose which data types to treat
        self.DTYPES = (self.RAW_RECORDS_TPC_TYPES + self.RAW_RECORDS_MV_TYPES +
                       self.RAW_RECORDS_NV_TYPES + self.LIGHT_RAW_RECORDS_TPC_TYPES +
                       self.LIGHT_RAW_RECORDS_MV_TYPES + self.LIGHT_RAW_RECORDS_NV_TYPES +
                       self.HIGH_LEVEL_TYPES + self.RECORDS_TYPES)

        # Take the list of all XENON RSEs
        self.RSES = helper.get_hostconfig()['rses']

        # Take the RSE that is used to perform the upload
        self.UPLOAD_TO = helper.get_hostconfig()['upload_to']

        # Take the directory from which the datamanager uploads data
        self.DATADIR = helper.get_hostconfig()['path_data_to_upload']

        # Get the sequence of rules to be created according to the data type
        self.RAW_RECORDS_TPC_RSES = helper.get_hostconfig()["raw_records_tpc_rses"]
        self.RAW_RECORDS_MV_RSES = helper.get_hostconfig()["raw_records_mv_rses"]
        self.RAW_RECORDS_NV_RSES = helper.get_hostconfig()["raw_records_nv_rses"]
        self.LIGHT_RAW_RECORDS_TPC_RSES = helper.get_hostconfig()["light_raw_records_tpc_rses"]
        self.LIGHT_RAW_RECORDS_MV_RSES = helper.get_hostconfig()["light_raw_records_mv_rses"]
        self.LIGHT_RAW_RECORDS_NV_RSES = helper.get_hostconfig()["light_raw_records_nv_rses"]
        self.HIGH_LEVEL_RSES = helper.get_hostconfig()["high_level_rses"]
        self.RECORDS_RSES = helper.get_hostconfig()["records_rses"]

        # Init the runDB
        self.db = ConnectMongoDB()

        # Init Rucio for later uploads and handling
        self.rc = RucioSummoner()

        # Init the Rucio replica client
        self.replicaclient = ReplicaClient()

        # Rucio rule assignment priority
        self.priority = 3

        # Whether add_rule should skip the actual rule creation; the attribute
        # is referenced there but was never initialized, so default to False
        # (assumption)
        self.skip_rucio = False

    def reset_upload(self, did):

        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])

        print("Resetting the upload associated to the DID: {0}".format(did))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('Run status: {0}'.format(run['status']))
        else:
            print('Run status: {0}'.format('Not available'))

        # Extract the Event Builder machine that processed this run,
        # plus the bootstrax state and, in case it was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
        else:
            print('Not processed')
            return (0)

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        if datum is None:
            print('There is no EB datum. No reset is possible')
            return (0)

        if ebstatus != "":
            print('EB status: {0}'.format(ebstatus))
        else:
            print('EB status: not available')

        # Step zero (normally not needed): change the run status to "transferring"
        #    self.db.db.find_one_and_update({'number':number},{'$set':{"status": "transferring"}})

        # First action: remove the files stored in datamanager
        files = list_file_replicas(number, dtype, hash, self.UPLOAD_TO)
        print("Deleting rucio data in datamanager disk. Deleting", len(files),
              "files")
        for file in files:
            try:
                os.remove(file)
            except OSError:
                print("File: {0} not found".format(file))

        # Second action: remove the LNGS Rucio rule
        deleted_any_rule = False
        for rse in self.RSES:
            rucio_rule = self.rc.GetRule(upload_structure=did, rse=rse)
            if rucio_rule['exists']:
                print("Deleting rucio rule = ", rucio_rule['id'],
                      "from RSE = ", rse)
                self.rc.DeleteRule(rucio_rule['id'])
                deleted_any_rule = True

        # Third action: remove possible files in datamanager in case the Rucio rule does not exist
        datamanager_rucio_rule = self.rc.GetRule(upload_structure=did,
                                                 rse=self.UPLOAD_TO)
        if not datamanager_rucio_rule['exists']:
            print(
                "Rucio rule not existing. Deleting data in datamanager without Rucio"
            )
            filelistname = os.path.join(
                "/archive/data/rucio/xnt_%06d/*/*/" % number,
                dtype + "-" + hash + "*")
            filelist = glob.glob(filelistname)
            for filePath in filelist:
                try:
                    os.remove(filePath)
                except OSError:
                    print("Error while deleting file : ", filePath)

        # If some rule has been deleted, poll every 10 minutes until it is fully gone (this can take an hour or more)
        if deleted_any_rule:
            print(
                "We have to wait until the rule is fully deleted before changing the status of the datum. It could take at least an hour"
            )
            while True:
                datamanager_rucio_rule = self.rc.GetRule(upload_structure=did,
                                                         rse=self.UPLOAD_TO)
                if not datamanager_rucio_rule['exists']:
                    print("Rule for did {0} finally deleted".format(did))
                    break
                delay = 60 * 10
                time.sleep(delay)
        else:
            print("There is no rule to delete")

        # Fourth action: set the EB status as 'eb_ready_to_upload'
        self.db.db.find_one_and_update(
            {
                '_id': run['_id'],
                'data': {
                    '$elemMatch': {
                        'type': datum['type'],
                        'location': datum['location'],
                        'host': datum['host']
                    }
                }
            }, {'$set': {
                "data.$.status": 'eb_ready_to_upload'
            }})

        print("EB status changed to eb_ready_to_upload")

        # Reload the run
        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('New run status: {0}'.format(run['status']))
        else:
            print('New run status: {0}'.format('Not available'))

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        # Prints the eb status as a confirmation of the performed change
        if ebstatus != "":
            print('New EB status: {0}'.format(ebstatus))
        else:
            print('New EB status: not available')

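    # A minimal usage sketch for the reset above; the DID format is an
    # assumption based on the parsing at the top of reset_upload, i.e.
    # '<scope>_<run number>:<data type>-<hash>':
    #   fix = Fix()
    #   fix.reset_upload('xnt_007155:raw_records-abcdef123')
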
    def add_rule(self, did, from_rse, to_rse):

        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])

        print("Adding a new rule {0} from {1} to {2}".format(
            did, from_rse, to_rse))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('Run status: {0}'.format(run['status']))
        else:
            print('Run status: {0}'.format('Not available'))

        #Checks if the datum of the sender exists in the DB
        datum = None
        for d in run['data']:
            if (d['type'] == dtype and d['host'] == 'rucio-catalogue'
                    and d['location'] == from_rse):
                datum = d
                break
        if datum is None:
            print(
                'The datum concerning data type {0} and site {1} is missing in the DB. Forced to stop'
                .format(dtype, from_rse))
            return (0)

        # Checks the rule status of the sender RSE
        rucio_rule = self.rc.GetRule(upload_structure=did, rse=from_rse)
        if rucio_rule['state'] != 'OK' and rucio_rule['state'] != 'REPLICATING':
            print(
                'The rule in {0} is neither OK nor REPLICATING. Forced to stop'
                .format(from_rse))
            return (0)

        # set the new rule
        if not self.skip_rucio:
            print("Adding the Rucio rule")
            self.rc.AddConditionalRule(did,
                                       from_rse,
                                       to_rse,
                                       lifetime=None,
                                       priority=self.priority)
        else:
            print("Rucio rule is not added")
        rucio_rule = self.rc.GetRule(did, rse=to_rse)

        # Update run status
        self.db.db.find_one_and_update({'number': number},
                                       {'$set': {
                                           'status': 'transferring'
                                       }})

        # Add a new datum in the run document
        updated_fields = {
            'host': "rucio-catalogue",
            'type': dtype,
            'location': to_rse,
            'lifetime': rucio_rule['expires'],
            'status': 'transferring',
            'did': did,
            'protocol': 'rucio'
        }
        data_dict = datum.copy()
        data_dict.update(updated_fields)
        self.db.AddDatafield(run['_id'], data_dict)

        print("Done.")

    def add_rules_from_file(self, filename, from_rse, to_rse):

        with open(filename) as f:
            dids = f.read().splitlines()

        for did in dids:

            if not did or did.startswith("#"):
                continue

            hash = did.split('-')[-1]
            dtype = did.split('-')[0].split(':')[-1]
            number = int(did.split(':')[0].split('_')[-1])

            timestamp = time.strftime("%Y-%m-%d-%H-%M-%S",
                                      time.localtime(time.time()))

            print("{0} - Adding a new rule {1} from {2} to {3}".format(
                timestamp, did, from_rse, to_rse))

            # Checks the rule status of the sender RSE
            rucio_rule = self.rc.GetRule(upload_structure=did, rse=from_rse)
            if rucio_rule['state'] not in ('OK', 'REPLICATING'):
                print(
                    'The rule in {0} is neither OK nor REPLICATING. Skipping this DID'
                    .format(from_rse))
                continue

            # Checks the rule status of the destination RSE
            rucio_rule = self.rc.GetRule(upload_structure=did, rse=to_rse)
            if rucio_rule['exists']:
                print(
                    'The rule in {0} already exists and its status is {1}. Skipping this DID'
                    .format(to_rse, rucio_rule['state']))
                continue

            # Creates the new rule
            print("Adding the Rucio rule")
            self.rc.AddConditionalRule(did,
                                       from_rse,
                                       to_rse,
                                       lifetime=None,
                                       priority=5)

            # Waits until Rucio sees this rule as successfully transferred
            print("Waiting until the transfer is completed")
            rule_is_ok = False
            while not rule_is_ok:
                delay = 10  #60
                time.sleep(delay)
                rucio_rule = self.rc.GetRule(did, rse=to_rse)
                if rucio_rule['state'] == 'OK':
                    rule_is_ok = True
            print("Transfer completed")

            wait_time = 10
            print('Waiting for {0} seconds'.format(wait_time))
            print("You can safely CTRL-C now if you need to stop me")
            try:
                time.sleep(wait_time)
            except KeyboardInterrupt:
                break

    def delete_rule(self, did, rse):

        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])

        print("Deleting the rule {0} from {1}".format(did, rse))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        #Checks if the datum exists in the DB
        datum = None
        for d in run['data']:
            if (d['type'] == dtype and d['host'] == 'rucio-catalogue'
                    and d['location'] == rse):
                datum = d
                break

        #Delete the datum
        if datum is not None:
            self.db.RemoveDatafield(run['_id'], datum)
            print("Datum deleted in DB.")
        else:
            print('There is no datum to delete')

        #Get the rule of a given DID
        rucio_rule = self.rc.GetRule(upload_structure=did, rse=rse)

        #Delete the rule
        if rucio_rule['exists']:
            self.rc.DeleteRule(rucio_rule['id'])
            print("Rucio rule deleted.")
        else:
            print('There is no Rucio rule to delete')

        # If the RSE is the datamanager, delete the files directly
        if rse == self.UPLOAD_TO:
            files = list_file_replicas(number, dtype, hash, self.UPLOAD_TO)
            print("Deleting rucio data in datamanager disk. Deleting",
                  len(files), "files")
            for file in files:
                try:
                    os.remove(file)
                except OSError:
                    print("File: {0} not found".format(file))

        print("Done.")

    def create_upload_rules(self, did):

        rucio_rule = self.rc.GetRule(upload_structure=did, rse=self.UPLOAD_TO)

        dtype = did.split('-')[0].split(':')[-1]

        # Create the replication rules abroad, chaining from the upload RSE
        if rucio_rule['exists'] and rucio_rule['state'] == "OK":
            print("Adding the Rucio rules abroad...")

            rses = [self.UPLOAD_TO]

            if dtype in self.RAW_RECORDS_TPC_TYPES:
                rses = rses + self.RAW_RECORDS_TPC_RSES
            if dtype in self.RAW_RECORDS_MV_TYPES:
                rses = rses + self.RAW_RECORDS_MV_RSES
            if dtype in self.RAW_RECORDS_NV_TYPES:
                rses = rses + self.RAW_RECORDS_NV_RSES

            if dtype in self.LIGHT_RAW_RECORDS_TPC_TYPES:
                rses = rses + self.LIGHT_RAW_RECORDS_TPC_RSES
            if dtype in self.LIGHT_RAW_RECORDS_MV_TYPES:
                rses = rses + self.LIGHT_RAW_RECORDS_MV_RSES
            if dtype in self.LIGHT_RAW_RECORDS_NV_TYPES:
                rses = rses + self.LIGHT_RAW_RECORDS_NV_RSES

            if dtype in self.HIGH_LEVEL_TYPES:
                rses = rses + self.HIGH_LEVEL_RSES

            if dtype in self.RECORDS_TYPES:
                rses = rses + self.RECORDS_RSES

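            # Chain the rules pairwise: each RSE in the sequence pulls the
            # data from the previous one, starting from the upload RSE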
            for from_rse, to_rse in zip(rses, rses[1:]):
                to_rule = self.rc.GetRule(upload_structure=did, rse=to_rse)
                if not to_rule['exists']:
                    print("Rule from {0} to {1}".format(from_rse, to_rse))
                    self.add_rule(did, from_rse, to_rse)

    def fix_upload(self, did):

        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])

        print("Fixing the upload associated to the DID: {0}".format(did))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('Run status: {0}'.format(run['status']))
        else:
            print('Run status: {0}'.format('Not available'))

        # Extract the Event Builder (EB) machine that processed this run,
        # plus the bootstrax state and, if the run was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
        else:
            print('Not processed')
            return (0)

        print("EB: {0}".format(eb))

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        if datum is None:
            print('There is no EB datum. No fix is possible')
            return (0)

        if ebstatus != "":
            print('EB status: {0}'.format(ebstatus))
        else:
            print('EB status: not available')

        # Get the expected number of files
        Nfiles = -1
        if 'file_count' in datum:
            Nfiles = datum['file_count']

        # First action: remove files from the datamanager, whether or not they were already uploaded
        print(
            "Removing all files uploaded so far (successfully or not) in the datamanager..."
        )
        filelistname = os.path.join(
            "/archive/data/rucio/xnt_%06d/*/*/" % number,
            dtype + "-" + hash + "*")
        filelist = glob.glob(filelistname)
        for file_path in filelist:
            try:
                os.remove(file_path)
            except OSError:
                print("Error while deleting file: {0}".format(file_path))

        # Second action: complete the missing uploads on the existing rule
        print('Resuming the upload...')
        file = datum['location'].split('/')[-1]
        upload_path = os.path.join(self.DATADIR, eb, file)
        self.rc.UploadToDid(did, upload_path, self.UPLOAD_TO)

        # Third action: if the datum is missing in the DB, add it and mark the EB datum as transferred
        datum_upload = None
        for d in run['data']:
            if 'did' in d:
                if d['did'] == did and d['host'] == 'rucio-catalogue' and d[
                        'location'] == self.UPLOAD_TO:
                    datum_upload = d
                    break
        if datum_upload is None:
            print(
                'The datum concerning DID {0} and site {1} is missing in the DB. It will be added'
                .format(did, self.UPLOAD_TO))

            # Update the eb data entry with status "transferred"
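            # ($elemMatch selects the matching element of the 'data' array,
            # and the positional operator in 'data.$.status' updates only
            # that element.)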
            self.db.db.find_one_and_update(
                {
                    '_id': run['_id'],
                    'data': {
                        '$elemMatch': {
                            'type': datum['type'],
                            'location': datum['location'],
                            'host': datum['host']
                        }
                    }
                }, {'$set': {
                    "data.$.status": "transferred"
                }})

            # Add a new data field with LNGS as RSE and with status "transferred"
            data_dict = datum.copy()
            data_dict.update({
                'host': "rucio-catalogue",
                'type': dtype,
                'location': "LNGS_USERDISK",
                'lifetime': 0,
                'status': 'transferred',
                'did': did,
                'protocol': 'rucio'
            })
            self.db.AddDatafield(run['_id'], data_dict)

        # Fourth action: if the rule itself is missing, create it
        rucio_rule = self.rc.GetRule(upload_structure=did, rse=self.UPLOAD_TO)
        #print(rucio_rule)
        if not rucio_rule['exists']:
            print(
                'Even if files have been uploaded, the rule has not been created yet. Creating it...'
            )
            did_dictionary = [{
                'scope': did.split(':')[0],
                'name': did.split(':')[1]
            }]
            replicas = list(
                self.replicaclient.list_replicas(
                    did_dictionary, rse_expression=self.UPLOAD_TO))
            if len(replicas) != Nfiles:
                print(
                    'Error: the rule cannot be created because the number of files uploaded ({0}) is different from the expected one ({1})'
                    .format(len(replicas), Nfiles))
                return (0)
            os.system('rucio add-rule {0} 1 {1}'.format(did, self.UPLOAD_TO))
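            # A client-side alternative to shelling out would be the rule API
            # (sketch; this class does not instantiate a RuleClient, so the
            # import below is an assumption):
            #   from rucio.client.ruleclient import RuleClient
            #   RuleClient().add_replication_rule(
            #       did_dictionary, copies=1, rse_expression=self.UPLOAD_TO)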

        # Fifth action: create the rules abroad
        self.create_upload_rules(did)

        return (0)

    def delete_db_datum(self, did, site):

        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])

        print(
            "Removing the datum from DB for the DID: {0} and from the site {1}"
            .format(did, site))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))
        print("Site: {0}".format(site))

        run = self.db.db.find_one({'number': number})

        # Get the EB datum and its status
        datum = None
        for d in run['data']:
            if 'eb' in site:
                if d['type'] == dtype and site in d[
                        'host'] and 'xenon.local' in d['host']:
                    datum = d
                    break
            else:
                if d['type'] == dtype and d['host'] == 'rucio-catalogue' and d[
                        'location'] == site:
                    datum = d
                    break

        if datum is not None:
            self.db.RemoveDatafield(run['_id'], datum)
            print("Done.")
        else:
            print('There is no datum. Nothing has been deleted')

    def set_run_status(self, number, status):

        number = int(number)

        print("Setting the status of run {0} to the value {1}".format(
            number, status))

        run = self.db.db.find_one({'number': number})
        print("status before = ", run['status'])

        self.db.db.find_one_and_update({'_id': run['_id']},
                                       {'$set': {
                                           "status": status
                                       }})

        run = self.db.db.find_one({'number': number})
        print("status after = ", run['status'])

    def set_eb_status(self, did, status):

        print("Setting the EB status of DID {0} to the value {1}".format(
            did, status))

        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])

        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Extract the Event Builder (EB) machine that processed this run,
        # plus the bootstrax state and, if the run was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
        else:
            print('Not processed')
            return (0)

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        if datum is None:
            print('There is no EB datum.')
            return (0)

        if ebstatus != "":
            print("EB status before = ", ebstatus)
        else:
            print("EB status absent before")

        # Set the desired status value
        #        self.db.db.find_one_and_update({'_id': run['_id'],'data': {'$elemMatch': datum}},
        #                                       {'$set': {'data.$.status': status}})

        self.db.db.find_one_and_update(
            {
                '_id': run['_id'],
                'data': {
                    '$elemMatch': {
                        'type': datum['type'],
                        'location': datum['location'],
                        'host': datum['host']
                    }
                }
            }, {'$set': {
                "data.$.status": status
            }})

        run = self.db.db.find_one({'number': number})

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        print("EB status after = ", ebstatus)

    def list_non_transferred_runs(self):

        runs = self.db.db.find({'status': "transferring"}, {
            'number': 1,
            'data': 1
        })

        #        dtypes = ["records","records_he", "records_nv", "records_mv"]
        #        dtypes = ["records_nv"]
        dtypes = ["raw_records"]

        for run in runs:

            for d in run['data']:
                if d['type'] in dtypes and d['host'] == 'rucio-catalogue' and d[
                        'location'] == 'LNGS_USERDISK':
                    print(run['number'], d['did'], d['status'], " ", end='')
                    for deb in run['data']:
                        if deb['type'] == d['type'] and 'eb' in deb['host']:
                            print(deb['host'], deb['status'], end='')
                    print("")

    def test(self):
        #        runs = self.db.db.find({'number' : "transferring"},{'number' : 1, 'data' : 1})
        #        self.db.db.find_one_and_update({'number': 23838, 'deleted_data.type' : 'raw_records' },
        #                          { '$set': { "deleted_data.$.file_count" : 44 } })
        runs = self.db.db.find(
            {
                'status': "transferred",
                'number': {
                    "$gte": 31113
                }
            }, {
                'number': 1,
                'status': 1,
                'data': 1
            })

        for run in runs:
            doit = False
            for d in run['data']:
                if d['type'] == 'afterpulses':
                    doit = True
            if doit:
                print(run['number'])


#                self.set_run_status(run['number'],'transferring' )

    def test_db_modification(self, did, new_status_name):

        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])

        print(
            "Testing how quickly a modification in DB is registered. Using DID: {0}"
            .format(did))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('Run status: {0}'.format(run['status']))
        else:
            print('Run status: {0}'.format('Not available'))

        # Extract the Event Builder (EB) machine that processed this run,
        # plus the bootstrax state and, if the run was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
        else:
            print('Not processed')
            return (0)

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        if datum is None:
            print('There is no EB datum. No reset is possible')
            return (0)

        if ebstatus != "":
            print('EB status: {0}'.format(ebstatus))
        else:
            print('EB status: not available')

        # Start the changes: set the EB status to the given value
        self.db.db.find_one_and_update(
            {
                '_id': run['_id'],
                'data': {
                    '$elemMatch': {
                        'type': datum['type'],
                        'location': datum['location'],
                        'host': datum['host']
                    }
                }
            }, {'$set': {
                "data.$.status": new_status_name
            }})
        print("EB status changed to {0}".format(new_status_name))

        # Reload the run
        run = self.db.db.find_one({'number': number})

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        # Print the EB status to confirm the change
        if ebstatus != "":
            print('New EB status: {0}'.format(ebstatus))
        else:
            print('New EB status: not available')

    def __del__(self):
        pass

    def fix_upload_db(self, did):

        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])

        print("Fixing the upload associated to the DID: {0}".format(did))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('Run status: {0}'.format(run['status']))
        else:
            print('Run status: {0}'.format('Not available'))

        # Extract the Event Builder (EB) machine that processed this run,
        # plus the bootstrax state and, if the run was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
        else:
            print('Not processed')
            return (0)

        # Check if the LNGS datum already exists in the DB
        for d in run['data']:
            if d['type'] == dtype and d['host'] == 'rucio-catalogue' and d[
                    'location'] == "LNGS_USERDISK":
                print(
                    'The datum concerning DID {0} for location {1} is already present in DB. Forced to stop'
                    .format(did, "LNGS_USERDISK"))
                return (0)

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        if datum is None:
            print('There is no EB datum. No fix is possible')
            return (0)

        # Update the eb data entry with status "transferred"
        self.db.db.find_one_and_update(
            {
                '_id': run['_id'],
                'data': {
                    '$elemMatch': {
                        'type': datum['type'],
                        'location': datum['location'],
                        'host': datum['host']
                    }
                }
            }, {'$set': {
                "data.$.status": "transferred"
            }})

        # Add a new data field with LNGS as RSE and with status "transferred"
        data_dict = datum.copy()
        data_dict.update({
            'host': "rucio-catalogue",
            'type': dtype,
            'location': "LNGS_USERDISK",
            'lifetime': 0,
            'status': 'transferred',
            'did': did,
            'protocol': 'rucio'
        })
        self.db.AddDatafield(run['_id'], data_dict)

        if ebstatus != "":
            print('EB status: {0}'.format(ebstatus))
        else:
            print('EB status: not available')

        print('Done')

    def postpone(self):

        # Get the current screen session
        process = psutil.Process()
        screen = process.parent().parent().parent().parent().cmdline()[-1]

        # Take the tmp file of this session containing the dataset information
        filename = "/tmp/admix-" + screen

        # Destination name
        suffix = time.strftime("-%Y-%m-%d-%H-%M-%S",
                               time.localtime(time.time()))
        destination_path = helper.get_hostconfig(
        )['path_datasets_to_fix'] + "/"
        new_filename = destination_path + filename.split('/')[-1] + suffix

        if os.path.isfile(filename) and os.path.isdir(destination_path):
            shutil.move(filename, new_filename)
            print("Dataset postponed by moving file {0} to {1}".format(
                filename, new_filename))
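
For reference, a minimal standalone sketch of the kind of completion check used in the transfer loop above, assuming a configured Rucio client; the DID and RSE names below are placeholders, and 'states' is the per-RSE replica state returned by list_replicas:

from rucio.client.replicaclient import ReplicaClient

def all_replicas_available(scope, name, rse):
    """Return True if every file replica of the DID is AVAILABLE at the RSE."""
    rc = ReplicaClient()
    for rep in rc.list_replicas(dids=[{'scope': scope, 'name': name}]):
        # 'states' maps RSE name -> replica state (e.g. 'AVAILABLE')
        if rep.get('states', {}).get(rse, '').upper() != 'AVAILABLE':
            return False
    return True

# Placeholder usage:
# all_replicas_available('xnt_007155', 'raw_records-rfz1234ab', 'LNGS_USERDISK')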
Beispiel #12
0
class RunSync(object):
    """
    Synchronize the replicas of a given run at WIPAC-ORIG to
    the corresponding Rucio site.
    """
    def __init__(self,
                 run,
                 originrse=DEFAULT_ORIGIN_RSE,
                 destrse=None,
                 scope=DEFAULT_SCOPE,
                 check=True,
                 lifetime=None,
                 dry_run=False,
                 container=None):
        """
           :param dataset: Name of the PhEDEx dataset to synchronize with Rucio.
           :param pnn: PhEDEx node name to filter on for replica information.
        """
        self.run = run
        self.originrse = originrse
        self.destrse = destrse
        self.scope = scope
        self.check = check
        self.lifetime = lifetime
        self.dry_run = dry_run
        self.container = container

        self.rucio_datasets = {}
        self.run_files = {}
        self.existent_replica_files = {}
        self.url = ''
        self.gfal = Gfal2Context()

        self.run_Number = None

        self.get_run_Number()
        self.files_storage = {}
        self.get_global_url()

        self.didc = DIDClient()
        self.repc = ReplicaClient()
        self.rulesClient = RuleClient()

        # Right now the metadata is obtained from the storage at WIPAC;
        # hopefully in the future it will come from JADE  # TODO
        self.get_run_Files()
        self.get_rucio_metadata()
        self.update_run_Files()
        self.get_files_metadata()

    def update_run_Files(self):
        """
        Updating the run files wiht only the files that have not been registered
        """
        for f in self.existent_replica_files:
            file_name = f.split('/')[-1]
            if file_name in self.run_files:
                print("File: %s already registered. Skipping it" % file_name)
                self.run_files.pop(file_name)

    def get_files_metadata(self):
        for f in self.run_files:
            if self.run + '/' + f not in self.existent_replica_files:
                self.obtain_metadata(f)
        print("Metadat initialization done")

    def obtain_metadata(self, filename):
        """
        Get the size and checksum for every file in the run from the gftp server
        """
        url = self.get_file_url(filename)
        print("checking metadata for url %s" % url)
        try:
            size = self.gfal.stat(str(url)).st_size
            adler32 = self.gfal.checksum(str(url), 'adler32')
            print(
                "got size and adler32 checksum of file: pfn=%s size=%s checksum=%s"
                % (url, size, adler32))
            self.run_files[filename] = {
                'size': size,
                'adler32': adler32,
                'name': self.run + '/' + filename
            }
        except GError:
            print("no file found at %s" % url)
            return False

    def get_file_url(self, filename):
        return self.url + '/' + self.run + '/' + filename

    def get_global_url(self):
        """
        Return the base path of the rucio url
        """
        print("Getting parameters for rse %s" % self.originrse)
        rse = rsemgr.get_rse_info(self.originrse)
        proto = rse['protocols'][0]

        schema = proto['scheme']
        prefix = proto['prefix'] + self.scope.replace('.', '/')
        if schema == 'srm':
            prefix = proto['extended_attributes']['web_service_path'] + prefix
        url = schema + '://' + proto['hostname']
        if proto['port'] != 0:
            url = url + ':' + str(proto['port'])
        self.url = url + prefix
        print("Determined base url %s" % self.url)

    def get_run_Number(self):
        """
        Obtain the run number from the full run path, e.g. IceCube/2016/filtered/level2pass2/0101/Run00127347
        """
        print("Obtaining run number out of run(dataset): %s" % self.run)
        self.run_Number = self.run.split("/")[-1]
        print("Run number (dataset): %s" % self.run_Number)

    def get_run_Files(self):
        """
        Gets the list of files for a given run and their checksums from the storage
        """
        self.run_url = self.url + '/' + self.run
        print("Listin files from url : %s" % self.run_url)
        run_files = []
        try:
            run_files = self.gfal.listdir(str(self.run_url))
        except GError:
            print("No files found at %s" % str(self.run_url))
        print("Files found in storage:")
        count = 0
        for f in run_files:
            if len(f) > 3:
                if count < 5000:
                    self.run_files[f] = {}
                    count = count + 1
                else:
                    break

    def get_rucio_metadata(self):
        """                                                                                                                                         
        Gets the list of datasets at the Rucio RSE, the files, and the metadata.                                                                           
        """
        print(
            "Initializing Rucio... getting the list of blocks and files at %s"
            % self.originrse)
        registered_datasets = self.repc.list_datasets_per_rse(self.originrse)
        for dataset in registered_datasets:
            self.rucio_datasets[dataset] = {}

        replica_info = self.repc.list_replicas([{
            "scope": self.scope,
            "name": '/' + self.run_Number
        }],
                                               rse_expression="rse=%s" %
                                               self.originrse)
        replica_files = set()
        for file_info in replica_info:
            name = file_info['name']
            if self.originrse in file_info['rses']:
                replica_files.add(name)

        self.existent_replica_files = replica_files
        print("Rucio initialization done.")

    def register(self):
        """
        Create the container, the datasets and attach them to the container.
        """
        print("Registering...")
        self.register_dataset(self.run_Number)
        self.register_replicas(self.run_files)
        self.register_container(self.container)
        self.attach_dataset_to_container(self.run_Number, self.container)
        self.add_replica_rule(dataset=self.run_Number, destRSE=self.destrse)

    def register_container(self, container):
        """
        Registering the container
        """
        print("Registering the container %s with scope: %s" %
              (container, self.scope))
        if container is None:
            print('No container added, not registering any container')
            return
        if self.dry_run:
            print('Dry run only, not registering the container')
            return
        try:
            self.didc.add_container(scope=self.scope,
                                    name=container,
                                    lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print("Container %s already exists" % container)
        except InvalidObject:
            print("Problem with container name: %s" % container)

    def attach_dataset_to_container(self, dataset, container):
        """
        Attaching the dataset to a container
        """
        print("Attaching dataset %s, to container: %s" % (dataset, container))
        if container is None:
            print('No container added, not registering dataset in container')
            return
        if self.dry_run:
            print('Dry run only, not attaching dataset container')
            return
        try:
            self.didc.attach_dids(scope=self.scope,
                                  name=container,
                                  dids=[{
                                      'scope': self.scope,
                                      'name': '/' + dataset
                                  }])
        except RucioException:
            print("dataset already attached to container")
        return

    def register_dataset(self, run):
        """
        Registering a dataset in the rucio database
        """
        print("registering dataset %s" % run)
        if self.dry_run:
            print(' Dry run only. Not creating dataset.')
            return
        try:
            self.didc.add_dataset(scope=self.scope,
                                  name=run,
                                  lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print(" Dataset %s already exists" % run)

    def register_replicas(self, replicas):
        """
        Register file replica.
        """
        if not replicas:
            return
        print("registering files in Rucio: %s" %
              ", ".join([replicas[filemd]['name'] for filemd in replicas]))
        if self.dry_run:
            print(' Dry run only. Not registering files.')
            return
        try:
            self.repc.add_replicas(rse=self.originrse,
                                   files=[{
                                       'scope':
                                       self.scope,
                                       'name':
                                       replicas[filemd]['name'],
                                       'adler32':
                                       replicas[filemd]['adler32'],
                                       'bytes':
                                       replicas[filemd]['size'],
                                   } for filemd in replicas])
            print("Adding files to dataset: %s" % self.run_Number)
        except InvalidObject:
            print("Problem with file name does not match pattern")

        for filemd in replicas:
            try:
                self.didc.attach_dids(scope=self.scope,
                                      name=self.run_Number,
                                      dids=[{
                                          'scope': self.scope,
                                          'name': replicas[filemd]['name']
                                      }])
            except FileAlreadyExists:
                print("File already attached")

    def add_replica_rule(self, destRSE, dataset):
        """
        Create a replication rule for one dataset ("run") at an RSE
        """
        print("Creating replica rule for dataset %s at rse: %s" %
              (dataset, destRSE))
        if self.dry_run:
            print(' Dry run only. Not creating rules')
            return
        if destRSE:
            try:
                self.rulesClient.add_replication_rule([{
                    "scope": self.scope,
                    "name": "/" + dataset
                }],
                                                      copies=1,
                                                      rse_expression=destRSE)
            except DuplicateRule:
                print('Rule already exists')
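
A usage sketch for the class above; the run path matches the example in get_run_Number, while the destination RSE and container name are placeholders, and dry_run=True keeps Rucio untouched (gfal2 access to the origin storage is still required):

if __name__ == '__main__':
    sync = RunSync(run='IceCube/2016/filtered/level2pass2/0101/Run00127347',
                   destrse='DESY-ZN',  # placeholder destination RSE
                   container='IceCube_2016_filtered_level2pass2_0101',  # placeholder
                   dry_run=True)
    sync.register()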
Beispiel #13
0
####from client####
from rucio.client.replicaclient import ReplicaClient
rep = ReplicaClient()
#did = 'ams-user-chenghsi:Acceptance_Form.jpg'.split(':')
did = 'ams-2011B-ISS.B620-pass4:1368923945.00000001.root'
#did = 'ams-2011B-ISS.B620-pass4:2011-06-14'
did_list = did.split(':')
scope = did_list[0]
filename = did_list[1]
rse_name = 'TW-EOS01_AMS02DATADISK'
adler32 = ''
md5 = ''
bytes = 0
#print('before:')
for x in rep.list_replicas([{'scope': scope, 'name': filename}]):
    adler32 = x['adler32']
    md5 = x['md5']
    bytes = x['bytes']
    print(adler32, md5, bytes)
#from rucio.client.didclient import DIDClient
#did = DIDClient()
#file_meta = did.get_metadata(scope, filename)
#rep.delete_replicas(rse_name, [{'scope': scope, 'name': filename}])
#print('after deletion:')
#for x in rep.list_replicas([{'scope': scope, 'name': filename}]):
#    print(x)
#rep.add_replica(rse_name, scope, filename, bytes, adler32, md5, file_meta)
print('test')
#print('after add:')
#for x in rep.list_replicas([{'scope': scope, 'name': filename}]):
#    print(x)
GFAL = Gfal2Context()

try:
    SIZE = GFAL.stat(str(URL)).st_size
    CHECKSUM = GFAL.checksum(str(URL), 'adler32')
    print("Registering file: pfn=%s size=%s checksum=%s" %
          (URL, SIZE, CHECKSUM))
except GError:
    print("no file found at %s" % URL)
    exit()

R = ReplicaClient()

REPLICAS = list(
    R.list_replicas([{
        'scope': OPTIONS.scope,
        'name': OPTIONS.name
    }]))
if REPLICAS:
    REPLICAS = REPLICAS[0]
    if 'rses' in REPLICAS:
        if OPTIONS.rse in REPLICAS['rses']:
            print("file %s with scope %s has already a replica at %s" %
                  (OPTIONS.name, OPTIONS.scope, OPTIONS.rse))
            exit()

REPLICA = [{
    'scope': OPTIONS.scope,
    'name': OPTIONS.name,
    'adler32': CHECKSUM,
    'bytes': SIZE,
    'pfn': URL
Beispiel #15
0
class DatasetInjector(object):
    """
    General Class for injecting a cms dataset in rucio
    """
    def __init__(self,
                 dataset,
                 site,
                 rse=None,
                 scope=DEFAULT_SCOPE,
                 uuid=None,
                 check=True,
                 lifetime=None,
                 dry_run=False):
        self.dataset = dataset
        self.site = site
        if rse is None:
            rse = site
        self.rse = rse
        self.scope = scope
        self.uuid = uuid
        self.check = check
        self.lifetime = lifetime
        self.dry_run = dry_run

        self.blocks = []
        self.url = ''

        self.getmetadata()
        self.get_global_url()
        self.didc = DIDClient()
        self.repc = ReplicaClient()

        self.gfal = Gfal2Context()

    def get_file_url(self, lfn):
        """
        Return the rucio url of a file.
        """
        return self.url + '/' + lfn

    def get_global_url(self):
        """
        Return the base path of the rucio url
        """
        print("Getting parameters for rse %s" % self.rse)
        rse = rsemgr.get_rse_info(self.rse)
        proto = rse['protocols'][0]

        schema = proto['scheme']
        prefix = proto['prefix'] + '/' + self.scope.replace('.', '/')
        if schema == 'srm':
            prefix = proto['extended_attributes']['web_service_path'] + prefix
        url = schema + '://' + proto['hostname']
        if proto['port'] != 0:
            url = url + ':' + str(proto['port'])
        self.url = url + prefix
        print("Determined base url %s" % self.url)

    def getmetadata(self):
        """
        Gets the list of blocks at a site, their files and their metadata
        """
        print("Initializing... getting the list of blocks and files")
        blocks = das_go_client("block dataset=%s site=%s system=phedex" %
                               (self.dataset, self.site))
        for item in blocks:
            uuid = item['block'][0]['name'].split('#')[1]
            if (self.uuid is None) or (uuid == self.uuid):
                block = {'name': item['block'][0]['name'], 'files': []}
                files = das_go_client("file block=%s site=%s system=phedex" %
                                      (block['name'], self.site))
                for item2 in files:
                    cksum = re.match(r"adler32:([^,]+)",
                                     item2['file'][0]['checksum'])
                    cksum = cksum.group(0).split(':')[1]
                    cksum = "{0:0{1}x}".format(int(cksum, 16), 8)
                    block['files'].append({
                        'name': item2['file'][0]['name'],
                        'checksum': cksum,
                        'size': item2['file'][0]['size']
                    })
                self.blocks.append(block)
        print("Initalization done.")

    def register(self):
        """
        Create the container, the datasets and attach them to the container.
        """
        print("Registering...")
        self.register_container()
        for block in self.blocks:
            self.register_dataset(block['name'])
            for filemd in block['files']:
                self.register_replica(filemd)
                self.attach_file(filemd['name'], block['name'])
        print("All datasets, blocks and files registered")

    def register_container(self):
        """
        Create the container.
        """

        print("registering container %s" % self.dataset)
        if self.dry_run:
            print(' Dry run only. Not creating container.')
            return

        try:
            self.didc.add_container(scope=self.scope,
                                    name=self.dataset,
                                    lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print(" Container %s already exists" % self.dataset)

    def register_dataset(self, block):
        """
        Create the dataset and attach it to the container
        """
        print("registering dataset %s" % block)

        if self.dry_run:
            print(' Dry run only. Not creating dataset.')
            return

        try:
            self.didc.add_dataset(scope=self.scope,
                                  name=block,
                                  lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print(" Dataset %s already exists" % block)

        try:
            print("attaching dataset %s to container %s" %
                  (block, self.dataset))
            self.didc.attach_dids(scope=self.scope,
                                  name=self.dataset,
                                  dids=[{
                                      'scope': self.scope,
                                      'name': block
                                  }])
        except RucioException:
            print(" Dataset already attached")

    def attach_file(self, lfn, block):
        """
        Attach the file to the container
        """

        if self.dry_run:
            print(' Dry run only. Not attaching files.')
            return

        try:
            print("attaching file %s" % lfn)
            self.didc.attach_dids(scope=self.scope,
                                  name=block,
                                  dids=[{
                                      'scope': self.scope,
                                      'name': lfn
                                  }])
        except FileAlreadyExists:
            print("File already attached")

    def register_replica(self, filemd):
        """
        Register file replica.
        """
        print("registering file %s" % filemd['name'])

        if self.dry_run:
            print(' Dry run only. Not registering files.')
            return

        if self.check:
            self.check_storage(filemd)
        if not self.check_replica(filemd['name']):
            self.repc.add_replicas(rse=self.rse,
                                   files=[{
                                       'scope':
                                       self.scope,
                                       'name':
                                       filemd['name'],
                                       'adler32':
                                       filemd['checksum'],
                                       'bytes':
                                       filemd['size'],
                                       'pfn':
                                       self.get_file_url(filemd['name'])
                                   }])

    def check_storage(self, filemd):
        """
        Check size and checksum of a file on storage
        """
        url = self.get_file_url(filemd['name'])
        print("checking url %s" % url)
        try:
            size = self.gfal.stat(str(url)).st_size
            checksum = self.gfal.checksum(str(url), 'adler32')
            print("got size and checksum of file: pfn=%s size=%s checksum=%s" %
                  (url, size, checksum))
        except GError:
            print("no file found at %s" % url)
            return False
        if str(size) != str(filemd['size']):
            print("wrong size for file %s. Expected %s got %s" %
                  (filemd['name'], filemd['size'], size))
            return False
        if str(checksum) != str(filemd['checksum']):
            print("wrong checksum for file %s. Expected %s git %s" %
                  (filemd['name'], filemd['checksum'], checksum))
            return False
        print("size and checksum are ok")
        return True

    def check_replica(self, lfn):
        """
        Check if a replica of the given file at the site already exists.
        """
        print("checking if file %s with scope %s has already a replica at %s" %
              (lfn, self.scope, self.rse))
        replicas = list(
            self.repc.list_replicas([{
                'scope': self.scope,
                'name': lfn
            }]))
        if replicas:
            replicas = replicas[0]
            if 'rses' in replicas:
                if self.rse in replicas['rses']:
                    print("file %s with scope %s has already a replica at %s" %
                          (lfn, self.scope, self.rse))
                    return True
        print("no existing replicas")
        return False
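
A usage sketch for DatasetInjector; the dataset and site names are placeholders, dry_run=True avoids any actual registration, and DAS plus Rucio access are assumed to be configured:

if __name__ == '__main__':
    injector = DatasetInjector(dataset='/SingleMuon/Run2016B-v1/RAW',  # placeholder
                               site='T2_US_Nebraska',  # placeholder
                               dry_run=True)
    injector.register()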
Beispiel #16
0
class TestArchive(object):
    def __init__(self):
        self.dc = DIDClient()
        self.rc = ReplicaClient()

        if config_get_bool('common',
                           'multi_vo',
                           raise_exception=False,
                           default=False):
            self.vo = {'vo': 'tst'}
        else:
            self.vo = {}

    def test_add_and_list_archive(self):
        """  ARCHIVE (CLIENT): Add files to archive and list the content """
        scope, rse = 'mock', 'MOCK'
        archive_files = ['file_' + generate_uuid() + '.zip' for _ in range(2)]
        files = []
        for i in range(10):
            files.append({
                'scope': scope,
                'name': 'lfn.%s' % str(generate_uuid()),
                'bytes': 724963570,
                'adler32': '0cc737eb',
                'type': 'FILE',
                'meta': {
                    'guid': str(generate_uuid())
                }
            })
        for archive_file in archive_files:

            self.rc.add_replicas(rse=rse,
                                 files=[{
                                     'scope': scope,
                                     'name': archive_file,
                                     'bytes': 1,
                                     'adler32': '0cc737eb'
                                 }])

            self.dc.add_files_to_archive(scope=scope,
                                         name=archive_file,
                                         files=files)

            content = [
                f for f in self.dc.list_archive_content(scope=scope,
                                                        name=archive_file)
            ]

            assert_equal(len(content), 10)

    def test_list_archive_contents_transparently(self):
        """ ARCHIVE (CORE): Transparent archive listing """

        scope = InternalScope('mock', **self.vo)
        rse = 'APERTURE_%s' % rse_name_generator()
        rse_id = add_rse(rse, **self.vo)
        root = InternalAccount('root', **self.vo)

        add_protocol(
            rse_id, {
                'scheme': 'root',
                'hostname': 'root.aperture.com',
                'port': 1409,
                'prefix': '//test/chamber/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        # register archive
        archive = {
            'scope': scope,
            'name': 'weighted.storage.cube.zip',
            'type': 'FILE',
            'bytes': 2596,
            'adler32': 'beefdead'
        }
        archive_client = archive.copy()
        archive_client['scope'] = archive_client['scope'].external

        add_replicas(rse_id=rse_id, files=[archive], account=root)

        # archived files with replicas
        files_with_replicas = [{
            'scope':
            scope,
            'name':
            'witrep-%i-%s' % (i, str(generate_uuid())),
            'type':
            'FILE',
            'bytes':
            1234,
            'adler32':
            'deadbeef'
        } for i in range(2)]
        files_with_replicas_client = []
        for f in files_with_replicas:
            new_file = f.copy()
            new_file['scope'] = new_file['scope'].external
            files_with_replicas_client.append(new_file)

        add_replicas(rse_id=rse_id, files=files_with_replicas, account=root)
        self.dc.add_files_to_archive(scope=scope.external,
                                     name=archive_client['name'],
                                     files=files_with_replicas_client)

        res = [
            r['pfns'] for r in self.rc.list_replicas(dids=[{
                'scope': scope.external,
                'name': f['name']
            } for f in files_with_replicas_client],
                                                     resolve_archives=True)
        ]
        assert_equal(len(res), 2)
        assert_equal(len(res[0]), 2)
        assert_equal(len(res[1]), 2)
        for r in res:
            for p in r:
                if r[p]['domain'] == 'zip':
                    assert_in('weighted.storage.cube.zip?xrdcl.unzip=witrep-',
                              p)
                else:
                    assert_not_in(
                        'weighted.storage.cube.zip?xrdcl.unzip=witrep-', p)

        # archived files without replicas
        files = [{
            'scope': scope.external,
            'name': 'norep-%i-%s' % (i, str(generate_uuid())),
            'type': 'FILE',
            'bytes': 1234,
            'adler32': 'deadbeef'
        } for i in range(2)]
        self.dc.add_files_to_archive(scope=scope.external,
                                     name=archive_client['name'],
                                     files=files)
        res = [
            r['pfns'] for r in self.rc.list_replicas(dids=[{
                'scope': scope.external,
                'name': f['name']
            } for f in files],
                                                     resolve_archives=True)
        ]
        assert_equal(len(res), 2)
        for r in res:
            assert_in('weighted.storage.cube.zip?xrdcl.unzip=norep-',
                      list(r.keys())[0])

    def test_list_archive_contents_at_rse(self):
        """ ARCHIVE (CORE): Transparent archive listing at RSE """

        scope = InternalScope('mock', **self.vo)
        root = InternalAccount('root', **self.vo)

        rse1 = 'APERTURE_%s' % rse_name_generator()
        rse1_id = add_rse(rse1, **self.vo)
        add_protocol(
            rse1_id, {
                'scheme': 'root',
                'hostname': 'root.aperture.com',
                'port': 1409,
                'prefix': '//test/chamber/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        rse2 = 'BLACKMESA_%s' % rse_name_generator()
        rse2_id = add_rse(rse2, **self.vo)
        add_protocol(
            rse2_id, {
                'scheme': 'root',
                'hostname': 'root.blackmesa.com',
                'port': 1409,
                'prefix': '//lambda/complex/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        # register archive
        archive1 = {
            'scope': scope,
            'name': 'cube.1.zip',
            'type': 'FILE',
            'bytes': 2596,
            'adler32': 'beefdead'
        }
        archive2 = {
            'scope': scope,
            'name': 'cube.2.zip',
            'type': 'FILE',
            'bytes': 5432,
            'adler32': 'deadbeef'
        }
        add_replicas(rse_id=rse1_id, files=[archive1], account=root)
        add_replicas(rse_id=rse2_id, files=[archive2], account=root)

        # archived files with replicas
        archived_file = [{
            'scope': scope.external,
            'name': 'zippedfile-%i-%s' % (i, str(generate_uuid())),
            'type': 'FILE',
            'bytes': 4322,
            'adler32': 'beefbeef'
        } for i in range(2)]
        self.dc.add_files_to_archive(scope=scope.external,
                                     name=archive1['name'],
                                     files=archived_file)
        self.dc.add_files_to_archive(scope=scope.external,
                                     name=archive2['name'],
                                     files=archived_file)

        res = [
            r['pfns'] for r in self.rc.list_replicas(dids=[{
                'scope': f['scope'],
                'name': f['name']
            } for f in archived_file],
                                                     rse_expression=rse1,
                                                     resolve_archives=True)
        ]

        res = self.rc.list_replicas(dids=[{
            'scope': f['scope'],
            'name': f['name']
        } for f in archived_file],
                                    metalink=True,
                                    rse_expression=rse1,
                                    resolve_archives=True)
        assert_in('APERTURE', res)
        assert_not_in('BLACKMESA', res)

        res = self.rc.list_replicas(dids=[{
            'scope': f['scope'],
            'name': f['name']
        } for f in archived_file],
                                    metalink=True,
                                    rse_expression=rse2,
                                    resolve_archives=True)
        assert_in('BLACKMESA', res)
        assert_not_in('APERTURE', res)
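        # Note: with metalink=True, list_replicas returns the replica list as
        # a single metalink XML string rather than an iterator of dicts, which
        # is why the RSE names can be checked as substrings above.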
Beispiel #17
0

try:
    SIZE = os.stat(PREFIX+'/'+OPTIONS.pfn).st_size
    CHECKSUM = adler32(PREFIX+'/'+OPTIONS.pfn)
#   SIZE = GFAL.stat(str(URL)).st_size
#   CHECKSUM = GFAL.checksum(str(URL), 'adler32')
    print("Registering file: pfn=%s size=%s checksum=%s" % (URL, SIZE, CHECKSUM))
#except GError:
except OSError:
    print("no file found at %s" % URL)
    exit()

R = ReplicaClient()

REPLICAS = list(R.list_replicas([{'scope': OPTIONS.scope, 'name': OPTIONS.name}]))
if REPLICAS:
    REPLICAS = REPLICAS[0]
    if 'rses' in REPLICAS:
        if OPTIONS.rse in REPLICAS['rses']:
            print("file %s with scope %s has already a replica at %s" %
                  (OPTIONS.name, OPTIONS.scope, OPTIONS.rse))
            exit()



REPLICA = [{
    'scope': OPTIONS.scope,
    'name' : OPTIONS.name,
    'adler32': CHECKSUM,
    'bytes': SIZE,
Beispiel #18
0
class TestDIDClients:

    def setup(self):
        self.account_client = AccountClient()
        self.scope_client = ScopeClient()
        self.meta_client = MetaClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.rse_client = RSEClient()

    def test_list_dids(self):
        """ DATA IDENTIFIERS (CLIENT): List dids by pattern."""
        tmp_scope = scope_name_generator()
        tmp_files = []
        tmp_files.append('file_a_1%s' % generate_uuid())
        tmp_files.append('file_a_2%s' % generate_uuid())
        tmp_files.append('file_b_1%s' % generate_uuid())
        tmp_rse = 'MOCK'

        self.scope_client.add_scope('jdoe', tmp_scope)
        for tmp_file in tmp_files:
            self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')

        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_a_*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_a_1*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 1)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_*_1*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 3)
        results = []

        filters = {'name': 'file*', 'created_after': datetime.utcnow() - timedelta(hours=1)}
        for result in self.did_client.list_dids(tmp_scope, filters):
            results.append(result)
        assert_equal(len(results), 0)
        with assert_raises(UnsupportedOperation):
            self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='whateverytype')

    def test_list_recursive(self):
        """ DATA IDENTIFIERS (CLIENT): List did recursive """
        # Create nested containers and datasets
        tmp_scope_1 = 'list-did-recursive'
        tmp_scope_2 = 'list-did-recursive-2'
        self.scope_client.add_scope('root', tmp_scope_1)
        self.scope_client.add_scope('root', tmp_scope_2)

        tmp_container_1 = 'container_%s' % generate_uuid()
        self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_1)

        tmp_container_2 = 'container_%s' % generate_uuid()
        self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_2)

        tmp_dataset_1 = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope_2, name=tmp_dataset_1)

        tmp_dataset_2 = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope_1, name=tmp_dataset_2)

        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_1, dids=[{'scope': tmp_scope_2, 'name': tmp_dataset_1}])
        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_2, dids=[{'scope': tmp_scope_1, 'name': tmp_dataset_2}])
        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_1, dids=[{'scope': tmp_scope_1, 'name': tmp_container_2}])

        # List DIDs not recursive - only the first container is expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=False, type='all', filters={'name': tmp_container_1})]
        assert_equal(dids, [tmp_container_1])

        # List DIDs recursive - first container and all attached collections are expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=True, type='all', filters={'name': tmp_container_1})]
        assert_true(tmp_container_1 in dids)
        assert_true(tmp_container_2 in dids)
        assert_true(tmp_dataset_1 in dids)
        assert_true(tmp_dataset_2 in dids)
        assert_equal(len(dids), 4)

        # List DIDs recursive - only containers are expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=True, type='container', filters={'name': tmp_container_1})]
        assert_true(tmp_container_1 in dids)
        assert_true(tmp_container_2 in dids)
        assert_true(tmp_dataset_1 not in dids)
        assert_true(tmp_dataset_2 not in dids)
        assert_equal(len(dids), 2)

    def test_list_by_length(self):
        """ DATA IDENTIFIERS (CLIENT): List did with length """
        tmp_scope = 'mock'

        tmp_dsn = 'dsn_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)

        dids = self.did_client.list_dids(tmp_scope, {'length.gt': 0})
        results = []
        for d in dids:
            results.append(d)
        assert_not_equal(len(results), 0)

        dids = self.did_client.list_dids(tmp_scope, {'length.gt': -1, 'length.lt': 1})
        results = []
        for d in dids:
            results.append(d)
        assert_equal(len(results), 0)

        dids = self.did_client.list_dids(tmp_scope, {'length': 0})
        results = []
        for d in dids:
            results.append(d)
        assert_equal(len(results), 0)

    def test_list_by_metadata(self):
        """ DATA IDENTIFIERS (CLIENT): List did with metadata"""
        dsns = []
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn1)

        dataset_meta = {'project': 'data12_8TeV',
                        'run_number': 400000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m920',
                        }
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn1, meta=dataset_meta)
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn2)
        dataset_meta['run_number'] = 400001
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn2, meta=dataset_meta)

        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn3)
        dataset_meta['stream_name'] = 'physics_Egamma'
        dataset_meta['datatype'] = 'NTUP_SMWZ'
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn3, meta=dataset_meta)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'version': 'f392_m920'})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn1)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'run_number': 400001})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn2)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'stream_name': 'physics_Egamma', 'datatype': 'NTUP_SMWZ'})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)

        with assert_raises(KeyNotFound):
            self.did_client.list_dids(tmp_scope, {'NotReallyAKey': 'NotReallyAValue'})

    def test_add_did(self):
        """ DATA IDENTIFIERS (CLIENT): Add, populate, list did content and create a sample"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        tmp_dsn = 'dsn_%s' % generate_uuid()
        root = InternalAccount('root')
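        # Lift the local account limits so that root can create rules on the
        # RSEs used below (-1 is treated as unlimited here)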
        set_local_account_limit(root, get_rse_id('MOCK'), -1)
        set_local_account_limit(root, get_rse_id('CERN-PROD_TZERO'), -1)

        # PFN example: rfio://castoratlas.cern.ch/castor/cern.ch/grid/atlas/tzero/xx/xx/xx/filename
        dataset_meta = {'project': 'data13_hip',
                        'run_number': 300000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m927',
                        }
        rules = [{'copies': 1, 'rse_expression': 'MOCK', 'account': 'root'}]

        with assert_raises(ScopeNotFound):
            self.did_client.add_dataset(scope='Nimportnawak', name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules)

        files = [{'scope': tmp_scope, 'name': 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid()), 'bytes': 724963570, 'adler32': '0cc737eb'}, ]
        with assert_raises(DataIdentifierNotFound):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files)

        files = []
        for i in range(5):
            lfn = 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # The srm-based PFN below does not work with the mock protocol; to be fixed:
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '/%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 10}
            files.append({'scope': tmp_scope, 'name': lfn,
                          'bytes': 724963570, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})

        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2), 'account': 'root'}]

        with assert_raises(InvalidPath):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files, rse=tmp_rse)

        files_without_pfn = [{'scope': i['scope'], 'name': i['name'], 'bytes': i['bytes'], 'adler32': i['adler32'], 'meta': i['meta']} for i in files]
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files_without_pfn, rse=tmp_rse)

        with assert_raises(DataIdentifierAlreadyExists):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)

        files = []
        for i in range(5):
            lfn = '%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # The srm-based PFN below does not work with the mock protocol; to be fixed:
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '/%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 100}
            files.append({'scope': tmp_scope, 'name': lfn,
                          'bytes': 724963570, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})
        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2)}]

        with assert_raises(InvalidPath):
            self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)
        files_without_pfn = [{'scope': i['scope'], 'name': i['name'], 'bytes': i['bytes'], 'adler32': i['adler32'], 'meta': i['meta']} for i in files]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files_without_pfn, rse=tmp_rse)

        self.did_client.close(scope=tmp_scope, name=tmp_dsn)

        tmp_dsn_output = 'dsn_%s' % generate_uuid()
        self.did_client.create_did_sample(input_scope=tmp_scope, input_name=tmp_dsn, output_scope=tmp_scope, output_name=tmp_dsn_output, nbfiles=2)
        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=tmp_dsn_output)]
        assert_equal(len(files), 2)

    def test_attach_dids_to_dids(self):
        """ DATA IDENTIFIERS (CLIENT): Attach dids to dids"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        nb_datasets = 5
        nb_files = 5
        attachments, dsns = list(), list()
        guid_to_query = None
        dsn = {}
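        # Prepare one attachment (dataset plus its files) per iteration; the
        # GUID of the first file of the last dataset is kept so the owning
        # dataset can later be resolved via get_dataset_by_guid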
        for i in range(nb_datasets):
            attachment = {}
            attachment['scope'] = tmp_scope
            attachment['name'] = 'dsn.%s' % str(generate_uuid())
            attachment['rse'] = tmp_rse
            files = []
            for _ in range(nb_files):  # '_' avoids shadowing the outer loop index
                files.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                              'bytes': 724963570, 'adler32': '0cc737eb',
                              'meta': {'guid': str(generate_uuid()), 'events': 100}})
            attachment['dids'] = files
            guid_to_query = files[0]['meta']['guid']
            dsn = {'scope': tmp_scope, 'name': attachment['name']}
            dsns.append(dsn)
            attachments.append(attachment)

        self.did_client.add_datasets(dsns=dsns)
        self.did_client.attach_dids_to_dids(attachments=attachments)
        dsns_l = [i for i in self.did_client.get_dataset_by_guid(guid_to_query)]

        assert_equal([dsn], dsns_l)

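        # Files cannot be attached directly to a container, only to datasets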
        cnt_name = 'cnt_%s' % generate_uuid()
        self.did_client.add_container(scope='mock', name=cnt_name)
        with assert_raises(UnsupportedOperation):
            self.did_client.attach_dids_to_dids([{'scope': 'mock', 'name': cnt_name, 'rse': tmp_rse, 'dids': attachment['dids']}])

    def test_add_files_to_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Add files to Datasets"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        dsn1 = 'dsn.%s' % str(generate_uuid())
        dsn2 = 'dsn.%s' % str(generate_uuid())
        meta = {'transient': True}
        files1, files2, nb_files = [], [], 5
        for i in range(nb_files):
            files1.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                           'bytes': 724963570, 'adler32': '0cc737eb',
                           'meta': {'guid': str(generate_uuid()), 'events': 100}})
            files2.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                           'bytes': 724963570, 'adler32': '0cc737eb',
                           'meta': {'guid': str(generate_uuid()), 'events': 100}})

        self.did_client.add_dataset(scope=tmp_scope, name=dsn1, files=files1,
                                    rse=tmp_rse, meta=meta)
        self.did_client.add_dataset(scope=tmp_scope, name=dsn2, files=files2,
                                    rse=tmp_rse, meta=meta)

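        # Cross-attach: files2 go into dsn1 and files1 into dsn2, so each
        # dataset ends up with 10 files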
        attachments = [{'scope': tmp_scope, 'name': dsn1, 'dids': files2, 'rse': tmp_rse},
                       {'scope': tmp_scope, 'name': dsn2, 'dids': files1, 'rse': tmp_rse}]

        self.did_client.add_files_to_datasets(attachments)

        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=dsn1)]
        assert_equal(len(files), 10)

        with assert_raises(FileAlreadyExists):
            self.did_client.add_files_to_datasets(attachments)

        for attachment in attachments:
            for i in range(nb_files):
                attachment['dids'].append({'scope': tmp_scope,
                                           'name': 'lfn.%s' % str(generate_uuid()),
                                           'bytes': 724963570,
                                           'adler32': '0cc737eb',
                                           'meta': {'guid': str(generate_uuid()),
                                                    'events': 100}})

        self.did_client.add_files_to_datasets(attachments, ignore_duplicate=True)

        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=dsn1)]
        assert_equal(len(files), 15)

        # Corrupt the meta-data: change the byte count of the first file in
        # each attachment so that re-attaching triggers a consistency check
        for attachment in attachments:
            for file in attachment['dids']:
                file['bytes'] = 1000
                break

        with assert_raises(FileConsistencyMismatch):
            self.did_client.add_files_to_datasets(attachments, ignore_duplicate=True)

    def test_add_dataset(self):
        """ DATA IDENTIFIERS (CLIENT): Add dataset """
        tmp_scope = 'mock'
        tmp_dsn = 'dsn_%s' % generate_uuid()

        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, meta={'project': 'data13_hip'})

        did = self.did_client.get_did(tmp_scope, tmp_dsn)

        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_dsn)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_add_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Bulk add datasets """
        tmp_scope = 'mock'
        dsns = list()
        for i in range(500):
            tmp_dsn = {'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'meta': {'project': 'data13_hip'}}
            dsns.append(tmp_dsn)
        self.did_client.add_datasets(dsns)

    def test_exists(self):
        """ DATA IDENTIFIERS (CLIENT): Check if data identifier exists """
        tmp_scope = 'mock'
        tmp_file = 'file_%s' % generate_uuid()
        tmp_rse = 'MOCK'

        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')

        did = self.did_client.get_did(tmp_scope, tmp_file)

        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_file)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_did_hierarchy(self):
        """ DATA IDENTIFIERS (CLIENT): Check did hierarchy rule """

        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(4)]

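        # Build a hierarchy: files attached to datasets, and datasets and
        # containers attached to containers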
        self.scope_client.add_scope(account, scope)

        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1, '0cc737eb')
        for i in range(4):
            self.did_client.add_did(scope, dst[i], 'DATASET', statuses=None, meta=None, rules=None)
        for i in range(4):
            self.did_client.add_did(scope, cnt[i], 'CONTAINER', statuses=None, meta=None, rules=None)

        for i in range(4):
            self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': file[2 * i], 'bytes': 1, 'adler32': '0cc737eb'},
                                                                 {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': cnt[2]}, {'scope': scope, 'name': cnt[3]}])
        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            pass
            # TODO: fix, fix, fix
            # if r['name'] == cnt[1]:
            #    assert_equal(r['type'], 'container')
            #    assert_equal(r['level'], 0)
            # if (r['name'] == cnt[0]) or (r['name'] == dst[0]) or (r['name'] == file[8]) or (r['name'] == file[9]):
            #    assert_equal(r['level'], 0)
            # else:
            #     assert_equal(r['level'], 1)

    def test_detach_did(self):
        """ DATA IDENTIFIERS (CLIENT): Detach dids from a did"""

        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(5)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(2)]

        self.scope_client.add_scope(account, scope)

        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1, '0cc737eb')
        for i in range(5):
            self.did_client.add_dataset(scope, dst[i], statuses=None, meta=None, rules=None)
        for i in range(2):
            self.did_client.add_container(scope, cnt[i], statuses=None, meta=None, rules=None)

        for i in range(5):
            self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': file[2 * i], 'bytes': 1, 'adler32': '0cc737eb'},
                                                                 {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': dst[2]}, {'scope': scope, 'name': dst[3]}])

        with assert_raises(UnsupportedOperation):
            self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': cnt[1]}])

        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        self.did_client.detach_dids(scope, cnt[0], [{'scope': scope, 'name': dst[1]}])
        self.did_client.detach_dids(scope, dst[3], [{'scope': scope, 'name': file[6]}, {'scope': scope, 'name': file[7]}])
        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            if r['name'] == dst[1]:
                assert_equal(r['level'], 0)
            if r['type'] == 'file':
                if r['name'] in file[6:8]:  # only the two detached files (6 and 7) are top-level
                    assert_equal(r['level'], 0)
                else:
                    assert_not_equal(r['level'], 0)

        with assert_raises(UnsupportedOperation):
            self.did_client.detach_dids(scope=scope, name=cnt[0], dids=[{'scope': scope, 'name': cnt[0]}])

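        # Detaching files from the closed dataset must reduce its byte count
        # and length by exactly the detached files' contribution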
        self.did_client.close(scope, dst[4])
        metadata = self.did_client.get_metadata(scope, dst[4])
        i_bytes, i_length = metadata['bytes'], metadata['length']
        metadata = self.did_client.get_metadata(scope, file[8])
        file1_bytes = metadata['bytes']
        metadata = self.did_client.get_metadata(scope, file[9])
        file2_bytes = metadata['bytes']
        self.did_client.detach_dids(scope, dst[4], [{'scope': scope, 'name': file[8]}, {'scope': scope, 'name': file[9]}])
        metadata = self.did_client.get_metadata(scope, dst[4])
        f_bytes, f_length = metadata['bytes'], metadata['length']
        assert_equal(i_bytes, f_bytes + file1_bytes + file2_bytes)
        assert_equal(i_length, f_length + 1 + 1)

    def test_scope_list(self):
        """ DATA IDENTIFIERS (CLIENT): Add, aggregate, and list data identifiers in a scope """

        # create some dummy data
        self.tmp_accounts = ['jdoe' for i in range(3)]
        self.tmp_scopes = [scope_name_generator() for i in range(3)]
        self.tmp_rses = [rse_name_generator() for i in range(3)]
        self.tmp_files = ['file_%s' % generate_uuid() for i in range(3)]
        self.tmp_datasets = ['dataset_%s' % generate_uuid() for i in range(3)]
        self.tmp_containers = ['container_%s' % generate_uuid() for i in range(3)]

        # add dummy data to the catalogue
        for i in range(3):
            self.scope_client.add_scope(self.tmp_accounts[i], self.tmp_scopes[i])
            self.rse_client.add_rse(self.tmp_rses[i])
            self.replica_client.add_replica(self.tmp_rses[i], self.tmp_scopes[i], self.tmp_files[i], 1, '0cc737eb')

        # put files in datasets
        for i in range(3):
            for j in range(3):
                files = [{'scope': self.tmp_scopes[j], 'name': self.tmp_files[j], 'bytes': 1, 'adler32': '0cc737eb'}]
                self.did_client.add_dataset(self.tmp_scopes[i], self.tmp_datasets[j])
                self.did_client.add_files_to_dataset(self.tmp_scopes[i], self.tmp_datasets[j], files)

        # put datasets in containers
        for i in range(3):
            for j in range(3):
                datasets = [{'scope': self.tmp_scopes[j], 'name': self.tmp_datasets[j]}]
                self.did_client.add_container(self.tmp_scopes[i], self.tmp_containers[j])
                self.did_client.add_datasets_to_container(self.tmp_scopes[i], self.tmp_containers[j], datasets)

        # reverse check if everything is in order
        for i in range(3):
            result = self.did_client.scope_list(self.tmp_scopes[i], recursive=True)

            r_topdids = []
            r_otherscopedids = []
            r_scope = []
            for r in result:
                if r['level'] == 0:
                    r_topdids.append(r['scope'] + ':' + r['name'])
                    r_scope.append(r['scope'])
                if r['scope'] != self.tmp_scopes[i]:
                    r_otherscopedids.append(r['scope'] + ':' + r['name'])
                    assert_in(r['level'], [1, 2])

            for j in range(3):
                assert_equal(self.tmp_scopes[i], r_scope[j])
                if j != i:
                    assert_in(self.tmp_scopes[j] + ':' + self.tmp_files[j], r_otherscopedids)
            assert_not_in(self.tmp_scopes[i] + ':' + self.tmp_files[i], r_topdids)

    def test_get_did(self):
        """ DATA IDENTIFIERS (CLIENT): add a new data identifier and try to retrieve it back"""
        rse = 'MOCK'
        scope = 'mock'
        file = generate_uuid()
        dsn = generate_uuid()

        self.replica_client.add_replica(rse, scope, file, 1, '0cc737eb')

        did = self.did_client.get_did(scope, file)

        assert_equal(did['scope'], scope)
        assert_equal(did['name'], file)

        self.did_client.add_dataset(scope=scope, name=dsn, lifetime=10000000)
        did2 = self.did_client.get_did(scope, dsn)
        assert_equal(type(did2['expired_at']), datetime)

    def test_get_meta(self):
        """ DATA IDENTIFIERS (CLIENT): add a new meta data for an identifier and try to retrieve it back"""
        rse = 'MOCK'
        scope = 'mock'
        file = generate_uuid()
        keys = ['project', 'run_number']
        values = ['data13_hip', 12345678]

        self.replica_client.add_replica(rse, scope, file, 1, '0cc737eb')
        for i in range(2):
            self.did_client.set_metadata(scope, file, keys[i], values[i])

        meta = self.did_client.get_metadata(scope, file)

        for i in range(2):
            assert_equal(meta[keys[i]], values[i])

    def test_list_content(self):
        """ DATA IDENTIFIERS (CLIENT): test to list contents for an identifier"""
        rse = 'MOCK'
        scope = 'mock'
        nbfiles = 5
        dataset1 = generate_uuid()
        dataset2 = generate_uuid()
        container = generate_uuid()
        files1 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for i in range(nbfiles)]
        files2 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for i in range(nbfiles)]

        self.did_client.add_dataset(scope, dataset1)

        with assert_raises(DataIdentifierAlreadyExists):
            self.did_client.add_dataset(scope, dataset1)

        self.did_client.add_files_to_dataset(scope, dataset1, files1, rse=rse)

        self.did_client.add_dataset(scope, dataset2)
        self.did_client.add_files_to_dataset(scope, dataset2, files2, rse=rse)

        self.did_client.add_container(scope, container)
        datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}]
        self.did_client.add_datasets_to_container(scope, container, datasets)

        contents = self.did_client.list_content(scope, container)

        datasets_s = [d['name'] for d in contents]
        assert_in(dataset1, datasets_s)
        assert_in(dataset2, datasets_s)

    def test_list_files(self):
        """ DATA IDENTIFIERS (CLIENT): List files for a container"""
        rse = 'MOCK'
        scope = 'mock'
        dataset1 = generate_uuid()
        dataset2 = generate_uuid()
        container = generate_uuid()
        files1 = []
        files2 = []
        for i in range(10):
            files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})
            files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})

        for i in range(10):
            self.replica_client.add_replica(rse, scope, files1[i]['name'], 1, '0cc737eb')
            self.replica_client.add_replica(rse, scope, files2[i]['name'], 1, '0cc737eb')

        self.did_client.add_dataset(scope, dataset1)
        self.did_client.add_files_to_dataset(scope, dataset1, files1)

        self.did_client.add_dataset(scope, dataset2)
        self.did_client.add_files_to_dataset(scope, dataset2, files2)
        datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}]
        self.did_client.add_container(scope, container)
        self.did_client.add_datasets_to_container(scope, container, datasets)

        # List the content of a single file (the last file added to dataset1);
        # an explicit name avoids relying on the leaked loop variable 'i'
        last_name = files1[-1]['name']
        content = self.did_client.list_files(scope, last_name)
        assert_true(content is not None)
        for d in content:
            assert_true(d['name'] == last_name)

        # List container content
        for d in [{'name': x['name'], 'scope': x['scope'], 'bytes': x['bytes'], 'adler32': x['adler32']} for x in self.did_client.list_files(scope, container)]:
            assert_in(d, files1 + files2)

        # List non-existing data identifier content
        with assert_raises(DataIdentifierNotFound):
            self.did_client.list_files(scope, 'Nimportnawak')

    def test_list_replicas(self):
        """ DATA IDENTIFIERS (CLIENT): List replicas for a container"""
        rse = 'MOCK'
        scope = 'mock'
        dsn1 = generate_uuid()
        dsn2 = generate_uuid()
        cnt = generate_uuid()
        files1 = []
        files2 = []
        for i in range(10):
            files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})
            files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})

        self.did_client.add_dataset(scope, dsn1)
        self.did_client.add_files_to_dataset(scope, dsn1, files1, rse=rse)

        self.did_client.add_dataset(scope, dsn2)
        self.did_client.add_files_to_dataset(scope, dsn2, files2, rse=rse)

        self.did_client.add_container(scope, cnt)
        self.did_client.add_datasets_to_container(scope, cnt, [{'scope': scope, 'name': dsn1}, {'scope': scope, 'name': dsn2}])

        replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': dsn1}])
        assert_true(replicas is not None)

        replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': cnt}])
        assert_true(replicas is not None)

    @raises(UnsupportedOperation)
    def test_close(self):
        """ DATA IDENTIFIERS (CLIENT): test to close data identifiers"""

        tmp_rse = 'MOCK'
        tmp_scope = 'mock'

        # Add dataset
        tmp_dataset = 'dsn_%s' % generate_uuid()

        # Add file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb')

        # Add dataset
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset)

        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Add a second file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Close dataset
        with assert_raises(UnsupportedStatus):
            self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False)
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False)

        # Add a third file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.attach_dids(scope=tmp_scope, name=tmp_dataset, dids=files)

    def test_open(self):
        """ DATA IDENTIFIERS (CLIENT): test re-opening data identifiers with a privileged account"""

        tmp_rse = 'MOCK'
        tmp_scope = 'mock'

        # Add dataset
        tmp_dataset = 'dsn_%s' % generate_uuid()

        # Add file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb')

        # Add dataset
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset)

        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Add a second file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Close dataset
        with assert_raises(UnsupportedStatus):
            self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False)
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False)

        # Add a third file replica
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=True)

    def test_bulk_get_meta(self):
        """ DATA IDENTIFIERS (CLIENT): Add a new meta data for a list of DIDs and try to retrieve them back"""
        key = 'project'
        rse = 'MOCK'
        scope = 'mock'
        files = ['file_%s' % generate_uuid() for _ in range(4)]
        dst = ['dst_%s' % generate_uuid() for _ in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for _ in range(4)]
        meta_mapping = {}
        list_dids = []
        for idx in range(4):
            self.replica_client.add_replica(rse, scope, files[idx], 1, '0cc737eb')
            self.did_client.set_metadata(scope, files[idx], key, 'file_%s' % idx)
            list_dids.append({'scope': scope, 'name': files[idx]})
            meta_mapping['%s:%s' % (scope, files[idx])] = (key, 'file_%s' % idx)
        for idx in range(4):
            self.did_client.add_did(scope, dst[idx], 'DATASET', statuses=None, meta={key: 'dsn_%s' % idx}, rules=None)
            list_dids.append({'scope': scope, 'name': dst[idx]})
            meta_mapping['%s:%s' % (scope, dst[idx])] = (key, 'dsn_%s' % idx)
        for idx in range(4):
            self.did_client.add_did(scope, cnt[idx], 'CONTAINER', statuses=None, meta={key: 'cnt_%s' % idx}, rules=None)
            list_dids.append({'scope': scope, 'name': cnt[idx]})
            meta_mapping['%s:%s' % (scope, cnt[idx])] = (key, 'cnt_%s' % idx)
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        res_list_dids = [{'scope': entry['scope'], 'name': entry['name']} for entry in list_meta]
        res_list_dids.sort()
        list_dids.sort()
        assert_equal(list_dids, res_list_dids)
        for meta in list_meta:
            did = '%s:%s' % (meta['scope'], meta['name'])
            met = meta_mapping[did]
            assert_equal((key, meta[key]), met)
        cnt = ['cnt_%s' % generate_uuid() for _ in range(4)]
        for idx in range(4):
            list_dids.append({'scope': scope, 'name': cnt[idx]})
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        assert_equal(len(list_meta), 12)
        list_dids = []
        for idx in range(4):
            list_dids.append({'scope': scope, 'name': cnt[idx]})
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        assert_equal(len(list_meta), 0)
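
    # A minimal usage sketch of the bulk-metadata call exercised above,
    # assuming a running Rucio server and an existing scope 'mock'
    # (the DID name is illustrative):
    #
    #     from rucio.client.didclient import DIDClient
    #     did_client = DIDClient()
    #     dids = [{'scope': 'mock', 'name': 'dsn_example'}]
    #     for meta in did_client.get_metadata_bulk(dids):
    #         print(meta)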
Beispiel #19
0
class TestReplicaClients:
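    """ REPLICA (CLIENT): Tests for adding, declaring and listing file replicas """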

    def setup(self):
        self.replica_client = ReplicaClient()
        self.did_client = DIDClient()

    def test_add_list_bad_replicas(self):
        """ REPLICA (CLIENT): Add bad replicas"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK')
        rse_id1 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_bad_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id1:
                    if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)

        # Run the necromancer daemon once to process the newly declared bad replicas
        run(threads=1, bulk=10000, once=True)

        # Try to attach a lost file
        tmp_dsn = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)
        with assert_raises(UnsupportedOperation):
            self.did_client.add_files_to_dataset(tmp_scope, name=tmp_dsn, files=files, rse='MOCK')

        # Adding replicas to non-deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb',
                  'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': {'events': 10}} for i in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK2')
        rse_id2 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK2', files=files)

        # Listing replicas on non-deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        print(replicas, list_rep)
        r = self.replica_client.declare_bad_file_replicas(replicas, 'This is a good reason')
        print(r)
        assert_equal(r, {})
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id2:
                    if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)

        # Now declare non-existing replicas as bad
        files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ]
        r = self.replica_client.declare_bad_file_replicas(files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_add_suspicious_replicas(self):
        """ REPLICA (CLIENT): Add suspicious replicas"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})

        # Adding replicas to non-deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb',
                  'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK2', files=files)

        # Listing replicas on non-deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})

        # Now declare non-existing replicas as suspicious
        files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ]
        r = self.replica_client.declare_suspicious_file_replicas(files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_bad_replica_methods_for_UI(self):
        """ REPLICA (REST): Test the listing of bad and suspicious replicas """
        mw = []
        headers1 = {'X-Rucio-Account': 'root', 'X-Rucio-Username': '******', 'X-Rucio-Password': '******'}
        r1 = TestApp(auth_app.wsgifunc(*mw)).get('/userpass', headers=headers1, expect_errors=True)
        assert_equal(r1.status, 200)
        token = str(r1.header('X-Rucio-Auth-Token'))
        headers2 = {'X-Rucio-Auth-Token': str(token)}

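        # /bad/states without a state filter returns both bad and suspicious
        # replicas, so the per-state counts retrieved below must add up to
        # this total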
        data = dumps({})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_files.append(dumps(line))
        nb_tot_files = len(tot_files)

        data = dumps({'state': 'B'})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_bad_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files1 = len(tot_bad_files)

        data = dumps({'state': 'S', 'list_pfns': 'True'})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_suspicious_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_suspicious_files.append(dumps(line))
        nb_tot_suspicious_files = len(tot_suspicious_files)

        assert_equal(nb_tot_files, nb_tot_bad_files1 + nb_tot_suspicious_files)

        tomorrow = datetime.utcnow() + timedelta(days=1)
        data = dumps({'state': 'B', 'younger_than': tomorrow.isoformat()})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_bad_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files = len(tot_bad_files)
        assert_equal(nb_tot_bad_files, 0)

        data = dumps({})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/summary', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        nb_tot_bad_files2 = 0
        for line in r2.body.split('\n'):
            if line != '':
                line = loads(line)
                nb_tot_bad_files2 += int(line['BAD'])
        assert_equal(nb_tot_bad_files1, nb_tot_bad_files2)

    def test_add_list_replicas(self):
        """ REPLICA (CLIENT): Add, change state and list file replicas """
        tmp_scope = 'mock'
        nbfiles = 5

        files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files1)

        files2 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files2)

        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files1])]
        assert_equal(len(replicas), len(files1))

        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files2], schemes=['file'])]
        assert_equal(len(replicas), 5)

        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files2], schemes=['srm'])]
        assert_equal(len(replicas), 5)

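        # Replicas added in state 'U' (unavailable) are hidden from the default
        # listing; once updated to state 'A' they show up again (listed here
        # with unavailable=True)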
        files3 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'state': 'U', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files3)
        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files3], schemes=['file'])]
        for i in range(nbfiles):
            assert_equal(replicas[i]['rses'], {})
        files4 = []
        for file in files3:
            file['state'] = 'A'
            files4.append(file)
        self.replica_client.update_replicas_states('MOCK3', files=files4)
        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files3], schemes=['file'], unavailable=True)]
        assert_equal(len(replicas), 5)
        for i in range(nbfiles):
            assert_in('MOCK3', replicas[i]['rses'])

    def test_delete_replicas(self):
        """ REPLICA (CLIENT): Add and delete file replicas """
        tmp_scope = 'mock'
        nbfiles = 5
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)
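        # Deleting replicas through the client API is not permitted for this
        # account, so AccessDenied is expected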
        with assert_raises(AccessDenied):
            self.replica_client.delete_replicas(rse='MOCK', files=files)