Exemple #1
0
class RucioAPI():
    """Class RucioAPI()

    Collects all Rucio client calls used by this project in one class, so
    that a change in the upstream Rucio API only has to be absorbed here.

    Two return conventions are used throughout:

    * "action" methods (create / attach / add / update / register) return
      0 on success and 1 on failure;
    * "list" / "get" methods return the requested data, or a neutral value
      (empty container, None or 1, see each method) when the call fails.
    """

    def __init__(self, enable_print=False):
        """Function: __init__()

        Store the settings and immediately configure the Rucio clients
        through ConfigHost().

        :param enable_print: Stored in ``_print_to_screen``; nothing in this
                             class reads the flag yet.
        """
        self._print_to_screen = enable_print
        self._rucio_ping = None
        self._rucio_account = os.environ.get("RUCIO_ACCOUNT")
        self.ConfigHost()

    def __del__(self):
        """Function: __del__()

        Destructor - nothing to release.
        """
        pass

    # ------------------------------------------------------------------
    # Backend configuration
    # ------------------------------------------------------------------
    def SetRucioAccount(self, rucio_account=None):
        """Function: SetRucioAccount

        Only the stored attribute is updated; the already created clients
        are not reconfigured.

        :param rucio_account: The Rucio account you would like to work with
        """
        self._rucio_account = rucio_account

    def SetConfigPath(self, config_path=None):
        """Function: SetConfigPath

        Kept for legacy command line support only; the argument is ignored.

        :param config_path: Path to CLI configuration file (ignored)
        """
        pass

    def SetProxyTicket(self, proxy_path=None):
        """Function: SetProxyTicket

        Kept for legacy command line support only; the argument is ignored.

        :param proxy_path: Path to the proxy ticket (ignored)
        """
        pass

    def SetHost(self, hostname=None):
        """Function: SetHost

        Kept for legacy command line support only; the argument is ignored.

        :param hostname: Host name (ignored)
        """
        pass

    def ConfigHost(self):
        """Function: ConfigHost

        Set up the Rucio API backend. Three clients are created:

        * Client()
        * UploadClient()
        * DownloadClient()

        :raise SystemExit: with exit code 1 if any client can not be created
                           (Rucio not installed or miss-configured)
        """
        try:
            self._rucio_client = Client()
            self._rucio_client_upload = UploadClient(logger=logger)
            self._rucio_client_download = DownloadClient()
            self._rucio_ping = self._rucio_client.ping
        except Exception:
            print("Can not init the Rucio API")
            print("-> Check for your Rucio installation")
            # Same effect as the interactive helper exit(1), but it does not
            # depend on the ``site`` module being loaded.
            raise SystemExit(1)

    def Whoami(self):
        """RucioAPI:Whoami

        :return: Whatever ``Client.whoami()`` returns (information about the
                 current Rucio user and credentials)
        """
        return self._rucio_client.whoami()

    def GetRucioPing(self):
        """Function: GetRucioPing

        :return: The ``ping`` attribute of the Rucio client. It is returned
                 as is and not called here.
        """
        return self._rucio_client.ping

    # ------------------------------------------------------------------
    # Scopes
    # ------------------------------------------------------------------
    def CreateScope(self, account, scope, verbose=False):
        """Function: CreateScope()

        Create a new Rucio scope which does not exist yet.
        Be aware that you need Rucio permissions to do it.

        :param account: The Rucio account you are working with (needs to be allowed to create scopes)
        :param scope: The scope name you like to create
        :param verbose: If True, print the Duplicate exception when the scope already exists
        :return result: 0 if the scope was created, 1 otherwise
        """
        try:
            self._rucio_client.add_scope(account, scope)
        except AccessDenied as e:
            print(e)
        except Duplicate as e:
            if verbose:
                print(e)
        else:
            return 0
        return 1

    # ------------------------------------------------------------------
    # List commands
    # ------------------------------------------------------------------
    def GetRSE(self, rse):
        """Function: GetRSE(...)

        Return further information about the setup of a specific RSE.

        :param rse: A (string) valid Rucio Storage Element (RSE) name
        :return result: A dictionary with information about the RSE, {} on failure
        """
        try:
            return self._rucio_client.get_rse(rse)
        except Exception:
            print("No RSE attributes received for {0}".format(rse))
        return {}

    def ListRSEAttributes(self, rse):
        """Function: ListRSEAttributes(...)

        Return the attributes of a Rucio Storage Element.

        :param rse: A valid (string) Rucio Storage Element (RSE) name
        :return result: A dictionary with RSE attributes, {} on failure
        """
        try:
            return self._rucio_client.list_rse_attributes(rse)
        except Exception:
            print("No RSE attributes received for {0}".format(rse))
        return {}

    def ListRSEs(self):
        """Function: ListRSEs

        Return an overview of all registered Rucio Storage Elements.

        :return result: A list of dictionaries, one per RSE. [] on failure
        """
        try:
            return list(self._rucio_client.list_rses())
        except Exception:
            print("No RSE received from Rucio.")
        return []

    def ListContent(self, scope, name):
        """Function: ListContent()

        :param scope: A string which refers to a Rucio scope
        :param name: A string which refers to a Rucio name
        :return result: A list of dictionaries with the content attached to
                        the DID. [] if the client raises a TypeError
        """
        try:
            return list(self._rucio_client.list_content(scope, name))
        except TypeError as e:
            print(e)
        return []

    def ListScopes(self):
        """Function: ListScopes()

        List all created scopes in the Rucio catalogue.

        :return result: The scopes as returned by the client, otherwise []
        """
        try:
            return self._rucio_client.list_scopes()
        except Exception:
            print("No scopes? - Check that!")
        return []

    def ListFileReplicas(self, scope, lfn):
        """Function: ListFileReplicas(...)

        Forward to ``list_file_replicas`` of the Rucio client. Exceptions
        raised by the client are not caught here.

        :param scope: A string which follows the rules of a Rucio scope
        :param lfn: the lfn.
        :return result: Whatever the client returns for (scope, lfn)
        """
        return self._rucio_client.list_file_replicas(scope, lfn)

    def ListFiles(self, scope, name, long=True):
        """Function: ListFiles(...)

        List all files which are attached to a dataset or container.

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio name
        :param long: Forwarded to the client to select the output format
                     (check the Rucio documentation for it)
        :return result: A list of files, otherwise []
        """
        try:
            # Materialise inside the try block: if the client hands back a
            # lazy iterator, errors only show up while iterating.
            return list(self._rucio_client.list_files(scope, name, long=long))
        except Exception:
            print("No files are listed for {0}:{1}".format(scope, name))
        return []

    def ListDids(self,
                 scope,
                 filters,
                 type='collection',
                 long=False,
                 recursive=False):
        """Function: ListDids(...)

        List all data identifiers in a scope which match a given pattern.
        Check the Rucio github page for details.

        :param scope: The valid string which follows the Rucio scope name.
        :param filters: A dictionary of key/value pairs like {'name': 'file_name','rse-expression': 'tier0'}.
        :param type: The type of the did: 'all'(container, dataset or file)|'collection'(dataset or container)|'dataset'|'container'|'file'
        :param long: Long format option to display more information for each DID.
        :param recursive: Recursively list DIDs content.
        :return result: A list of DIDs. [] if the client raises a TypeError
        """
        try:
            # Positional on purpose: the keyword name of the third argument
            # is not relied upon.
            return list(
                self._rucio_client.list_dids(scope, filters, type, long,
                                             recursive))
        except TypeError as e:
            print(e)
        return []

    def ListDidRules(self, scope, name):
        """Function: ListDidRules(...)

        :param scope: A string which refers to the Rucio scope
        :param name: A string which refers to the Rucio name (a container, dataset or file name)
        :return: A list of Rucio rules with additional rule information.
                 [] if the client raises a TypeError or there are no rules.
        """
        try:
            return list(self._rucio_client.list_did_rules(scope, name))
        except TypeError as e:
            print(e)
        return []

    # ------------------------------------------------------------------
    # Attach and detach
    # ------------------------------------------------------------------
    def AttachDids(self, scope, name, attachment, rse=None):
        """Function: AttachDids(...)

        Attach DIDs to a top-level dataset or container. ``scope`` and
        ``name`` define the top-level structure; ``attachment`` describes
        what is attached to it.

        More information under https://github.com/rucio/rucio

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio name
        :param attachment: A dictionary or a list of dictionaries which consist of two keys: scope and name
                           example{'scope': 'example_scope1', 'name':'example_name1'}
        :param rse: The RSE name when registering replicas. (optional)
        :return result: 0 if successful, 1 if the content is already attached
        """
        # A single dictionary is wrapped into a list of dictionaries.
        if isinstance(attachment, dict):
            attachment = [attachment]

        try:
            self._rucio_client.attach_dids(scope, name, attachment, rse=rse)
        except DuplicateContent as e:
            print(e)
            return 1
        return 0

    def DetachDids(self, scope, name, dids):
        """Function: DetachDids(...)

        Best-effort detach of ``dids`` from the DID (scope, name). Failures
        are swallowed on purpose.

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio name
        :param dids: The DIDs to detach, as expected by the Rucio client
        :return: Always None
        """
        try:
            self._rucio_client.detach_dids(scope, name, dids)
        except Exception:
            return None

    # ------------------------------------------------------------------
    # Container and dataset management
    # ------------------------------------------------------------------
    def CreateContainer(self,
                        scope,
                        name,
                        statuses=None,
                        meta=None,
                        rules=None,
                        lifetime=None):
        """Function CreateContainer(...)

        Create a Rucio container based on scope and container name. All
        optional arguments are forwarded to the Rucio client.
        More information under https://github.com/rucio/rucio

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio container name
        :param statuses: Status (optional)
        :param meta: Further meta data which are going to be connected to the container. (optional)
        :param rules: Transfer rules which apply to the container immediately. (optional)
        :param lifetime: A Rucio lifetime for the container (optional)
        :return result: 0 if successful, 1 if the container already exists
        """
        try:
            self._rucio_client.add_container(scope,
                                             name,
                                             statuses=statuses,
                                             meta=meta,
                                             rules=rules,
                                             lifetime=lifetime)
        except DataIdentifierAlreadyExists as e:
            print(e)
            return 1
        return 0

    def CreateDataset(self,
                      scope,
                      name,
                      statuses=None,
                      meta=None,
                      rules=None,
                      lifetime=None,
                      files=None,
                      rse=None,
                      verbose=False):
        """Function CreateDataset(...)

        Create a Rucio dataset based on scope and dataset name. All optional
        arguments except ``verbose`` are forwarded to the Rucio client.
        More information under https://github.com/rucio/rucio

        :param scope:    A string which follows the rules of a Rucio scope
        :param name:     A string which follows the rules of a Rucio dataset name
        :param statuses: Status (optional)
        :param meta:     Further meta data which are going to be connected to the dataset. (optional)
        :param rules:    Transfer rules which apply to the dataset immediately. (optional)
        :param lifetime: A Rucio lifetime for the dataset (optional)
        :param files:    Content of the dataset (optional)
        :param rse:      The RSE name when registering replicas (optional)
        :param verbose:  Flag to print DataIdentifierAlreadyExists exceptions
        :return result:  0 if successful, 1 if the dataset already exists
        """
        try:
            self._rucio_client.add_dataset(scope,
                                           name,
                                           statuses=statuses,
                                           meta=meta,
                                           rules=rules,
                                           lifetime=lifetime,
                                           files=files,
                                           rse=rse)
        except DataIdentifierAlreadyExists as e:
            if verbose:
                print(e)
            return 1
        return 0

    # ------------------------------------------------------------------
    # Rules
    # ------------------------------------------------------------------
    def AddRule(self,
                dids,
                copies,
                rse_expression,
                weight=None,
                lifetime=None,
                grouping='DATASET',
                account=None,
                locked=False,
                source_replica_expression=None,
                activity=None,
                notify='N',
                purge_replicas=False,
                ignore_availability=False,
                comment=None,
                ask_approval=False,
                asynchronous=False,
                priority=3,
                meta=None):
        """Function: AddRule(...)

        Add a Rucio replication rule to the given data identifiers (DIDs).
        Every option is forwarded to the Rucio client.
        More information under https://github.com/rucio/rucio

        :param dids:                       The data identifier set.
        :param copies:                     The number of replicas.
        :param rse_expression:             Boolean string expression to give the list of RSEs.
        :param weight:                     If the weighting option of the replication rule is used, the choice of RSEs takes their weight into account.
        :param lifetime:                   The lifetime of the replication rules (in seconds).
        :param grouping:                   ALL -  All files will be replicated to the same RSE.
                                           DATASET - All files in the same dataset will be replicated to the same RSE.
                                           NONE - Files will be completely spread over all allowed RSEs without any grouping considerations at all.
        :param account:                    The account owning the rule.
        :param locked:                     If the rule is locked, it cannot be deleted.
        :param source_replica_expression:  RSE Expression for RSEs to be considered for source replicas.
        :param activity:                   Transfer Activity to be passed to FTS.
        :param notify:                     Notification setting for the rule (Y, N, C).
        :param purge_replicas:             When the rule gets deleted purge the associated replicas immediately.
        :param ignore_availability:        Option to ignore the availability of RSEs.
        :param ask_approval:               Ask for approval of this replication rule.
        :param asynchronous:               Create rule asynchronously by judge-injector.
        :param priority:                   Priority of the transfers.
        :param comment:                    Comment about the rule.
        :param meta:                       Metadata, as dictionary. Only forwarded when not None.

        :return result:  0 if successful, 1 if the rule already exists
        """
        rule_options = dict(
            weight=weight,
            lifetime=lifetime,
            grouping=grouping,
            account=account,
            locked=locked,
            source_replica_expression=source_replica_expression,
            activity=activity,
            notify=notify,
            purge_replicas=purge_replicas,
            ignore_availability=ignore_availability,
            comment=comment,
            ask_approval=ask_approval,
            asynchronous=asynchronous,
            priority=priority,
        )
        # ``meta`` was never sent before; only send it on request so that
        # client versions without this keyword keep working.
        if meta is not None:
            rule_options['meta'] = meta

        try:
            self._rucio_client.add_replication_rule(dids, copies,
                                                    rse_expression,
                                                    **rule_options)
        except DuplicateRule as e:
            print(e)
            return 1
        return 0

    def UpdateRule(self, rule_id, options=None):
        """Function UpdateRule()

        Update a particular rule according to its rule_id.

        :param rule_id: A Rucio rule id string
        :param options: A dictionary with certain options (e.g. lifetime, weight, ...)
        :return result: 0 on success, 1 at failure
        """
        try:
            self._rucio_client.update_replication_rule(rule_id, options)
        except Exception:
            print("Raised exception in UpdateRule")
            return 1
        return 0

    def GetReplicationRule(self, rule_id, estimate_ttc=False):
        """Function: GetReplicationRule(...)

        Get information on the replication rule based on the rule ID.

        :param rule_id: A valid Rucio rule ID
        :param estimate_ttc: Forwarded to the Rucio client
        :return result: Information on the replication rule, otherwise 1
        """
        try:
            # The client method is already bound: do not pass ``self`` again.
            return self._rucio_client.get_replication_rule(
                rule_id, estimate_ttc=estimate_ttc)
        except Exception:
            print("No replication rule to get")
        return 1

    def DeleteRule(self, rule_id):
        """Function: DeleteRule(...)

        Delete a replication rule; the client is always called with
        ``purge_replicas=True``. Exceptions of the client are not caught.

        :param rule_id: A rucio rule id string
        """
        self._rucio_client.delete_replication_rule(rule_id,
                                                   purge_replicas=True)

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------
    def GetMetadata(self, scope, name):
        """Function: GetMetadata(...)

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio name
        :return: The metadata returned by the client, None on failure
        """
        try:
            return self._rucio_client.get_metadata(scope, name)
        except Exception:
            return None

    def SetMetadata(self, scope, name, key, value, recursive=False):
        """Function: SetMetadata(...)

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio name
        :param key: Metadata key
        :param value: Metadata value
        :param recursive: Forwarded to the Rucio client
        :return: The return value of the client, None on failure
        """
        try:
            return self._rucio_client.set_metadata(scope,
                                                   name,
                                                   key,
                                                   value,
                                                   recursive=recursive)
        except Exception:
            return None

    # ------------------------------------------------------------------
    # Data upload / download / register
    # ------------------------------------------------------------------
    def Upload(self, upload_dict=None):
        """Function: Upload()

        The list of dictionaries needs to follow this convention:
        Rucio/Github: https://github.com/rucio/rucio/blob/master/lib/rucio/client/uploadclient.py#L71

        Exceptions raised by the upload client are not caught here.

        :param upload_dict: A list object with dictionaries
        :return result: The return value of ``UploadClient.upload``
        """
        return self._rucio_client_upload.upload(upload_dict)

    def DownloadDids(self, items, num_threads=2, trace_custom_fields=None):
        """Function: DownloadDids(...)

        Download from the Rucio catalogue by Rucio DIDs (or a list of them).

        :param items: A list or a dictionary of information what to download
        :param num_threads: Number of download threads, standard 2 (optional)
        :param trace_custom_fields: Customize downloads (look at Rucio tutorials).
                                    None (default) is sent as an empty dictionary (optional)
        :return result: The result of ``DownloadClient.download_dids``. If it fails: 1
        """
        # A fresh dictionary per call avoids sharing one mutable default.
        if trace_custom_fields is None:
            trace_custom_fields = {}

        # If a dictionary is handed over, we create a list of it.
        if isinstance(items, dict):
            items = [items]

        try:
            return self._rucio_client_download.download_dids(
                items=items,
                num_threads=num_threads,
                trace_custom_fields=trace_custom_fields)
        except Exception:
            return 1

    def Register(self, rse, files, ignore_availability=True):
        """Function: Register(...)

        Register already existing files as replicas at an RSE.

        Each entry of ``files`` is a dictionary, e.g.
        {'scope': ..., 'name': ..., 'adler32': ..., 'bytes': ...}.

        Only the replicas are registered. Attaching the files to a dataset
        is a separate step (see AttachDids); the former attach loop referred
        to names which do not exist in this class and could never run.

        :param rse: The RSE name the replicas are registered at
        :param files: A list of file dictionaries (see above)
        :param ignore_availability: Forwarded to the Rucio client
        :return result: 0 if successful, 1 for failure
        """
        try:
            self._rucio_client.add_replicas(rse, files, ignore_availability)
        except Exception:
            print("Problem with file name does not match pattern")
            return 1
        return 0
Exemple #2
0
class CMSRucioDatasetReplica(object):
    """
    Class representing the replica at a site of a CMS Dataset (PhEDEx FileBlock)
    """

    #pylint: disable=too-many-arguments
    def __init__(self,
                 rds,
                 pnn,
                 rse=None,
                 scope=DEFAULT_SCOPE,
                 lifetime=None,
                 pcli=None,
                 rcli=None):
        """
        Get the status of replica of pditem at pnn
        considering only closed blocks completely replicated at site.

        :pnn:      PhEDEx node name.
        :rds:      Rucio Dataset (PhEDEx FileBlock) name.
        :rse:      Rucio RSE. If None (default) it is the first RSE returned by
                   list_rses for the expression 'cms_type=real&pnn=<pnn>'.
        :scope:    Scope. Default: DEFAULT_SCOPE.
        :lifetime: Lifetime handed to add_container / add_dataset.
        :pcli:     Reference to a phedex.PhEDEx object or a dict
                   {'instance': <instance>, 'dasgoclient': <path>, 'datasvc': <url>}
                   none of the keys is mandatory. Default is {}.
        :rcli:     Reference to a rucio Client() instance or a dict
                   {'account': ..., ... } none of the keys is mandatory.
                   Default is {'account': <sync account>}
        """

        self.pnn = pnn

        self._get_pcli(pcli)

        # needs self.pnn to build the default account name
        self._get_rcli(rcli)

        if rse is None:
            # list() makes the lookup work whether list_rses returns a list
            # or a lazy iterator, which can not be indexed.
            matching_rses = list(
                self.rcli.list_rses('cms_type=real&pnn=%s' % self.pnn))
            self.rse = matching_rses[0]['rse']
        else:
            self.rse = rse

        self.container = self.pcli.check_data_item(pditem=rds)['pds']

        self.dataset = rds

        self.scope = scope

        self.lifetime = lifetime

        # sets self.is_at_pnn
        self.block_at_pnn()

        if self.is_at_pnn:
            self.replicas = self.pcli.fileblock_files(pnn=pnn, pfb=rds)
        else:
            self.replicas = {}

    def _get_pcli(self, pcli):
        """
        Set self.pcli from a PhEDEx instance or from a dict of PhEDEx
        constructor arguments (None is treated as {}).

        :raises TypeError: if pcli is neither of them.
        """
        if pcli is None:
            pcli = {}

        if isinstance(pcli, dict):
            self.pcli = PhEDEx(**pcli)
        elif isinstance(pcli, PhEDEx):
            #pylint: disable=redefined-variable-type
            self.pcli = pcli
        else:
            raise TypeError("wrong type for pcli parameter %s" % type(pcli))

    def _get_rcli(self, rcli):
        """
        Set self.rcli from a Client instance or from a dict of Client
        constructor arguments (None is treated as {}). If the dict has no
        'account' key the sync account of self.pnn is used.

        :raises TypeError: if rcli is neither of them.
        """
        if rcli is None:
            rcli = {}

        if isinstance(rcli, dict):
            # work on a copy: the caller's dict must not get an extra key
            options = dict(rcli)
            if 'account' not in options:
                options['account'] = SYNC_ACCOUNT_FMT % self.pnn.lower()
            self.rcli = Client(**options)
        elif isinstance(rcli, Client):
            #pylint: disable=redefined-variable-type
            self.rcli = rcli
        else:
            raise TypeError("wrong type for rcli parameter %s" % type(rcli))

    def block_at_pnn(self):
        """
        Verify if the block is at pnn (using phedex datasvc).
        The result is stored in self.is_at_pnn.
        """
        metadata = self.pcli.list_data_items(pditem=self.dataset,
                                             pnn=self.pnn,
                                             locality=True,
                                             metadata=True)
        # exactly one item, with a block, with a replica flagged complete
        self.is_at_pnn = bool(len(metadata) == 1 and\
            'block' in metadata[0] and\
            'replica' in metadata[0]['block'][0] and\
            metadata[0]['block'][0]['replica'][0]['complete'] == 'y')

    def register_container(self, dry=False):
        """
        Register container of the dataset
        (only if there is a dataset replica on the pnn)
        :dry: Dry run. Default false.

        returns 'exists', 'created' or 'skipped'
        """

        try:
            self.rcli.get_did(scope=self.scope, name=self.container)
            return 'exists'
        except DataIdentifierNotFound:
            pass

        if not self.is_at_pnn:
            return 'skipped'

        if dry:
            logging.dry('Create container %s in scope %s.', self.container,
                        self.scope)
            return 'created'

        logging.verbose('Create container %s in scope %s.', self.container,
                        self.scope)
        try:
            self.rcli.add_container(scope=self.scope,
                                    name=self.container,
                                    lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            logging.warning('Container was created in the meanwhile')
            return 'exists'

        return 'created'

    def register_dataset(self, dry=False):
        """
        Register the dataset (if there is a replica at the pnn)
        and attach it to the container.
        :dry: Dry run. Default false.

        returns 'exists', 'created' or 'skipped'
        """

        try:
            self.rcli.get_did(scope=self.scope, name=self.dataset)
            return 'exists'
        except DataIdentifierNotFound:
            pass

        if not self.is_at_pnn:
            return 'skipped'

        if dry:
            logging.dry('Create dataset %s in scope %s.', self.dataset,
                        self.scope)
            return 'created'

        logging.verbose('Create dataset %s in scope %s.', self.dataset,
                        self.scope)
        self.rcli.add_dataset(scope=self.scope,
                              name=self.dataset,
                              lifetime=self.lifetime)
        self.rcli.attach_dids(scope=self.scope,
                              name=self.container,
                              dids=[{
                                  'scope': self.scope,
                                  'name': self.dataset
                              }])
        return 'created'

    def _delete_replicas_with_retry(self, lfns):
        """
        Delete the replicas of lfns at self.rse, trying again after a random
        pause of 1 to 5 seconds when a DatabaseException is raised.
        The exception is re-raised after the fourth failed attempt.
        """
        attempt = 0
        while True:
            attempt += 1
            try:
                self.rcli.delete_replicas(rse=self.rse,
                                          files=[{
                                              'scope': self.scope,
                                              'name': lfn,
                                          } for lfn in lfns])
                return
            except DatabaseException:
                logging.warning('DatabaseException raised, retrying...')
                if attempt > 3:
                    raise
                time.sleep(randint(1, 5))

    def update_replicas(self, dry=False):
        """
        Add or removes replicas for the dataset at rse.
        :dry:  Dry run. default false

        returns {'added': <lfns>, 'removed': <lfns>}
        """

        logging.notice('Updating replicas for %s:%s at %s' %
                       (self.scope, self.dataset, self.rse))

        replicas = self.rcli.list_replicas([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                           rse_expression='rse=%s' % self.rse)

        # lfns known to rucio at the rse / lfns fetched from phedex
        rucio_lfns = set(repl['name'] for repl in replicas)
        phedex_lfns = set(self.replicas)

        missing = list(phedex_lfns - rucio_lfns)

        to_remove = list(rucio_lfns - phedex_lfns)

        if missing and dry:
            logging.dry('Adding replicas %s to rse %s.', str(missing),
                        self.rse)

        elif missing:
            logging.verbose('Adding replicas %s to rse %s.', str(missing),
                            self.rse)

            self.rcli.add_replicas(rse=self.rse,
                                   files=[{
                                       'scope': self.scope,
                                       'name': self.replicas[lfn]['name'],
                                       'adler32':
                                       self.replicas[lfn]['checksum'],
                                       'bytes': self.replicas[lfn]['size'],
                                   } for lfn in missing])

            # missing files that are not in the list of dataset files
            # are to be attached.
            lfns = [
                item['name']
                for item in self.rcli.list_files(scope=self.scope,
                                                 name=self.dataset)
            ]

            missing_lfns = list(set(missing) - set(lfns))
            if missing_lfns:
                logging.verbose('Attaching lfns %s to dataset %s.',
                                str(missing_lfns), self.dataset)

                try:
                    self.rcli.attach_dids(scope=self.scope,
                                          name=self.dataset,
                                          dids=[{
                                              'scope': self.scope,
                                              'name': lfn
                                          } for lfn in missing_lfns])

                except FileAlreadyExists:
                    logging.warning('Trying to attach already existing files.')

        if to_remove and dry:
            logging.dry('Removing replicas %s from rse %s.', str(to_remove),
                        self.rse)

        elif to_remove:
            logging.verbose('Removing replicas %s from rse %s.',
                            str(to_remove), self.rse)
            for to_remove_chunk in chunks(to_remove, REMOVE_CHUNK_SIZE):
                self._delete_replicas_with_retry(to_remove_chunk)

        return {'added': missing, 'removed': to_remove}

    def update_rule(self, dry=False):
        """
        Adds or removes the rule for the dataset.
        :dry:  Dry run. default false

        returns the action performed: None, added, removed
        (in a dry run the action that would have been performed)
        """
        rules = self.rcli.list_did_rules(scope=self.scope, name=self.dataset)
        account = self.rcli.account
        action = None
        rse_exp = 'rse=' + self.rse

        # first rule of the client account that targets exactly this rse
        rrule = next((
            rule for rule in rules
            if rule['account'] == account and\
                rule['rse_expression'] == rse_exp
        ), None)

        if rrule is None and self.is_at_pnn:

            if dry:
                logging.dry("Adding rule for dataset %s at rse %s.",
                            self.dataset, self.rse)
            else:
                self.rcli.add_replication_rule(
                    dids=[{
                        'scope': self.scope,
                        'name': self.dataset
                    }],
                    copies=1,
                    rse_expression=rse_exp,
                )
            action = 'added'

        elif rrule is not None and not self.is_at_pnn:
            # removing rule
            if dry:
                logging.dry("Removing rule for dataset %s at rse %s.",
                            self.dataset, self.rse)
            else:
                self.rcli.delete_replication_rule(rrule['id'],
                                                  purge_replicas=False)
            action = 'removed'

        return action

    def update(self, dry=False):
        """
        synchronize the dataset replica info.
        :dry:  Dry run. default false

        returns a dict with the keys at_node, container, dataset,
        replicas and rule holding the outcome of each step.
        """
        ret = {'at_node': self.is_at_pnn}

        #datasets and containers are only added
        ret['container'] = self.register_container(dry)
        ret['dataset'] = self.register_dataset(dry)

        ret['replicas'] = self.update_replicas(dry)
        ret['rule'] = self.update_rule(dry)

        return ret
class Rucio:
    """Helper bundling the Rucio and gfal2 clients used for one scope.

    An instance is bound to a scope, an origin RSE, a destination RSE and
    a Rucio account. It offers helpers to inspect storage, register DIDs
    into datasets/containers, create replication rules and aggregate rule
    statistics.
    """

    # (rule state, counter key) pairs tallied by stats_rules.
    _STATE_COUNTERS = (
        ('REPLICATING', 'total_replicating'),
        ('STUCK', 'total_stuck'),
        ('OK', 'total_ok'),
    )

    # (DID type, counter key) pairs tallied by stats_rules.
    _DID_TYPE_COUNTERS = (
        ('CONTAINER', 'container'),
        ('DATASET', 'dataset'),
        ('FILE', 'file'),
    )

    def __init__(self,
                 myscope,
                 orgRse,
                 destRse,
                 account='bruzzese',
                 working_folder=None):
        """Store the settings and create the gfal2 and Rucio clients.

        :param myscope: Rucio scope used for every DID handled here
        :param orgRse: name of the origin RSE
        :param destRse: name of the destination RSE
        :param account: Rucio account used by the generic client
        :param working_folder: optional sub folder appended to the RSE url
        """
        self.myscope = myscope
        self.orgRse = orgRse
        self.destRse = destRse
        self.working_folder = working_folder

        self.gfal = Gfal2Context()

        self.didc = DIDClient()
        self.repc = ReplicaClient()
        self.rulesClient = RuleClient()

        # Configuration
        self.account = account
        self.client = Client(account=self.account)

    def rses(self):
        """Return the names of all RSEs listed by the client."""
        return [single_rse['rse'] for single_rse in self.client.list_rses()]

    def usage(self, s_rse):
        """Return the first local usage record of the account at ``s_rse``.

        :param s_rse: name of the RSE
        :raises IndexError: if the client returns no usage record
        """
        records = list(
            self.client.get_local_account_usage(account=self.account,
                                                rse=s_rse))
        return records[0]

    def rules(self):
        """Return the list of all rules owned by the account."""
        return list(self.client.list_account_rules(account=self.account))

    def myfunc(self):
        """Print the settings this instance was created with."""
        print(
            "Hello your setting are account=%s, scope=%s, origin RSE =%s and destination RSE =%s"
            % (self.account, self.myscope, self.orgRse, self.destRse))

    def file_exists(self, pfn):
        """Return True if gfal2 can stat ``pfn``, False if the stat fails.

        :param pfn: physical file name (url) to probe
        """
        try:
            self.gfal.stat(pfn).st_size
        except Exception:
            # Any storage error means "not there"; interpreter-level
            # signals such as KeyboardInterrupt are deliberately not caught.
            return False
        return True

    def get_rse_url(self):
        """
        Return the base path of the rucio url

        The url is built from the first protocol of the origin RSE. When a
        working folder is configured it is appended and created on storage.

        :returns: the url, or the string 'Wrong url parameters' when the
            scheme, hostname or prefix of the protocol is missing
        """
        rse_settings = rsemgr.get_rse_info(self.orgRse)
        protocol = rse_settings['protocols'][0]

        schema = protocol['scheme']
        prefix = protocol['prefix']
        port = protocol['port']
        rucioserver = protocol['hostname']

        if None in (schema, rucioserver, prefix):
            return 'Wrong url parameters'

        base_url = urlunsplit(
            (schema, rucioserver + ':' + str(port), prefix, '', ''))
        if self.working_folder is None:
            return base_url

        path = os.path.join(base_url, self.working_folder)
        # Permission bits must be octal: decimal 775 is not rwxrwxr-x.
        self.gfal.mkdir_rec(path, 0o775)
        return path

    def check_replica(self, lfn, dest_rse=None):
        """
        Check if a replica of the given file at the site already exists.

        :param lfn: name of the file inside the instance scope
        :param dest_rse: RSE to look at
        :returns: the first path registered at ``dest_rse``, False if no
            replica is registered there, None if ``lfn`` is empty or the
            lookup failed
        """
        print('here', self.myscope, lfn, dest_rse)
        if not lfn:
            return None

        try:
            replicas = list(
                self.client.list_replicas([{
                    'scope': self.myscope,
                    'name': lfn
                }],
                                          rse_expression=dest_rse))
            for replica in replicas:
                if isinstance(replica, dict) and dest_rse in replica['rses']:
                    return replica['rses'][dest_rse][0]
            return False
        except Exception as error:
            # Best effort lookup: report the failure instead of hiding it,
            # but keep returning None as callers expect.
            logger.debug("| - - - Replica lookup failed for %s:%s at %s: %s",
                         self.myscope, lfn, dest_rse, error)
            return None

    ############################

    ## Create Metadata for DIDs

    ############################
    def getFileMetaData(self, p_file, origenrse=None):
        """
        Get the size and checksum of the file at the given path and build
        the replica description used to register it.

        Characters '/', '%' and '+' are removed or replaced in the DID name.

        :param p_file: PFN of the file
        :param origenrse: unused, kept for compatibility with callers
        :returns: dict with the keys 'replica' (scope, name, adler32,
            bytes, pfn, meta/guid) and 'scope'
        """
        name = os.path.basename(p_file)
        name = name.replace('/', '').replace('%', '_')

        replica = {
            'scope': self.myscope,
            'name': name.replace('+', '_'),
            'adler32': self.gfal.checksum(p_file, 'adler32'),
            'bytes': self.gfal.stat(p_file).st_size,
            'pfn': p_file,
            "meta": {
                "guid": str(generate_uuid())
            }
        }

        return {'replica': replica, 'scope': self.myscope}

    ############################

    ## Create Groups of DIDs

    ############################
    def createDataset(self, new_dataset):
        """
        Create a dataset in the instance scope.

        :param new_dataset: name of the dataset
        :returns: True if created, False if it already existed, the value
            of generate_http_error_flask for Duplicate/AccountNotFound,
            None for any other RucioException (which is only logged)
        """
        logger.debug(
            "|  -  - Checking if a provided dataset exists: %s for a scope %s",
            new_dataset, self.myscope)
        try:
            self.client.add_dataset(scope=self.myscope, name=new_dataset)
            return True
        except DataIdentifierAlreadyExists:
            return False
        except Duplicate as error:
            return generate_http_error_flask(409, 'Duplicate', error.args[0])
        except AccountNotFound as error:
            return generate_http_error_flask(404, 'AccountNotFound',
                                             error.args[0])
        except RucioException as error:
            logger.debug(error)
            return None

    def createcontainer(self, name_container):
        '''
        Create a container in the instance scope.

        :param name_container: the container's name
        :returns: None, except for Duplicate/AccountNotFound where the
            value of generate_http_error_flask is returned
        '''
        logger.debug("|  -  -  - registering container %s", name_container)

        try:
            self.client.add_container(scope=self.myscope, name=name_container)
        except DataIdentifierAlreadyExists:
            logger.debug("|  -  -  - Container %s already exists",
                         name_container)
        except Duplicate as error:
            return generate_http_error_flask(409, 'Duplicate', error.args[0])
        except AccountNotFound as error:
            return generate_http_error_flask(404, 'AccountNotFound',
                                             error.args[0])
        except RucioException as error:
            logger.debug(error)
        return None

    ############################

    ## General function for registering a DID into a GROUP of DIDs (CONTAINER/DATASET)

    ############################
    def registerIntoGroup(self, n_file, new_dataset):
        """
        Attaching a DID to a GROUP

        :param n_file: name of the DID to attach
        :param new_dataset: name of the dataset/container to attach it to
        """
        # Both DIDs are looked up first; their types are only used in the
        # log message emitted when the attachment is refused.
        type_1 = self.client.get_did(scope=self.myscope, name=new_dataset)
        type_2 = self.client.get_did(scope=self.myscope, name=n_file)

        print('attaching ', n_file, new_dataset)
        try:
            self.client.attach_dids(scope=self.myscope,
                                    name=new_dataset,
                                    dids=[{
                                        'scope': self.myscope,
                                        'name': n_file
                                    }])
        except RucioException:
            logger.debug("| - - - %s already attached to %s",
                         type_2['type'], type_1['type'])

    ############################

    ## MAGIC functions

    ############################
    def create_groups(self, organization):
        """
        Build the chain replica -> dataset_1 -> container_1 -> container_2
        -> container_3, creating each group and attaching its child.

        :param organization: dict with the keys 'replica', 'dataset_1',
            'container_1', 'container_2' and 'container_3'; '%' is replaced
            by '_' in every name
        """
        def did_name(key):
            return organization[key].replace('%', '_')

        # 2.1) Create the dataset and attach the file to it
        self.createDataset(did_name('dataset_1'))
        self.registerIntoGroup(did_name('replica'), did_name('dataset_1'))

        # 2.2) - 2.4) Create each container and attach the previous level
        child = 'dataset_1'
        for container in ('container_1', 'container_2', 'container_3'):
            self.createcontainer(did_name(container))
            self.registerIntoGroup(did_name(child), did_name(container))
            child = container

    ############################

    ## Create Rule for DIDs

    ############################
    def _report_existing_rule(self, destRSE, group, error, to_stdout=False):
        """Report every account rule matching the DID and RSE expression."""
        for rule in self.client.list_account_rules(account=self.account):
            if (rule['rse_expression'] == destRSE
                    and rule['scope'] == self.myscope
                    and rule['name'] == group):
                message = (
                    '| - - - - Rule already exists %s which contains the following DID %s:%s %s'
                    % (rule['id'], self.myscope, group, str(error)))
                if to_stdout:
                    print(message)
                else:
                    logger.debug(message)

    def _create_rule(self, destRSE, group, **rule_options):
        """Create a one-copy rule for ``group``; shared by both public variants."""
        type_1 = self.client.get_did(scope=self.myscope, name=group)
        logger.debug("| - - - Creating replica rule for %s %s at rse: %s",
                     type_1['type'], group, destRSE)
        if not destRSE:
            return None

        try:
            rule = self.rulesClient.add_replication_rule(
                [{
                    "scope": self.myscope,
                    "name": group
                }],
                copies=1,
                rse_expression=destRSE,
                grouping='ALL',
                account=self.account,
                purge_replicas=True,
                **rule_options)
        except DuplicateRule as error:
            self._report_existing_rule(destRSE, group, error)
            return None
        except ReplicationRuleCreationTemporaryFailed as error:
            self._report_existing_rule(destRSE, group, error, to_stdout=True)
            return None

        logger.debug("| - - - - Rule succesfully replicated at %s", destRSE)
        logger.debug("| - - - - - The %s has the following id %s", group, rule)
        return rule[0]

    def addReplicaRule(self, destRSE, group):
        """
        Create an asynchronous replication rule for one DID at a
        destination RSE

        :param destRSE: RSE expression of the destination
        :param group: name of the dataset/container
        :returns: the id of the new rule, or None when ``destRSE`` is empty
            or the rule could not be created (duplicate / temporary failure)
        """
        return self._create_rule(destRSE, group, asynchronous=True)

    def addReplicaRule_noasync(self, destRSE, group):
        """
        Create a replication rule for one DID at a destination RSE without
        requesting asynchronous creation

        :param destRSE: RSE expression of the destination
        :param group: name of the dataset/container
        :returns: the id of the new rule, or None when ``destRSE`` is empty
            or the rule could not be created (duplicate / temporary failure)
        """
        return self._create_rule(destRSE, group)

    ############################

    ## Create Rules for not registered DIDs

    ############################
    def _retry_rule(self, create_rule, attempts=10):
        """Call ``create_rule`` until it yields a rule id; None if it never does."""
        for attempt in range(attempts):
            print(attempt)
            try:
                rule = create_rule()
            except Exception:
                print('fail')
                continue
            if rule is not None:
                print(rule)
                return rule
            print('fail')
        return None

    def outdated_register_replica(self, filemds, dest_RSE, org_RSE):
        """
        Register file replica.

        The files are attached to a throw-away dataset with a one day
        lifetime. A rule is created for it at the destination and at the
        origin RSE, and both rules are then linked and given a short
        lifetime.

        :param filemds: iterable of dicts as returned by getFileMetaData
        :param dest_RSE: RSE expression of the destination
        :param org_RSE: RSE expression of the origin
        :raises RuntimeError: if one of the two rules could not be created
        """
        carrier_dataset = 'outdated_replication_dataset' + '-' + str(
            uuid.uuid4())

        self.createDataset(carrier_dataset)

        # Make sure your dataset is ephemeral
        self.client.set_metadata(scope=self.myscope,
                                 name=carrier_dataset,
                                 key='lifetime',
                                 value=86400)  # 86400 in seconds = 1 day

        for filemd in filemds:
            self.registerIntoGroup(filemd['replica']['name'], carrier_dataset)

        # Rule creation sometimes fails spuriously (e.g. RuleNotFound), so
        # each rule is attempted several times.
        rule_child = self._retry_rule(
            lambda: self.addReplicaRule(dest_RSE, group=carrier_dataset))
        rule_parent = self._retry_rule(
            lambda: self.addReplicaRule_noasync(org_RSE,
                                                group=carrier_dataset))
        if rule_child is None or rule_parent is None:
            raise RuntimeError(
                'Could not create the replication rules for %s '
                '(destination rule: %s, origin rule: %s)' %
                (carrier_dataset, rule_child, rule_parent))

        print(rule_child, rule_parent)
        # Create a relation rule between origin and destiny RSE, so that the source data can be deleted
        rule = self.client.update_replication_rule(rule_id=rule_parent,
                                                   options={
                                                       'lifetime': 10,
                                                       'child_rule_id':
                                                       rule_child,
                                                       'purge_replicas': True
                                                   })
        logger.debug(
            '| - - - - Creating relationship between parent %s and child %s : %s',
            rule_parent, rule_child, rule)

        # Create a relation rule between the destiny rule RSE with itself, to delete the dummy rule, while keeping the destiny files
        rule = self.client.update_replication_rule(rule_id=rule_child,
                                                   options={
                                                       'lifetime': 10,
                                                       'child_rule_id':
                                                       rule_child
                                                   })
        logger.debug(
            '| - - - - Creating relationship between parent %s and child %s : %s',
            rule_parent, rule_child, rule)

    ############################

    ## Create Dictionary for Grafana

    ############################
    @staticmethod
    def _new_rule_counters():
        """Return a zeroed set of per-state rule counters."""
        return {
            'total_stuck': 0,
            'total_replicating': 0,
            'total_ok': 0,
            'total_rules': 0
        }

    @classmethod
    def _count_rule_state(cls, counters, state):
        """Add one rule in ``state`` to ``counters``."""
        counters['total_rules'] += 1
        for known_state, key in cls._STATE_COUNTERS:
            if state == known_state:
                counters[key] += 1
                break

    def stats_rules(self, rules):
        '''
        Gather general information about
        total number of rules, and stats.

        :param rules: iterable of rule dicts with the keys 'name', 'state'
            and 'did_type'
        :returns: None when ``rules`` is empty; otherwise a dict with
            'AllRules' (every rule), 'Rules' (rules whose name does not
            contain 'outdated_replication_dataset', only present if there
            is at least one) and 'Grouping' (rules per DID type)
        '''
        if not rules:
            return None

        stats = dict()
        for rule in rules:
            state = rule['state']
            if 'outdated_replication_dataset' not in rule['name']:
                self._count_rule_state(
                    stats.setdefault('Rules', self._new_rule_counters()),
                    state)
            self._count_rule_state(
                stats.setdefault('AllRules', self._new_rule_counters()),
                state)

            grouping = stats.setdefault('Grouping', {
                'file': 0,
                'dataset': 0,
                'container': 0
            })
            did_type = rule['did_type']
            for known_type, key in self._DID_TYPE_COUNTERS:
                if did_type == known_type:
                    grouping[key] += 1
                    break
        return stats

    def stats_replica_rules(self, rules):
        '''
        Gather specific information about
        state and number of replicas.

        :param rules: iterable of rule dicts with the keys
            'rse_expression', 'locks_stuck_cnt', 'locks_replicating_cnt'
            and 'locks_ok_cnt'
        :returns: None when ``rules`` is empty; otherwise
            {'RSE': {rse_expression: lock totals}}
        '''
        if not rules:
            return None

        per_rse = {}
        for rule in rules:
            expression = rule['rse_expression']
            totals = per_rse.get(expression)
            if totals is None:
                # First rule seen for this RSE expression
                per_rse[expression] = {
                    'total_replica_stuck': rule['locks_stuck_cnt'],
                    'total_replica_replicating':
                    rule['locks_replicating_cnt'],
                    'total_replica_ok': rule['locks_ok_cnt']
                }
            else:
                totals['total_replica_stuck'] += rule['locks_stuck_cnt']
                totals['total_replica_replicating'] += rule[
                    'locks_replicating_cnt']
                totals['total_replica_ok'] += rule['locks_ok_cnt']
        return {'RSE': per_rse}

    def stats_usage_rules(self, all_rses):
        """
        Sum the bytes used by the account on each RSE.

        :param all_rses: iterable of RSE names
        :returns: {'USAGE': {rse: {'total_bytes_used': bytes}}}; RSEs with
            zero usage are left out
        """
        used = {}
        for x_rse in all_rses:
            record = self.usage(x_rse)
            if record['bytes'] != 0:
                if record['rse'] not in used:
                    used[record['rse']] = {
                        'total_bytes_used': record['bytes']
                    }
                else:
                    used[record['rse']]['total_bytes_used'] += record['bytes']
        return {'USAGE': used}
Exemple #4
0
class CMSRucio(object):
    """
    Interface for Rucio with the CMS data model

    CMS         Rucio
    File/LFN    File
    Block       Dataset
    Dataset     Container

    We try to use the correct terminology for variable and parameter names:
    the CMS-facing code uses File/Block/Dataset and the Rucio-facing code
    uses File/Dataset/Container.
    """
    def __init__(self,
                 account,
                 auth_type,
                 creds=None,
                 scope='cms',
                 dry_run=False,
                 das_go_path=DEFAULT_DASGOCLIENT,
                 check=False):
        """
        Store the settings and create the Rucio and gfal2 clients.

        :param account: Rucio account handed to the client
        :param auth_type: authentication type handed to the client
        :param creds: credentials handed to the client
        :param scope: Rucio scope used for the DIDs
        :param dry_run: if true, the register/delete methods only print
        :param das_go_path: stored as ``self.dasgoclient``
        :param check: if true, files are checked on storage before their
            replicas are registered
        """
        # Connection settings
        self.account, self.auth_type, self.creds = account, auth_type, creds

        # Behaviour settings
        self.scope, self.dry_run = scope, dry_run
        self.dasgoclient, self.check = das_go_path, check

        client_settings = {
            'account': self.account,
            'auth_type': self.auth_type,
            'creds': self.creds,
        }
        self.cli = Client(**client_settings)

        self.gfal = Gfal2Context()

    def get_file_url(self, lfn, rse):
        """
        Return the rucio url of a file: the base url of ``rse`` followed
        by '/' and ``lfn``.
        """
        base_url = self.get_global_url(rse)
        return '/'.join((base_url, lfn))

    def get_global_url(self, rse):
        """
        Return the base path of the rucio url

        Built from the first protocol the client reports for ``rse``:
        scheme://hostname[:port] followed by the protocol prefix and the
        scope (dots in the scope become '/'). For the 'srm' scheme the web
        service path is put in front of the prefix; a port of 0 is omitted.
        """
        print("Getting parameters for rse %s" % rse)

        first_protocol = self.cli.get_protocols(rse)[0]

        scheme = first_protocol['scheme']
        path = first_protocol['prefix'] + '/' + self.scope.replace('.', '/')
        if scheme == 'srm':
            attributes = first_protocol['extended_attributes']
            path = attributes['web_service_path'] + path

        pieces = [scheme + '://' + first_protocol['hostname']]
        port = first_protocol['port']
        if port != 0:
            pieces.append(':' + str(port))
        pieces.append(path)

        base_url = ''.join(pieces)
        print("Determined base url %s" % base_url)
        return base_url

    def check_storage(self, filemd, rse):
        """
        Check size and checksum of a file on storage

        :param filemd: dict with the keys 'name', 'size' and 'checksum'
        :param rse: RSE whose storage is probed
        :returns: True if the file is found with the expected size and
            adler32 checksum (both compared as strings), False otherwise
        """
        url = self.get_file_url(filemd['name'], rse)
        print("checking url %s" % url)
        try:
            size = self.gfal.stat(str(url)).st_size
            checksum = self.gfal.checksum(str(url), 'adler32')
            print("got size and checksum of file: pfn=%s size=%s checksum=%s" %
                  (url, size, checksum))
        except GError:
            print("no file found at %s" % url)
            return False
        if str(size) != str(filemd['size']):
            print("wrong size for file %s. Expected %s got %s" %
                  (filemd['name'], filemd['size'], size))
            return False
        if str(checksum) != str(filemd['checksum']):
            # Message used to read "git" instead of "got".
            print("wrong checksum for file %s. Expected %s got %s" %
                  (filemd['name'], filemd['checksum'], checksum))
            return False
        print("size and checksum are ok")
        return True

    def cms_blocks_in_container(self, container, scope='cms'):
        """
        Return the names of the cms blocks (rucio datasets) directly
        attached to a rucio container.

        An empty list is returned when ``container`` is not of type
        CONTAINER. Type names are compared case-insensitively.
        """
        did = self.cli.get_did(scope=scope, name=container)
        if did['type'].upper() != 'CONTAINER':
            return []

        content = self.cli.list_content(scope=scope, name=container)
        return [
            entry['name'] for entry in content
            if entry['type'].upper() == 'DATASET'
        ]

    def get_replica_info_for_blocks(self,
                                    scope='cms',
                                    dataset=None,
                                    block=None,
                                    node=None):
        """
        This mimics the API of a CMS PhEDEx function. Be careful changing it

        _blockreplicas_
        Get replicas for given blocks

        :param scope: Rucio scope of the blocks
        :param dataset: CMS dataset (rucio container) name, or a list/set
            of names; all blocks of each dataset are included
        :param block: CMS block (rucio dataset) name, or a list/set of names
        :param node: accepted for PhEDEx compatibility; the code does not
            use it to filter the result
        :returns: {'block': [{block_name: [node, ...]}, ...]} where the
            nodes are those holding at least one file of the block in state
            AVAILABLE, sorted by name

        The other PhEDEx filters (se, update_since, create_since, complete,
        subscribed, custodial, group) are not implemented.
        """
        # Copy the caller's collection: it must not be extended in place,
        # and a set has no extend() at all.
        if isinstance(block, (list, set)):
            block_names = list(block)
        elif block:
            block_names = [block]
        else:
            block_names = []

        if isinstance(dataset, (list, set)):
            dataset_names = dataset
        elif dataset:
            dataset_names = [dataset]
        else:
            dataset_names = []
        for dataset_name in dataset_names:
            block_names.extend(
                self.cms_blocks_in_container(dataset_name, scope=scope))

        result = {'block': []}
        for block_name in block_names:
            # Query this block only, so that its node list is not polluted
            # by the replicas of the other blocks.
            response = self.cli.list_replicas(dids=[{
                'scope': scope,
                'name': block_name
            }])
            available = set()
            for item in response:
                for rse_name, state in item['states'].items():
                    if state.upper() == 'AVAILABLE':
                        available.add(rse_name)
            result['block'].append({block_name: sorted(available)})
        return result

    def dataset_summary(self, scope='cms', dataset=None):
        """
        Summary of a dataset metadata

        :param scope: Rucio scope of the dataset
        :param dataset: name of the DID whose files are listed
        :returns: dict with the keys 'files' (bytes/events per file name),
            'dataset', 'bytes', 'events', 'file_count', 'size' (formatted
            by convert_size_si) and 'sites' (file_count, bytes, events and
            size per node holding AVAILABLE replicas)
        """
        per_file = {}
        dids = []
        total_bytes = 0
        total_events = 0
        for entry in self.cli.list_files(scope=scope, name=dataset):
            lfn = entry['name']
            per_file[lfn] = {
                'bytes': entry['bytes'],
                'events': entry['events'],
            }
            dids.append({'scope': scope, 'name': lfn})
            # Missing (falsy) values count as zero.
            total_bytes += entry['bytes'] or 0
            total_events += entry['events'] or 0

        summary = {
            'files': per_file,
            'dataset': dataset,
            'bytes': total_bytes,
            'events': total_events,
            'file_count': len(dids),
            'size': self.convert_size_si(total_bytes),
        }

        # Replicas are requested in chunks of 1000 DIDs.
        sites = {}
        for chunk in self.grouper(dids, 1000):
            for replica in self.cli.list_replicas(dids=chunk):
                lfn = replica['name']
                for node, state in replica['states'].items():
                    if state.upper() != 'AVAILABLE':
                        continue
                    tally = sites.setdefault(node, {
                        'file_count': 0,
                        'bytes': 0,
                        'events': 0
                    })
                    tally['file_count'] += 1
                    tally['bytes'] += per_file[lfn]['bytes'] or 0
                    tally['events'] += per_file[lfn]['events'] or 0

        for tally in sites.values():
            tally['size'] = self.convert_size_si(tally['bytes'])

        summary['sites'] = sites
        return summary

    def register_replicas(self, rse, replicas):
        """
        Register file replicas

        Nothing is done for an empty ``replicas`` or in dry run mode. When
        ``self.check`` is set, only the files passing check_storage are
        registered.

        :param rse: RSE the replicas are registered at
        :param replicas: dicts with the keys 'name', 'checksum' and 'size'
        """
        if not replicas:
            return
        if self.dry_run:
            print(' Dry run only. Not registering files.')
            return

        if self.check:
            # Keep only the files actually found on storage with the
            # expected size and checksum.
            replicas = [
                filemd for filemd in replicas
                if self.check_storage(filemd, rse)
            ]

        payload = [{
            'scope': self.scope,
            'name': filemd['name'],
            'adler32': filemd['checksum'],
            'bytes': filemd['size'],
        } for filemd in replicas]
        self.cli.add_replicas(rse=rse, files=payload)

    def delete_replicas(self, rse, replicas):
        """
        Delete replicas from an RSE in Rucio.

        The affected file names are always printed first. In dry-run
        mode nothing is deleted. An ``AccessDenied`` error raised by the
        client is reported on stdout and not propagated.

        :param rse: name of the RSE the replicas are removed from
        :param replicas: file metadata dicts; each one must provide 'name'
        """
        if not replicas:
            return

        names = [entry['name'] for entry in replicas]
        print("Deleting files from %s in Rucio: %s" % (rse, ", ".join(names)))

        if self.dry_run:
            print(" Dry run only.  Not deleting replicas.")
            return

        try:
            targets = [{'scope': self.scope, 'name': name} for name in names]
            self.cli.delete_replicas(rse=rse, files=targets)
        except AccessDenied:
            print("Permission denied in deleting replicas: %s" %
                  ", ".join(names))

    def register_dataset(self, block, dataset, lifetime=None):
        """
        Create the Rucio dataset for a CMS block and attach it to the
        container (CMS dataset).

        In dry-run mode only a message is printed. Otherwise the dataset
        is added (``DataIdentifierAlreadyExists`` is ignored) and then
        attached to ``dataset`` (any ``RucioException`` is ignored).

        :param block: CMS block name, used as the Rucio dataset name
        :param dataset: CMS dataset name, i.e. the container to attach to
        :param lifetime: passed through to ``add_dataset``
        """
        if self.dry_run:
            notice = ' Dry run only. Not creating dataset (CMS block %s).'
            print(notice % block)
            return

        try:
            self.cli.add_dataset(
                scope=self.scope, name=block, lifetime=lifetime)
        except DataIdentifierAlreadyExists:
            # An existing dataset is not treated as an error.
            pass

        try:
            block_did = {'scope': self.scope, 'name': block}
            self.cli.attach_dids(
                scope=self.scope, name=dataset, dids=[block_did])
        except RucioException:
            # Attaching is best effort: every Rucio error is dropped here.
            pass

    def register_container(self, dataset, lifetime=None):
        """
        Create a container (CMS Dataset)

        In dry-run mode only a message is printed. An already existing
        container (``DataIdentifierAlreadyExists``) is silently accepted.

        :param dataset: CMS dataset name, used as the container name
        :param lifetime: passed through to ``add_container``; optional,
            defaulting to None like the ``lifetime`` of ``register_dataset``
        """

        if self.dry_run:
            print(' Dry run only. Not creating container (CMS dataset %s).' %
                  dataset)
            return

        try:
            self.cli.add_container(scope=self.scope,
                                   name=dataset,
                                   lifetime=lifetime)
        except DataIdentifierAlreadyExists:
            # An existing container is not treated as an error.
            pass

    def attach_files(self, lfns, block):
        """
        Attach files to a block.

        Nothing happens for an empty list or in dry-run mode. A
        ``FileAlreadyExists`` error raised by the client is ignored.

        :param lfns: names of the files to attach
        :param block: name of the DID the files are attached to
        """
        if not lfns:
            return

        if self.dry_run:
            print(' Dry run only. Not attaching files to %s.' % block)
            return

        try:
            file_dids = []
            for lfn in lfns:
                file_dids.append({'scope': self.scope, 'name': lfn})
            self.cli.attach_dids(scope=self.scope, name=block, dids=file_dids)
        except FileAlreadyExists:
            pass

    def get_phedex_metadata(self, dataset, pnn):
        """
        Gets the list of blocks at a PhEDEx site, their files and their metadata

        The blocks of ``dataset`` at ``pnn`` are looked up through
        ``das_go_client``; for every block its files are looked up the
        same way. A file record without a 'checksum' attribute is fetched
        again after a short pause.

        :param dataset: dataset name used in the DAS block query
        :param pnn: value used as ``site=`` in the DAS queries
        :return: dict mapping each block name to a dict that maps each
            file name to {'name', 'checksum', 'size'}; 'checksum' is the
            adler32 value as zero-padded, 8-digit lowercase hex
        :raises AttributeError: when a checksum entry holds no adler32 value
        :raises KeyError: when a refetched record still has no checksum
        """
        print("Initializing... getting the list of blocks and files")
        return_blocks = {}
        blocks = das_go_client(
            "block dataset=%s site=%s system=phedex" % (dataset, pnn),
            self.dasgoclient)
        for item in blocks:
            block_summary = {}
            block_name = item['block'][0]['name']
            files = das_go_client(
                "file block=%s site=%s system=phedex" % (block_name, pnn),
                self.dasgoclient)
            for item2 in files:
                filemd = item2['file'][0]

                # sometimes dasgoclient does not return the checksum attribute for a file
                # re-fetching data fix the problem
                if 'checksum' not in filemd:
                    print(
                        "file %s misses checksum attribute, try to refetch from das"
                        % filemd['name'])
                    time.sleep(5)
                    refetched = das_go_client(
                        "file file=%s system=phedex" % filemd['name'],
                        self.dasgoclient)
                    # The DAS answer is a sequence of records (see the
                    # loops above); the first record replaces the
                    # incomplete one.
                    for record in refetched:
                        filemd = record['file'][0]
                        break

                match = re.match(r"\S*adler32:([^,]+)", filemd['checksum'])
                if match is None:
                    raise AttributeError(
                        "file %s has non parsable checksum entry %s" %
                        (filemd['name'], filemd['checksum']))

                # Normalise to zero-padded, 8-digit lowercase hexadecimal.
                cksum = "{0:0{1}x}".format(int(match.group(1), 16), 8)
                block_summary[filemd['name']] = {
                    'name': filemd['name'],
                    'checksum': cksum,
                    'size': filemd['size']
                }
            return_blocks[block_name] = block_summary
        print("PhEDEx initalization done.")

        return return_blocks

    def add_rule(self, names, rse_exp, comment, copies=1):
        """
        Thin wrapper around the client's ``add_replication_rule``.

        In dry-run mode only a message is printed and no rule is added.

        :param names: names of the DIDs (in ``self.scope``) the rule covers
        :param rse_exp: RSE expression of the rule
        :param comment: comment stored with the rule
        :param copies: number of copies requested
        """
        dids = []
        for name in names:
            dids.append({'scope': self.scope, 'name': name})

        if self.dry_run:
            print("Dry run, no rule added.")
            return

        rule_args = {
            'dids': dids,
            'copies': copies,
            'rse_expression': rse_exp,
            'comment': comment,
        }
        self.cli.add_replication_rule(**rule_args)

    def del_rule(self, rid):
        """
        Just wrapping the delete_replication_rule method of ruleclient

        In dry-run mode only a message is printed. The rule is deleted
        with ``purge_replicas=False``.

        :param rid: id of the replication rule to delete
        :raises AccessDenied: re-raised unchanged after a message has been
            printed, so the original error text and traceback are kept
        """

        if self.dry_run:
            print("Dry run, rule %s not deleted." % rid)
            return

        try:
            self.cli.delete_replication_rule(rid, purge_replicas=False)
        except AccessDenied:
            print("Premission denied in removing rule (rid: %s)" % rid)
            # A bare raise propagates the caught exception itself instead
            # of a new, message-less AccessDenied instance.
            raise

    def update_rule(self, rid, options):
        """
        Thin wrapper around the client's ``update_replication_rule``.

        In dry-run mode only a message is printed and the rule is left
        untouched.

        :param rid: id of the replication rule to modify
        :param options: passed through unchanged to the client
        """
        if not self.dry_run:
            self.cli.update_replication_rule(rid, options)
            return

        print("Dry run, rule %s not modified." % rid)

    @staticmethod
    # FIXME: Pull this from WMCore/Utils/IteratorTools when we migrate
    def grouper(iterable, csize):
        """
        :param iterable: List of other iterable to slice
        :type: iterable
        :param csize: Chunk size for resulting lists
        :type: int
        :return: iterator of the sliced list
        Source: http://stackoverflow.com/questions/3992735/
           python-generator-that-groups-another-iterable-into-groups-of-n
        """
        iterable = iter(iterable)
        return iter(lambda: list(islice(iterable, csize)), [])

    @staticmethod
    def convert_size(size_bytes):
        """
        Convert size in bytes into human readable.
        Base 1024.
        """
        if size_bytes == 0:
            return "0B"
        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
        ival = int(math.floor(math.log(size_bytes, 1024)))
        power = math.pow(1024, ival)
        size = round(size_bytes / power, 2)
        return "%s %s" % (size, size_name[ival])

    @staticmethod
    def convert_size_si(size_bytes):
        """
        Convert size in bytes into human readable.
        Base 1000.
        """
        if size_bytes == 0:
            return "0B"
        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
        ival = int(math.floor(math.log(size_bytes, 1000)))
        power = math.pow(1000, ival)
        size = round(size_bytes / power, 2)
        return "%s %s" % (size, size_name[ival])
Exemple #5
0
def main():
    parser = ArgumentParser()
    parser.add_argument('runid', type=int, help='Run number')
    parser.add_argument('--dtype', help='dtype', required=True)
    parser.add_argument('--context', help='Context name', required=True)
    parser.add_argument('--rse', help='RSE to create replication rule at')
    parser.add_argument('--cmt', help='Global CMT version', default='ONLINE')
    parser.add_argument('--update-db',
                        help='flag to update runsDB',
                        dest='update_db',
                        action='store_true')
    parser.add_argument('--upload-to-rucio',
                        help='flag to upload to rucio',
                        dest='upload_to_rucio',
                        action='store_true')

    args = parser.parse_args()

    runid = args.runid
    runid_str = "%06d" % runid
    dtype = args.dtype

    dtypes = ['records', 'peaklets']

    # setup rucio client
    C = Client()

    # get context
    st = getattr(straxen.contexts, args.context)()

    # apply global version
    apply_global_version(st, args.cmt)

    for dtype in dtypes:

        # initialize plugin needed for processing this output type
        plugin = st._get_plugins((dtype, ), runid_str)[dtype]

        st._set_plugin_config(plugin, runid_str, tolerant=False)
        plugin.setup()

        for _dtype in plugin.provides:
            hash = get_hashes(st)[_dtype]

            # need to create the dataset we will be uploading data to out on the grid
            dataset = make_did(args.runid, _dtype, hash)
            scope, name = dataset.split(':')

            # check if this dataset exists
            existing_datasets = [
                i for i in C.list_dids(scope, filters=dict(type='dataset'))
            ]

            if name not in existing_datasets:
                C.add_dataset(scope, name)
                print(f"Dataset {dataset} created")
            else:
                print(f"Warning: The dataset {dataset} already exists!")
                #raise ValueError(f"The dataset {dataset} already exists!")

            #check if a rule already exists
            existing_rules = [
                i['rse_expression'] for i in C.list_did_rules(scope, name)
            ]

            # update runDB
            new_data_dict = dict()
            new_data_dict['location'] = args.rse
            new_data_dict['did'] = dataset
            new_data_dict['status'] = 'processing'
            new_data_dict['host'] = "rucio-catalogue"
            new_data_dict['type'] = _dtype
            new_data_dict['protocol'] = 'rucio'
            new_data_dict['creation_time'] = datetime.datetime.utcnow(
            ).isoformat()
            new_data_dict['creation_place'] = "OSG"
            new_data_dict['meta'] = dict(lineage=None,
                                         avg_chunk_mb=None,
                                         file_count=None,
                                         size_mb=None,
                                         strax_version=strax.__version__,
                                         straxen_version=straxen.__version__)

            if args.rse not in existing_rules:
                # 1 is the number of copies
                if args.upload_to_rucio:
                    C.add_replication_rule([dict(scope=scope, name=name)], 1,
                                           args.rse)
                    print(f"Replication rule at {args.rse} created")

                if args.update_db:
                    db.update_data(runid, new_data_dict)

                # send peaklets data to dali
                if dtype == 'peaklets' and args.rse != 'UC_DALI_USERDISK':
                    if args.upload_to_rucio:
                        C.add_replication_rule(
                            [dict(scope=scope, name=name)],
                            1,
                            'UC_DALI_USERDISK',
                            source_replica_expression=args.rse,
                            priority=5)