Beispiel #1
0
    def validate(self, apiobj, method, api, param, safe):
        """Validate all the input parameters as enforced by the WMCore.REST module.

        authz_login_valid() is called first for every request; DELETE requests
        additionally go through authz_operator(). Which parameters are checked
        depends on the HTTP method. After each validate_* call this method reads
        the checked value back from ``safe.kwargs``.

        :arg apiobj: not used by this method
        :arg str method: HTTP method; only 'PUT', 'POST', 'GET' and 'DELETE'
             trigger parameter validation here
        :arg api: not used by this method
        :arg param: raw request parameters, handed to the validate_* helpers
        :arg safe: object whose ``kwargs`` mapping holds the validated values
        :raises InvalidParameter: PUT with a different number of runs and lumi
             lists, PUT where jobid and pandajobid are both set or both unset,
             DELETE where taskname and hours are both set or both unset
        """
        authz_login_valid()

        if method == 'PUT':
            #TODO check optional parameter
            #TODO check all the regexp
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_strlist("outfilelumis", param, safe, RX_LUMILIST)
            validate_numlist("outfileruns", param, safe)
            # runs and lumis are parallel lists: they must have the same length
            if len(safe.kwargs["outfileruns"]) != len(safe.kwargs["outfilelumis"]):
                raise InvalidParameter("The number of runs and the number of lumis lists are different")
            validate_strlist("inparentlfns", param, safe, RX_PARENTLFN)
            validate_str("globalTag", param, safe, RX_GLOBALTAG, optional=True)
            validate_str("jobid", param, safe, RX_JOBID, optional=True)
            #TODO: for backward compatibility. Get rid of the pandajobid once all jobs using it are done (one month after this gets released)
            #(The following five lines can be deleted)
            validate_num("pandajobid", param, safe, optional=True)
            # the check is on truthiness, so a pandajobid of 0 counts as "not set"
            if bool(safe.kwargs["jobid"]) == bool(safe.kwargs["pandajobid"]):
                raise InvalidParameter("Only one among jobid and pandajobid should be set")
            #Oracle/cx_oracle/python stack does not like None for numbers, even if they are nullable
            if safe.kwargs["pandajobid"] is None:
                safe.kwargs["pandajobid"] = 0
            validate_num("outsize", param, safe, optional=False)
            validate_str("publishdataname", param, safe, RX_PUBLISH, optional=False)
            validate_str("appver", param, safe, RX_CMSSW, optional=False)
            validate_str("outtype", param, safe, RX_OUTTYPES, optional=False)
            validate_str("checksummd5", param, safe, RX_CHECKSUM, optional=False)
            validate_str("checksumcksum", param, safe, RX_CHECKSUM, optional=False)
            validate_str("checksumadler32", param, safe, RX_CHECKSUM, optional=False)
            validate_str("outlocation", param, safe, RX_CMSSITE, optional=False)
            validate_str("outtmplocation", param, safe, RX_CMSSITE, optional=False)
            validate_str("acquisitionera", param, safe, RX_TASKNAME, optional=False)#TODO Do we really need this?
            validate_str("outdatasetname", param, safe, RX_OUTDSLFN, optional=False)#TODO temporary, need to come up with a regex
            validate_str("outlfn", param, safe, RX_PARENTLFN, optional=False)
            validate_str("outtmplfn", param, safe, RX_PARENTLFN, optional=True)
            validate_num("events", param, safe, optional=False)
            validate_str("filestate", param, safe, RX_FILESTATE, optional=True)
            validate_num("directstageout", param, safe, optional=True)
            # the numeric flag is replaced by a 'T'/'F' string; 'F' if not provided
            safe.kwargs["directstageout"] = 'T' if safe.kwargs["directstageout"] else 'F'
        elif method == 'POST':
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_str("outlfn", param, safe, RX_LFN, optional=False)
            validate_str("filestate", param, safe, RX_FILESTATE, optional=False)
        elif method == 'GET':
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_str("filetype", param, safe, RX_OUTTYPES, optional=False)
            validate_num("howmany", param, safe, optional=True)
        elif method == 'DELETE':
            authz_operator()
            validate_str("taskname", param, safe, RX_TASKNAME, optional=True)
            validate_str("hours", param, safe, RX_HOURS, optional=True)
            # exactly one of taskname / hours must be given
            if bool(safe.kwargs["taskname"]) == bool(safe.kwargs["hours"]):
                raise InvalidParameter("You have to specify a taskname or a number of hours. Files of this task or created before the number of hours"
                                       " will be deleted. Only one of the two parameters can be specified.")
    def validate(self, apiobj, method, api, param, safe):
        """Check the request arguments of each HTTP verb, as required by WMCore.REST.

        A valid login is required for every request (authz_login_valid) and
        DELETE also calls authz_operator. The validate_* helpers are applied
        in a fixed order; the checked values are then read from safe.kwargs.
        The apiobj and api arguments are not used here.

        Raises InvalidParameter when a PUT carries run and lumi lists of
        different lengths, or when a DELETE does not carry exactly one of
        taskname and hours.
        """
        authz_login_valid()

        if method == 'PUT':
            # TODO: check optional parameter
            # TODO: check all the regexp
            validate_str("taskname", param, safe, RX_WORKFLOW, optional=False)
            validate_strlist("outfilelumis", param, safe, RX_LUMILIST)
            validate_numlist("outfileruns", param, safe)
            numRuns = len(safe.kwargs["outfileruns"])
            numLumiLists = len(safe.kwargs["outfilelumis"])
            if numRuns != numLumiLists:
                raise InvalidParameter("The number of runs and the number of lumis lists are different")
            validate_strlist("inparentlfns", param, safe, RX_PARENTLFN)
            validate_str("globalTag", param, safe, RX_GLOBALTAG, optional=True)
            for numName in ("pandajobid", "outsize"):
                validate_num(numName, param, safe, optional=False)
            # (name, regular expression, optional) triplets, validated in this order
            stringChecks = (
                ("publishdataname", RX_PUBLISH, False),
                ("appver", RX_CMSSW, False),
                ("outtype", RX_OUTTYPES, False),
                ("checksummd5", RX_CHECKSUM, False),
                ("checksumcksum", RX_CHECKSUM, False),
                ("checksumadler32", RX_CHECKSUM, False),
                ("outlocation", RX_CMSSITE, False),
                ("outtmplocation", RX_CMSSITE, False),
                ("acquisitionera", RX_WORKFLOW, False),  # TODO Do we really need this?
                ("outdatasetname", RX_OUTDSLFN, False),  # TODO temporary, need to come up with a regex
                ("outlfn", RX_PARENTLFN, False),
                ("outtmplfn", RX_PARENTLFN, True),
            )
            for strName, regex, isOptional in stringChecks:
                validate_str(strName, param, safe, regex, optional=isOptional)
            validate_num("events", param, safe, optional=False)
            validate_str("filestate", param, safe, RX_FILESTATE, optional=True)
            validate_num("directstageout", param, safe, optional=True)
            # turn the numeric flag into 'T'/'F'; 'F' if not provided
            if safe.kwargs["directstageout"]:
                safe.kwargs["directstageout"] = 'T'
            else:
                safe.kwargs["directstageout"] = 'F'
        elif method == 'POST':
            validate_str("taskname", param, safe, RX_WORKFLOW, optional=False)
            validate_str("outlfn", param, safe, RX_LFN, optional=False)
            validate_str("filestate", param, safe, RX_FILESTATE, optional=False)
        elif method == 'GET':
            validate_str("taskname", param, safe, RX_WORKFLOW, optional=False)
            validate_str("filetype", param, safe, RX_OUTTYPES, optional=False)
        elif method == 'DELETE':
            authz_operator()
            validate_str("taskname", param, safe, RX_WORKFLOW, optional=True)
            validate_str("hours", param, safe, RX_HOURS, optional=True)
            hasTaskname = bool(safe.kwargs["taskname"])
            hasHours = bool(safe.kwargs["hours"])
            if hasTaskname == hasHours:
                raise InvalidParameter(
                    "You have to specify a taskname or a number of hours. Files of this task or created before the number of hours"
                    " will be deleted. Only one of the two parameters can be specified.")
Beispiel #3
0
    def validate(self, apiobj, method, api, param, safe):
        """Run the per-method parameter checks required by the WMCore.REST module.

        Each supported HTTP method is handled by its own guard block that
        returns as soon as its checks are done; other methods only get the
        login check. apiobj and api are accepted but not used.

        InvalidParameter is raised for a PUT whose run list and lumi list
        differ in length, and for a DELETE that does not provide exactly one
        of taskname and hours.
        """
        authz_login_valid()

        if method == 'PUT':
            #TODO check optional parameter
            #TODO check all the regexp
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_strlist("outfilelumis", param, safe, RX_LUMILIST)
            validate_numlist("outfileruns", param, safe)
            sameLength = len(safe.kwargs["outfileruns"]) == len(safe.kwargs["outfilelumis"])
            if not sameLength:
                raise InvalidParameter("The number of runs and the number of lumis lists are different")
            validate_strlist("inparentlfns", param, safe, RX_PARENTLFN)
            validate_str("globalTag", param, safe, RX_GLOBALTAG, optional=True)
            validate_num("pandajobid", param, safe, optional=False)
            validate_num("outsize", param, safe, optional=False)
            # mandatory string parameters, checked in the listed order
            mandatoryStrings = [
                ("publishdataname", RX_PUBLISH),
                ("appver", RX_CMSSW),
                ("outtype", RX_OUTTYPES),
                ("checksummd5", RX_CHECKSUM),
                ("checksumcksum", RX_CHECKSUM),
                ("checksumadler32", RX_CHECKSUM),
                ("outlocation", RX_CMSSITE),
                ("outtmplocation", RX_CMSSITE),
                ("acquisitionera", RX_TASKNAME),  #TODO Do we really need this?
                ("outdatasetname", RX_OUTDSLFN),  #TODO temporary, need to come up with a regex
                ("outlfn", RX_PARENTLFN),
            ]
            for key, pattern in mandatoryStrings:
                validate_str(key, param, safe, pattern, optional=False)
            validate_str("outtmplfn", param, safe, RX_PARENTLFN, optional=True)
            validate_num("events", param, safe, optional=False)
            validate_str("filestate", param, safe, RX_FILESTATE, optional=True)
            validate_num("directstageout", param, safe, optional=True)
            # 'F' if not provided
            flag = 'F'
            if safe.kwargs["directstageout"]:
                flag = 'T'
            safe.kwargs["directstageout"] = flag
            return

        if method == 'POST':
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_str("outlfn", param, safe, RX_LFN, optional=False)
            validate_str("filestate", param, safe, RX_FILESTATE, optional=False)
            return

        if method == 'GET':
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_str("filetype", param, safe, RX_OUTTYPES, optional=False)
            return

        if method == 'DELETE':
            authz_operator()
            validate_str("taskname", param, safe, RX_TASKNAME, optional=True)
            validate_str("hours", param, safe, RX_HOURS, optional=True)
            taskGiven = bool(safe.kwargs["taskname"])
            hoursGiven = bool(safe.kwargs["hours"])
            if taskGiven == hoursGiven:
                raise InvalidParameter(
                    "You have to specify a taskname or a number of hours. Files of this task or created before the number of hours"
                    " will be deleted. Only one of the two parameters can be specified.")
Beispiel #4
0
    def validate(self, apiobj, method, api, param, safe):
        """Validate all the input parameters as enforced by the WMCore.REST module.

        authz_login_valid() is called first for every request; DELETE requests
        additionally go through authz_operator(). Which parameters are checked
        depends on the HTTP method. After each validate_* call this method reads
        the checked value back from ``safe.kwargs``.

        :arg apiobj: not used by this method
        :arg str method: HTTP method; only 'PUT', 'POST', 'GET' and 'DELETE'
             trigger parameter validation here
        :arg api: not used by this method
        :arg param: raw request parameters, handed to the validate_* helpers
        :arg safe: object whose ``kwargs`` mapping holds the validated values
        :raises InvalidParameter: PUT with a different number of runs and lumi
             lists, PUT where jobid and pandajobid are both set or both unset,
             DELETE where taskname and hours are both set or both unset
        """
        authz_login_valid()

        if method == 'PUT':
            #TODO check optional parameter
            #TODO check all the regexp
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_strlist("outfilelumis", param, safe, RX_LUMILIST)
            validate_numlist("outfileruns", param, safe)
            # runs and lumis are parallel lists: they must have the same length
            if len(safe.kwargs["outfileruns"]) != len(safe.kwargs["outfilelumis"]):
                raise InvalidParameter("The number of runs and the number of lumis lists are different")
            validate_strlist("inparentlfns", param, safe, RX_PARENTLFN)
            validate_str("globalTag", param, safe, RX_GLOBALTAG, optional=True)
            validate_str("jobid", param, safe, RX_JOBID, optional=True)
            #TODO: for backward compatibility. Get rid of the pandajobid once all jobs using it are done (one month after this gets released)
            #(The following five lines can be deleted)
            validate_num("pandajobid", param, safe, optional=True)
            # the check is on truthiness, so a pandajobid of 0 counts as "not set"
            if bool(safe.kwargs["jobid"]) == bool(safe.kwargs["pandajobid"]):
                raise InvalidParameter("Only one among jobid and pandajobid should be set")
            #Oracle/cx_oracle/python stack does not like None for numbers, even if they are nullable
            if safe.kwargs["pandajobid"] is None:
                safe.kwargs["pandajobid"] = 0
            validate_num("outsize", param, safe, optional=False)
            validate_str("publishdataname", param, safe, RX_PUBLISH, optional=False)
            validate_str("appver", param, safe, RX_CMSSW, optional=False)
            validate_str("outtype", param, safe, RX_OUTTYPES, optional=False)
            validate_str("checksummd5", param, safe, RX_CHECKSUM, optional=False)
            validate_str("checksumcksum", param, safe, RX_CHECKSUM, optional=False)
            validate_str("checksumadler32", param, safe, RX_CHECKSUM, optional=False)
            validate_str("outlocation", param, safe, RX_CMSSITE, optional=False)
            validate_str("outtmplocation", param, safe, RX_CMSSITE, optional=False)
            validate_str("acquisitionera", param, safe, RX_TASKNAME, optional=False)  #TODO Do we really need this?
            validate_str("outdatasetname", param, safe, RX_OUTDSLFN, optional=False)  #TODO temporary, need to come up with a regex
            validate_str("outlfn", param, safe, RX_PARENTLFN, optional=False)
            validate_str("outtmplfn", param, safe, RX_PARENTLFN, optional=True)
            validate_num("events", param, safe, optional=False)
            validate_str("filestate", param, safe, RX_FILESTATE, optional=True)
            validate_num("directstageout", param, safe, optional=True)
            # the numeric flag is replaced by a 'T'/'F' string; 'F' if not provided
            safe.kwargs["directstageout"] = 'T' if safe.kwargs["directstageout"] else 'F'
        elif method == 'POST':
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_str("outlfn", param, safe, RX_LFN, optional=False)
            validate_str("filestate", param, safe, RX_FILESTATE, optional=False)
        elif method == 'GET':
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_str("filetype", param, safe, RX_OUTTYPES, optional=False)
            validate_num("howmany", param, safe, optional=True)
            validate_strlist("lfn", param, safe, RX_LFN)
        elif method == 'DELETE':
            authz_operator()
            validate_str("taskname", param, safe, RX_TASKNAME, optional=True)
            validate_str("hours", param, safe, RX_HOURS, optional=True)
            # exactly one of taskname / hours must be given
            if bool(safe.kwargs["taskname"]) == bool(safe.kwargs["hours"]):
                raise InvalidParameter("You have to specify a taskname or a number of hours. Files of this task or created before the number of hours"
                                       " will be deleted. Only one of the two parameters can be specified.")
Beispiel #5
0
    def get(self, subresource, objecttype, taskname, username, tarballname):  # pylint: disable=redefined-builtin
        """
        Serve the S3 operation selected by `subresource`.

           :arg str subresource: the specific information to be accessed;
                handled values are 'upload', 'download', 'retrieve', 'list' and 'used'
           :arg str objecttype: kind of object; 'sandbox' objects are stored under
                <owner>/sandboxes/<tarballname>, any other type under
                <owner>/<taskname>/<objecttype>. Optional for 'list', where it is
                used as a substring filter on the file names; ignored by 'used'
           :arg str taskname: task the object belongs to (needed for non-sandbox objects)
           :arg str username: owner of the objects; needed for 'list' and 'used' and
                for 'download'/'retrieve' of a sandbox
           :arg str tarballname: sandbox file name (needed when objecttype is 'sandbox')
           :return: depends on subresource:
                'upload': [url, fields] of a presigned POST, or ["", {}] when the
                sandbox is already in the bucket;
                'download': the presigned URL;
                'retrieve': the content of the object, read as text;
                'list': list of file names with the leading "<username>/" removed;
                'used': one-element list with the used space in MBytes;
                any other value: None
           :raises MissingParameter: a needed argument is missing
           :raises ExecutionError: the S3 client reported a ClientError
        """
        authenticatedUserName = cherrypy.request.user['login']  # the username of who's calling
        # a bit of code common to 3 subresource's: validate args and prepare the s3_objectKey inside the bucket
        if subresource in ['upload', 'retrieve', 'download']:
            if not objecttype:
                raise MissingParameter("objecttype is missing")
            if objecttype == 'sandbox':
                if not tarballname:
                    raise MissingParameter("tarballname is missing")
                if subresource == 'upload':
                    # uploads always go to the area of who is calling
                    ownerName = authenticatedUserName
                else:
                    # the owner has to be known before the key can be built
                    if not username:
                        raise MissingParameter("username is missing")
                    ownerName = username
                # sandbox goes in bucket/username/sandboxes/
                objectPath = ownerName + '/sandboxes/' + tarballname
            else:
                if not taskname:
                    raise MissingParameter("taskname is missing")
                ownerName = getUsernameFromTaskname(taskname)
                # task related files go in bucket/username/taskname/
                objectPath = ownerName + '/' + taskname + '/' + objecttype
            s3_objectKey = fromNewBytesToString(objectPath)

        if subresource == 'upload':
            # returns a dictionary with the information to upload a file with a POST
            # via a "PreSigned URL". It can return  an empty string '' as URL to indicate that
            # a sandbox upload request refers to an existing object with same name
            # WMCore REST does not allow to return None
            authz_operator(username=ownerName, group='crab3', role='operator')
            if objecttype == 'sandbox':
                # we only upload same sandbox once
                try:
                    # from https://stackoverflow.com/a/38376288
                    self.s3_client.head_object(Bucket=self.s3_bucket, Key=s3_objectKey)
                except ClientError:
                    # any ClientError is taken as "object not there yet": go on with the upload
                    pass
                else:
                    return ["", {}]  # this tells client not to upload
            expiration = 60 * 60  # 1 hour is good for retries and debugging
            try:
                # this returns a dictionary like:
                # {'url': u'https://s3.cern.ch/bucket1',
                # 'fields': {'policy': u'eyJjb ... jEzWiJ9', # policy is a 164-char-long string
                # 'AWSAccessKeyId': u'5d4270f1e022442783646c34cf552d55',
                # 'key': objectPath, 'signature': u'pm58cUqxNQHBZXS1B/Er6P89IhU='}}
                preSignedUrl = self.s3_client.generate_presigned_post(
                    self.s3_bucket, s3_objectKey, ExpiresIn=expiration)
            except ClientError as e:
                raise ExecutionError("Connection to s3.cern.ch failed:\n%s" % str(e))
            # somehow it does not work to return preSignedUrl as a single object
            return [preSignedUrl['url'], preSignedUrl['fields']]

        if subresource == 'download':
            authz_operator(username=ownerName, group='crab3', role='operator')
            # returns a PreSignedUrl to download the file within the expiration time
            expiration = 60 * 60  # 1 hour default is good for retries and debugging
            if objecttype in ['debugfiles', 'clientlog', 'twlog']:
                expiration = 60 * 60 * 24 * 30  # for logs make url valid as long as we keep files (1 month)
            try:
                preSignedUrl = self.s3_client.generate_presigned_url(
                    'get_object',
                    Params={'Bucket': self.s3_bucket, 'Key': s3_objectKey},
                    ExpiresIn=expiration)
            except ClientError as e:
                raise ExecutionError("Connection to s3.cern.ch failed:\n%s" % str(e))
            return preSignedUrl

        if subresource == 'retrieve':
            # download from S3 into a temporary file, read it, and return content to caller
            authz_operator(username=ownerName, group='crab3', role='operator')
            tempFile = '/tmp/boto.' + uuid.uuid4().hex
            try:
                try:
                    self.s3_client.download_file(self.s3_bucket, s3_objectKey, tempFile)
                except ClientError as e:
                    raise ExecutionError("Connection to s3.cern.ch failed:\n%s" % str(e))
                with open(tempFile) as f:
                    txt = f.read()
            finally:
                # do not leave the temporary file behind, also when download or read fails
                if os.path.exists(tempFile):
                    os.remove(tempFile)
            return txt

        if subresource == 'list':
            # list all files (aka objects, aka keys in S3 lingo) for a given username
            # if arg objecttype is present, returns only the file names for that objecttype
            if not username:
                raise MissingParameter('username is missing')
            # In S3 we always need to retrieve all keys even if some filtering/compression
            # will be applied before reporting, since there is a limit of 1K key per call,
            # multiple calls will be needed, S3 paginators make that easy
            # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/paginators.html
            # We use S3 prefix to limit retrieved list to a user, since in our buckets
            # file keys always have the form <username>/... see:
            # https://github.com/dmwm/CRABServer/wiki/CRABCache-replacement-with-S3#bucket-organization  and
            # https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-prefixes.html
            #
            # the trailing '/' keeps out users whose name merely starts with this username
            userPrefix = fromNewBytesToString(username) + '/'
            paginator = self.s3_client.get_paginator('list_objects_v2')
            operation_parameters = {'Bucket': self.s3_bucket, 'Prefix': userPrefix}
            fileNames = []
            for page in paginator.paginate(**operation_parameters):
                # a page with no matching keys carries no 'Contents' entry
                for item in page.get('Contents', []):
                    key = item['Key']
                    # drop the leading "<username>/" (str.lstrip would strip a set of characters)
                    if key.startswith(userPrefix):
                        key = key[len(userPrefix):]
                    fileNames.append(key)
            if objecttype:
                fileNames = [f for f in fileNames if objecttype in f]
            return fileNames

        if subresource == 'used':
            # return space used by username, in MBytes (rounded to integer)
            if not username:
                raise MissingParameter('username is missing')
            # the trailing '/' keeps out users whose name merely starts with this username
            userPrefix = fromNewBytesToString(username) + '/'
            paginator = self.s3_client.get_paginator('list_objects_v2')
            operation_parameters = {'Bucket': self.s3_bucket, 'Prefix': userPrefix}
            # S3 records object size in bytes, see:
            # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.list_objects_v2
            usedBytes = 0
            for page in paginator.paginate(**operation_parameters):
                # a page with no matching keys carries no 'Contents' entry
                usedBytes += sum(item['Size'] for item in page.get('Contents', []))
            usedMBytes = usedBytes // 1024 // 1024
            # WMCore REST wants to return lists
            return [usedMBytes]
Beispiel #6
0
    def validate(self, apiobj, method, api, param, safe):
        """Apply the parameter checks that WMCore.REST requires for each HTTP method.

        authz_login_valid() runs for every request and authz_operator() for
        DELETE. The validate_* helpers are called in a fixed order and the
        checked values are afterwards read from (and for a few keys rewritten
        in) safe.kwargs. apiobj and api are not used.

        Raises InvalidParameter if a PUT has run and lumi lists of different
        lengths, or if a DELETE does not have exactly one of taskname and hours.
        """
        authz_login_valid()

        if method == 'PUT':
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_strlist("outfilelumis", param, safe, RX_LUMILIST)
            validate_strlist("outfileruns", param, safe, RX_RUNS)
            runCount = len(safe.kwargs["outfileruns"])
            lumiCount = len(safe.kwargs["outfilelumis"])
            if runCount != lumiCount:
                raise InvalidParameter("The number of runs and the number of lumis lists are different")
            validate_strlist("inparentlfns", param, safe, RX_PARENTLFN)
            # inparentlfns will be inserted in Oracle as CLOB, so it must be a string
            parentLfns = safe.kwargs['inparentlfns']
            safe.kwargs['inparentlfns'] = str(parentLfns)
            for optName, optRegex in (("globalTag", RX_GLOBALTAG), ("jobid", RX_JOBID)):
                validate_str(optName, param, safe, optRegex, optional=True)
            # pandajobid is not read from the request: it is always stored as 0
            safe.kwargs["pandajobid"] = 0
            validate_num("outsize", param, safe, optional=False)
            for reqName, reqRegex in (
                    ("publishdataname", RX_PUBLISH),
                    ("appver", RX_CMSSW),
                    ("outtype", RX_OUTTYPES),
                    ("checksummd5", RX_CHECKSUM),
            ):
                validate_str(reqName, param, safe, reqRegex, optional=False)
            # unlike the other checksums, cksum is validated as a number
            validate_num("checksumcksum", param, safe, optional=False)
            for reqName, reqRegex in (
                    ("checksumadler32", RX_CHECKSUM),
                    ("outlocation", RX_CMSSITE),
                    ("outtmplocation", RX_CMSSITE),
                    ("acquisitionera", RX_TASKNAME),
                    ("outdatasetname", RX_OUTDSLFN),
                    # need to use RX_PARENTLFN because same API is also used for input metadata
                    ("outlfn", RX_PARENTLFN),
            ):
                validate_str(reqName, param, safe, reqRegex, optional=False)
            validate_str("outtmplfn", param, safe, RX_LFN, optional=True)
            validate_num("events", param, safe, optional=False)
            validate_str("filestate", param, safe, RX_FILESTATE, optional=True)
            validate_num("directstageout", param, safe, optional=True)
            # 'F' if not provided
            if safe.kwargs["directstageout"]:
                safe.kwargs["directstageout"] = 'T'
            else:
                safe.kwargs["directstageout"] = 'F'
        elif method == 'POST':
            for reqName, reqRegex in (
                    ("taskname", RX_TASKNAME),
                    ("outlfn", RX_LFN),
                    ("filestate", RX_FILESTATE),
            ):
                validate_str(reqName, param, safe, reqRegex, optional=False)
        elif method == 'GET':
            validate_str("taskname", param, safe, RX_TASKNAME, optional=False)
            validate_str("filetype", param, safe, RX_OUTTYPES, optional=False)
            validate_num("howmany", param, safe, optional=True)
            validate_strlist("lfn", param, safe, RX_LFN)
        elif method == 'DELETE':
            authz_operator()
            validate_str("taskname", param, safe, RX_TASKNAME, optional=True)
            validate_str("hours", param, safe, RX_HOURS, optional=True)
            tasknameSet = bool(safe.kwargs["taskname"])
            hoursSet = bool(safe.kwargs["hours"])
            if tasknameSet == hoursSet:
                raise InvalidParameter(
                    "You have to specify a taskname or a number of hours. Files of this task or created before the number of hours"
                    " will be deleted. Only one of the two parameters can be specified.")