Exemple #1
0
    def _optionallyUploadLocalFile(self):
        """
        Offer to upload a purely local file to the grid so that it gains an LFN.

        Nothing is done when the file already has an LFN. Otherwise, provided a
        namePattern is set, the user is asked interactively whether the file
        should be uploaded through put(). An empty answer counts as 'yes' and
        the answer is interpreted case-insensitively.

        Raises:
            GangaFileError: if an upload was requested but the LFN is still
                empty afterwards, or if the file has neither an LFN nor a
                namePattern.
        """
        if self.lfn != "":
            return

        if self.namePattern == "":
            raise GangaFileError('Cannot do anything if I don\'t have an lfn or a namePattern!')

        logger.info("I have a local DiracFile, however you're requesting it's location on the grid")
        logger.info("Shall I upload it to the grid before I continue?")

        # Normalise the answer once so that 'N' is handled exactly like 'n';
        # comparing the raw answer used to send 'N' down the failure path.
        decision = raw_input('[y] / n:').lower()
        while decision not in ('y', 'n', ''):
            decision = raw_input('[y] / n:').lower()

        if decision == 'n':
            logger.debug("Not uploading now")
            return

        # upload namePattern to grid
        logger.debug("Uploading the file first")
        self.put()

        if self.lfn == "":
            raise GangaFileError('Uploading of namePattern: %s failed' % self.namePattern)
Exemple #2
0
    def get(self):
        """
        Make sure a local copy of this file exists and report whether it does.

        The destination directory is chosen in this order:
            1) localDir, when set (it must already exist),
            2) the output directory of the parent job,
            3) otherwise an exception is raised.

        Nothing is copied when a file called namePattern is already present in
        the destination. After a copy, localDir is set to the destination if it
        was previously unset.

        Returns:
            True when the file was already present, otherwise whatever
            copyTo() returned.

        Raises:
            GangaFileError: if localDir is set but is not a directory, or if
                no destination could be determined.
        """
        destination = self.localDir
        if destination:
            if not os.path.isdir(destination):
                raise GangaFileError(
                    "Folder '%s' doesn't exist. Please construct this before 'get'-ing a file." % destination)
        else:
            try:
                destination = self.getJobObject().outputdir
            except AssertionError:
                msg = "%s: Failed to get file object. Please set the `localDir` parameter and try again. e.g. file.localDir=os.getcwd();file.get()" % getName(
                    self)
                logger.debug("localDir value: %s" % self.localDir)
                logger.debug("parent: %s" % self._getParent())
                raise GangaFileError(msg)

        # FIXME CANNOT perform a remote globbing here in a nice way so have to just perform a copy when dealing with wildcards
        local_copy = os.path.join(destination, self.namePattern)
        if os.path.isfile(local_copy):
            logger.debug("File: %s already exists, not performing copy" %
                         (local_copy, ))
            return True

        outcome = self.copyTo(destination)
        if not self.localDir:
            self.localDir = destination
        return outcome
Exemple #3
0
    def internalCopyTo(self, targetPath):
        """
        Download the file identified by this object's LFN into a local directory.

        If namePattern is empty it is derived from the basename of the LFN
        (minus its last three characters when the file is flagged as
        compressed). Metadata is fetched afterwards when the guid or the
        replica locations are not known yet.

        Args:
            targetPath (str): The local directory handed to the download as destDir
        Returns:
            bool: always True
        Raises:
            GangaFileError: if this object has no LFN
        """
        if self.lfn == "":
            raise GangaFileError('Can\'t download a file without an LFN.')

        logger.info("Getting file %s" % self.lfn)
        download_command = 'getFile("%s", destDir="%s")' % (self.lfn, targetPath)
        execute(download_command, cred_req=self.credential_requirements)

        if self.namePattern == "":
            base_name = os.path.basename(self.lfn)
            self.namePattern = base_name[:-3] if self.compressed else base_name

        metadata_missing = self.guid == "" or not self.locations
        if metadata_missing:
            self.getMetadata()

        return True
Exemple #4
0
    def getMetadata(self):
        """
        Get the metadata associated with this file's LFN.

        If the file has no LFN yet, the user is first offered the chance to
        upload it. As a side effect the guid attribute is updated when the
        returned metadata carries a different GUID.

        Returns:
            The result of the 'getMetadata' query, with a 'replicas' entry
            (the current self.locations) added to this LFN's record under
            'Successful'.

        Raises:
            GangaFileError: if no replica is found for the file, or if the
                query result holds no successful record for this LFN.
        """
        if self.lfn == "":
            self._optionallyUploadLocalFile()

        # check that it has a replica
        if not self.getReplicas():
            raise GangaFileError("No replica found for this file!")

        # eval again here as datatime not included in dirac_ganga_server
        ret = execute('getMetadata("%s")' % self.lfn,
                      cred_req=self.credential_requirements)

        # Look the record up defensively once, instead of guarding the
        # comparison with .get() and then indexing unguarded (which turned a
        # failed lookup into a bare KeyError).
        metadata = ret.get('Successful', {}).get(self.lfn)
        if metadata is None:
            raise GangaFileError("Failed to get metadata for LFN '%s': %s" % (self.lfn, ret))

        if 'GUID' in metadata and self.guid != metadata['GUID']:
            self.guid = metadata['GUID']

        metadata.update({'replicas': self.locations})

        return ret
Exemple #5
0
    def copyTo(self, targetPath):
        """
        Copy the file to local storage using the appropriate file-transfer mechanism.

        When namePattern contains no wildcard and the file is already present
        in localDir, it is copied from there (unless the target already holds
        a file of that name). In every other case the file is downloaded
        through internalCopyTo().

        Args:
            targetPath (str): Target directory the file is to be copied to

        Returns:
            True for a local copy (or when the target file already exists),
            otherwise the return value of internalCopyTo().

        Raises:
            GangaFileError: if targetPath is not a non-empty string
        """
        # Reject both wrong types and empty values; the former check
        # ('not isinstance(...) and targetPath') let None and '' through.
        if not isinstance(targetPath, str) or not targetPath:
            raise GangaFileError(
                "Cannot perform a copyTo with no given targetPath!")

        # localDir may be unset (None); only probe the local copy when there
        # is a directory to look in.
        if regex.search(self.namePattern) is None and self.localDir is not None:
            local_source = os.path.join(self.localDir, self.namePattern)
            if os.path.isfile(local_source):
                destination = os.path.join(targetPath, self.namePattern)
                if not os.path.isfile(destination):
                    shutil.copy(local_source, destination)
                else:
                    logger.debug("Already found file: %s" % destination)
                return True

        # Again, cannot perform a remote glob here so have to ignore wildcards
        return self.internalCopyTo(targetPath)
Exemple #6
0
    def getReplicas(self, forceRefresh=False):
        """
        Get the replica information for this file.

        The result of the last query is cached on the object and reused
        unless forceRefresh is True, the cache is empty, or the cache holds
        no entry for the current LFN.

        Args:
            forceRefresh (bool): query again even if a cached result exists;
                the flag is passed on to every subfile

        Returns:
            For an object with subfiles, a list with one getReplicas() result
            per subfile. Otherwise a one-element list holding the replica
            record of this LFN, or an empty dict when the query returned no
            successful record for it.

        Raises:
            GangaFileError: if the file has no LFN (even after offering an upload)
            GangaDiracError: re-raised when the replica query itself fails
        """
        if self.lfn == '':
            self._optionallyUploadLocalFile()
        if self.lfn == '':
            raise GangaFileError(
                "Can't find replicas for file which has no LFN!")

        if len(self.subfiles) != 0:
            # Forward forceRefresh so that a forced refresh is not silently
            # ignored for wildcard/sub files.
            return [subfile.getReplicas(forceRefresh) for subfile in self.subfiles]

        # deep copy just before we change it in case we're pointing to the
        # data stored in original from a copy
        if self._have_copied:
            self._storedReplicas = copy.deepcopy(self._storedReplicas)

        # A cache without an entry for the current LFN is stale (e.g. the LFN
        # changed since the last query) and must be refreshed rather than
        # indexed, which would raise a KeyError.
        if (forceRefresh or self._storedReplicas == {}
                or self.lfn not in self._storedReplicas):

            try:
                self._storedReplicas = execute(
                    'getReplicas("%s")' % self.lfn,
                    cred_req=self.credential_requirements)
            except GangaDiracError:
                logger.error("Couldn't find replicas for: %s" %
                             str(self.lfn))
                self._storedReplicas = {}
                raise

            try:
                self._storedReplicas = self._storedReplicas['Successful']
            except Exception as err:
                logger.error("Unknown Error: %s from %s" %
                             (str(err), self._storedReplicas))
                raise

            logger.debug("getReplicas: %s" % str(self._storedReplicas))

            if self.lfn not in self._storedReplicas:
                # kept as an empty dict for backward compatibility; callers
                # only rely on it being falsy
                return {}

            self._updateRemoteURLs(self._storedReplicas)

        return [self._storedReplicas[self.lfn]]
Exemple #7
0
    def remove(self):
        """
        Remove this LFN from the grid and clear this object's grid identity.

        After the 'removeFile' command has been executed the lfn, locations
        and guid attributes are reset to their empty values.

        Returns:
            bool: always True
        Raises:
            GangaFileError: if this object has no LFN
        """
        target_lfn = self.lfn
        if target_lfn == "":
            raise GangaFileError('Can\'t remove a  file from DIRAC SE without an LFN.')

        logger.info('Removing file %s' % target_lfn)
        removal_command = 'removeFile("%s")' % target_lfn
        execute(removal_command, cred_req=self.credential_requirements)

        # the file is gone: forget everything that referred to it
        self.lfn = ""
        self.locations = []
        self.guid = ''
        return True
Exemple #8
0
    def removeReplica(self, SE):
        """
        Remove the replica of this file held at the given SE.

        Args:
            SE (str): the storage element whose replica is to be removed

        Returns:
            bool: always True

        Raises:
            GangaFileError: if self.locations does not contain SE
            GangaDiracError: propagated unchanged if the removal command fails
        """
        # Called before consulting self.locations; presumably this refreshes
        # the known locations - TODO confirm against _updateRemoteURLs.
        self.getReplicas()
        if SE not in self.locations:
            raise GangaFileError("No replica at supplied SE: %s" % SE)

        logger.info("Removing replica at %s for LFN %s" % (SE, self.lfn))
        # No try/except here: catching the error only to 'raise err' added
        # nothing and discarded the original traceback on Python 2.
        execute('removeReplica("%s", "%s")' % (self.lfn, SE),
                cred_req=self.credential_requirements)
        self.locations.remove(SE)

        return True
Exemple #9
0
    def replicate(self, destSE, sourceSE=''):
        """
        Replicate this file's LFN to another SE and record the new location.

        Args:
            destSE (str): the SE to replicate the file to
            sourceSE (str): the SE to use as the source for the file

        Raises:
            GangaFileError: if this object has no LFN
        """
        if not self.lfn:
            raise GangaFileError('Must supply an lfn to replicate')

        logger.info("Replicating file %s to %s" % (self.lfn, destSE))
        replicate_command = 'replicateFile("%s", "%s", "%s")' % (self.lfn, destSE, sourceSE)
        execute(replicate_command, cred_req=self.credential_requirements)

        # remember the new location, avoiding duplicates
        already_listed = destSE in self.locations
        if not already_listed:
            self.locations.append(destSE)
Exemple #10
0
    def put(self, lfn='', force=False, uploadSE="", replicate=False):
        """
        Upload the local file(s) matching namePattern to the grid.

        The upload goes to a single storage element: the first entry of
        uploadSE if given, otherwise the DiracFile.defaultSE attribute,
        otherwise a random entry of configDirac['allDiracSE'].

        If this object already has an LFN and force is False, the user is
        asked interactively to confirm; any answer other than 'y' aborts.

        Args:
            lfn (str): LFN to associate with the file before uploading.
                NOTE(review): the upload destination is always built from
                remoteDir (or a generated folder) plus the file's basename,
                and self.lfn is overwritten with it on success - confirm
                whether the supplied lfn is meant to be the destination.
            force (bool): skip the confirmation prompt when an LFN already exists
            uploadSE (str or list): SE, or list of SEs, to use. The first one
                receives the upload; the others are only used as replication
                targets when replicate is True. A list passed in is not modified.
            replicate (bool): replicate each uploaded file to the remaining
                entries of uploadSE after the upload

        Returns:
            None if the user declined the confirmation prompt. Otherwise a
            GangaList: when namePattern contains wildcard characters it holds
            one new DiracFile per matched local file (uploaded, or with its
            failureReason attribute populated if the upload failed); without
            wildcards it holds this object only.

        Raises:
            GangaFileError: if namePattern is empty, the source directory is
                not valid, no storage element can be determined, or a file
                expected on disk is missing.
        """

        if self.lfn != "" and force == False:
            if lfn == '':
                logger.warning(
                    "Warning you're about to 'put' this DiracFile: %s on the grid as it already has an lfn: %s"
                    % (self.namePattern, self.lfn))
            else:
                logger.warning(
                    "Warning you're attempting to put this DiracFile: %s" %
                    self.namePattern)
                logger.warning("It currently has an LFN associated with it: %s" %
                               self.lfn)
                logger.warning(
                    "Do you want to continue and attempt to upload to: %s" % lfn)

            decision = raw_input('y / [n]:')
            while decision.lower() not in ('y', 'n', ''):
                decision = raw_input('y / [n]:')

            # the default (empty) answer means 'no'
            if decision.lower() != 'y':
                return

        if lfn and os.path.basename(lfn) != self.namePattern:
            logger.warning(
                "Changing namePattern from: '%s' to '%s' during put operation"
                % (self.namePattern, os.path.basename(lfn)))

        if lfn:
            self.lfn = lfn

        # looks like will only need this for the interactive uploading of jobs.
        # Also if any backend need dirac upload on client then when downloaded
        # this will upload then delete the file.

        if self.namePattern == "":
            if self.lfn != '':
                logger.warning(
                    "'Put'-ing a file with ONLY an existing LFN makes no sense!"
                )
            raise GangaFileError(
                'Can\'t upload a file without a local file name.')

        sourceDir = self.localDir
        if self.localDir is None:
            sourceDir = os.getcwd()
            # attached to a job, use the joboutputdir
            if self._parent != None and os.path.isdir(
                    self.getJobObject().outputdir):
                sourceDir = self.getJobObject().outputdir

        if not os.path.isdir(sourceDir):
            raise GangaFileError(
                'localDir attribute is not a valid dir, don\'t know from which dir to take the file'
            )

        is_wildcard = regex.search(self.namePattern) is not None

        if is_wildcard:
            if self.lfn != "":
                logger.warning(
                    "Cannot specify a single lfn for a wildcard namePattern")
                logger.warning("LFN will be generated automatically")
                self.lfn = ""

        if not self.remoteDir:
            try:
                job = self.getJobObject()
                lfn_folder = os.path.join("GangaJob_%s" % job.getFQID('/'),
                                          "OutputFiles")
            except AssertionError:
                # not attached to a job: fall back to a date-stamped folder
                t = datetime.datetime.now()
                this_date = t.strftime("%H.%M_%A_%d_%B_%Y")
                lfn_folder = os.path.join('GangaFiles_%s' % this_date)
            lfn_base = os.path.join(
                DiracFile.diracLFNBase(self.credential_requirements),
                lfn_folder)

        else:
            lfn_base = os.path.join(
                DiracFile.diracLFNBase(self.credential_requirements),
                self.remoteDir)

        if uploadSE == "":
            if self.defaultSE != "":
                storage_elements = [self.defaultSE]
            else:
                if configDirac['allDiracSE']:
                    storage_elements = [
                        random.choice(configDirac['allDiracSE'])
                    ]
                else:
                    raise GangaFileError(
                        "Can't upload a file without a valid defaultSE or storageSE, please provide one"
                    )
        elif isinstance(uploadSE, list):
            # work on a copy so the caller's list is never modified
            storage_elements = list(uploadSE)
        else:
            storage_elements = [uploadSE]

        outputFiles = GangaList()
        for this_file in glob.glob(os.path.join(sourceDir, self.namePattern)):
            name = this_file

            if not os.path.exists(name):
                if not self.compressed:
                    raise GangaFileError(
                        'Cannot upload file. File "%s" must exist!' % name)
                name += '.gz'
                if not os.path.exists(name):
                    raise GangaFileError('File "%s" must exist!' % name)
            else:
                if self.compressed:
                    # NOTE(review): the file name is interpolated into a shell
                    # command unquoted; names containing spaces or shell
                    # metacharacters will break this.
                    os.system('gzip -c %s > %s.gz' % (name, name))
                    name += '.gz'
                    if not os.path.exists(name):
                        raise GangaFileError('File "%s" must exist!' % name)

            lfn = os.path.join(lfn_base, os.path.basename(this_file))

            d = DiracFile()
            d.namePattern = os.path.basename(name)
            d.compressed = self.compressed
            d.localDir = sourceDir
            stdout = ''
            logger.info('Uploading file \'%s\' to \'%s\' as \'%s\'' %
                        (name, storage_elements[0], lfn))
            logger.debug('execute: uploadFile("%s", "%s", %s)' %
                         (lfn, os.path.join(sourceDir,
                                            name), str([storage_elements[0]])))
            try:
                stdout = execute('uploadFile("%s", "%s", %s)' %
                                 (lfn, os.path.join(sourceDir, name),
                                  str([storage_elements[0]])),
                                 cred_req=self.credential_requirements)
            except GangaDiracError as err:
                logger.warning("Couldn't upload file '%s': \'%s\'" %
                               (os.path.basename(name), err))
                failureReason = "Error in uploading file '%s' : '%s'" % (
                    os.path.basename(name), err)
                if is_wildcard:
                    d.failureReason = failureReason
                    outputFiles.append(d)
                    continue
                self.failureReason += '\n' + failureReason
                continue

            stdout_temp = stdout.get('Successful')

            if not stdout_temp:
                msg = "Couldn't upload file '%s': \'%s\'" % (
                    os.path.basename(name), stdout)
                logger.warning(msg)
                if is_wildcard:
                    d.failureReason = msg
                    outputFiles.append(d)
                    continue
                self.failureReason = msg
                continue
            else:
                lfn_out = stdout_temp[lfn]

            # when doing the two step upload delete the temp file
            if self.compressed or self._parent != None:
                os.remove(name)
            # need another eval as datetime needs to be included.
            guid = lfn_out.get('GUID', '')
            if is_wildcard:
                d.lfn = lfn
                d.remoteDir = os.path.dirname(lfn)
                d.locations = lfn_out.get('allDiracSE', '')
                d.guid = guid
                outputFiles.append(d)
                continue
            else:
                self.lfn = lfn
                self.remoteDir = os.path.dirname(lfn)
                self.locations = lfn_out.get('allDiracSE', '')
                self.guid = guid

        if replicate == True:
            # the first SE received the upload; the rest are replica targets
            replica_targets = storage_elements[1:]

            if len(outputFiles) == 0:
                for se in replica_targets:
                    self.replicate(se)
            else:
                # Wildcard mode: the uploaded files are the new DiracFile
                # objects (this object has no LFN of its own), including the
                # case where the wildcard matched exactly one file.
                for this_file in outputFiles:
                    if not this_file.lfn:
                        # upload failed; failureReason is already recorded
                        continue
                    for se in replica_targets:
                        this_file.replicate(se)

        if len(outputFiles) > 0:
            return outputFiles
        else:
            outputFiles.append(self)
            return outputFiles
Exemple #11
0
 def processWildcardMatches(self):
     """
     Refuse namePatterns that match the module's wildcard pattern.

     Does nothing when namePattern contains no match for regex.

     Raises:
         GangaFileError: if namePattern matches regex
     """
     wildcard_found = regex.search(self.namePattern) is not None
     if not wildcard_found:
         return
     raise GangaFileError(
         "No wildcards in inputfiles for DiracFile just yet. Dirac are exposing this in API soon."
     )