Ejemplo n.º 1
0
    def buildListOfBadFiles(self):
        '''Fill self.bad_files and self.good_files from the IntegrityCheck report.

        Looks in self.castorDir for files named IntegrityCheck_*.txt. When at
        least one is found, the report is loaded through PublishToFileSystem.
        A non-empty report is stored in self.report, self.maskExists is set
        to True, and every entry of report['Files'] is classified:

        * first status element false      -> self.bad_files[name] = 'MarkedBad'
        * name listed in 'ValidDuplicates' -> self.bad_files[name] = 'ValidDup'
        * anything else                    -> appended to self.good_files

        A missing or empty report leaves both collections empty.

        Raises:
            IntegrityCheckError: when no IntegrityCheck_*.txt file exists in
                self.castorDir.
        '''
        mask = "IntegrityCheck"

        self.bad_files = {}
        self.good_files = []

        # Raw string: the backslash in the pattern belongs to the regex, not
        # to a Python string escape.
        file_mask = castortools.matchingFiles(self.castorDir, r'^%s_.*\.txt$' % mask)
        if not file_mask:
            raise IntegrityCheckError( "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found" )

        from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
        report = PublishToFileSystem(mask).get(self.castorDir)
        if not report:
            # None or empty report: nothing to classify.
            return

        self.maskExists = True
        self.report = report
        dup = report.get('ValidDuplicates', {})
        for name, status in report['Files'].iteritems():
            if not status[0]:
                self.bad_files[name] = 'MarkedBad'
            elif name in dup:
                # Membership test replaces the deprecated dict.has_key().
                self.bad_files[name] = 'ValidDup'
            else:
                self.good_files.append(name)
Ejemplo n.º 2
0
    def buildListOfBadFiles(self):
        '''Fill self.bad_files and self.good_files from the IntegrityCheck report.

        Looks in self.castorDir for files named IntegrityCheck_*.txt. When at
        least one is found, the report is loaded through PublishToFileSystem.
        A non-empty report is stored in self.report, self.maskExists is set
        to True, and every entry of report['Files'] is classified:

        * first status element false      -> self.bad_files[name] = 'MarkedBad'
        * name listed in 'ValidDuplicates' -> self.bad_files[name] = 'ValidDup'
        * anything else                    -> appended to self.good_files

        A missing or empty report leaves both collections empty.

        Raises:
            IntegrityCheckError: when no IntegrityCheck_*.txt file exists in
                self.castorDir.
        '''
        mask = "IntegrityCheck"

        self.bad_files = {}
        self.good_files = []

        # Raw string: the backslash in the pattern belongs to the regex, not
        # to a Python string escape.
        file_mask = castortools.matchingFiles(self.castorDir,
                                              r'^%s_.*\.txt$' % mask)
        if not file_mask:
            raise IntegrityCheckError(
                "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found"
            )

        from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
        report = PublishToFileSystem(mask).get(self.castorDir)
        if not report:
            # None or empty report: nothing to classify.
            return

        self.maskExists = True
        self.report = report
        dup = report.get('ValidDuplicates', {})
        for name, status in report['Files'].iteritems():
            if not status[0]:
                self.bad_files[name] = 'MarkedBad'
            elif name in dup:
                # Membership test replaces the deprecated dict.has_key().
                self.bad_files[name] = 'ValidDup'
            else:
                self.good_files.append(name)
Ejemplo n.º 3
0
    def __init__(self, dirLocalOrTgzDirOnCastor, castorTgz, dbsAPI):
        #self.dbAPI = DatabaseAPI.DatabaseAPI('/afs/cern.ch/user/p/pmeckiff/public/bookkeeping.db')
        self.dirLocal = None
        self.tgzDirOnCastor = None
        self.dirOnCastor = None
        self.setName = dirLocalOrTgzDirOnCastor
        self.dbsAPI = dbsAPI

        
        # Set Directory name if local
        local = dirLocalOrTgzDirOnCastor.rstrip('/')
        castorTgz = castortools.castorToLFN(castorTgz)

        print castorTgz
        # Check if local first (obviously)
        if self.isDirLocal(local  ):
            print "File is on local machine: " + local
            self.dirLocal = local #if found set class attribute
        # Check if on castor next
        elif self.isTgzDirOnCastor(castorTgz):
            print "File is directory on Castor"
            self.tgzDirOnCastor = castorTgz # if found set class attribute
            for i in castortools.matchingFiles(castorTgz.rstrip("/Logger.tgz"), ".*tgz"): print i
        # If logger is not present but directory exists
        elif self.isDirOnCastor(castorTgz.rstrip("/Logger.tgz")):
            print "Directory is valid on Castor, but no logger file is present."
            self.dirOnCastor = castorTgz.rstrip("/Logger.tgz")
        # If neither then raise an exception
        else:
            raise ValueError( dirLocalOrTgzDirOnCastor + ' is neither a tgz directory on castor (provide a LFN!) nor a local directory')
Ejemplo n.º 4
0
 def get(self, dir):
     """Find the latest matching file in ``dir`` and read it.

     Candidates are the entries castortools.matchingFiles returns for the
     pattern built from self.parent. The one whose basename sorts last is
     passed to self.read. Returns None when nothing matches.
     """
     pattern = '^%s_.*\.txt$' % self.parent
     keyed = [(os.path.basename(path), path)
              for path in castortools.matchingFiles(dir, pattern)]
     if not keyed:
         return None
     # The largest (basename, path) pair is the last one in sorted order.
     latest = max(keyed)
     return self.read(latest[1])
Ejemplo n.º 5
0
 def get(self, dir):
     """Find the latest matching file in ``dir`` and read it.

     Candidates are the entries castortools.matchingFiles returns for the
     pattern built from self.parent. The one whose basename sorts last is
     passed to self.read. Returns None when nothing matches.
     """
     pattern = '^%s_.*\.txt$' % self.parent
     keyed = [(os.path.basename(path), path)
              for path in castortools.matchingFiles(dir, pattern)]
     if not keyed:
         return None
     # The largest (basename, path) pair is the last one in sorted order.
     latest = max(keyed)
     return self.read(latest[1])
Ejemplo n.º 6
0
def checkForLogger( dataset_lfn_dir ):
    """Create the temporary logger file if the dataset has a Logger.tgz.

    'dataset_lfn_dir' is the dataset directory; it is handed to
    eostools.matchingFiles and to createLoggerTemporaryFile.

    Returns the result of createLoggerTemporaryFile when exactly one entry
    matches "Logger.tgz". Raises NameError in every other case.
    """
    loggers = eostools.matchingFiles( dataset_lfn_dir, "Logger.tgz" )
    if len( loggers ) != 1:
        raise NameError("ERROR: No Logger.tgz file found for this sample. If you would like to preceed anyway, please copy Logger.tgz from your local production directory to your production directory on eos.\n")
    return createLoggerTemporaryFile( dataset_lfn_dir )
Ejemplo n.º 7
0
def checkForLogger(dataset_lfn_dir):
    """Create the temporary logger file if the dataset has a Logger.tgz.

    'dataset_lfn_dir' is the dataset directory; it is handed to
    eostools.matchingFiles and to createLoggerTemporaryFile.

    Returns the result of createLoggerTemporaryFile when exactly one entry
    matches "Logger.tgz". Raises NameError in every other case.
    """
    loggers = eostools.matchingFiles(dataset_lfn_dir, "Logger.tgz")
    if len(loggers) != 1:
        raise NameError(
            "ERROR: No Logger.tgz file found for this sample. If you would like to preceed anyway, please copy Logger.tgz from your local production directory to your production directory on eos.\n"
        )
    return createLoggerTemporaryFile(dataset_lfn_dir)
Ejemplo n.º 8
0
def burrow(LFN):
        """Recursively collect the files found below ``LFN``.

        Entries returned by eostools.matchingFiles whose path contains
        "histo" are skipped. An entry whose path contains no '.' is treated
        as a directory and searched recursively; every other entry is
        returned as a file.

        Returns a flat list of file paths.
        """
        # NOTE(review): "[!histo]*." reads like a glob rather than a regular
        # expression - confirm what eostools.matchingFiles expects.
        entries = eostools.matchingFiles(LFN, "[!histo]*.")

        # Build a filtered list instead of deleting while iterating: the old
        # loop searched for the file name inside "histo" (arguments swapped)
        # and would have raised TypeError by indexing the list with a string.
        entries = [entry for entry in entries if "histo" not in entry]

        returnFiles = []
        for entry in entries:
            if "." in entry:
                returnFiles.append(entry)
            else:
                # No dot anywhere in the path: descend into it.
                returnFiles.extend(burrow(entry))

        return returnFiles
Ejemplo n.º 9
0
def burrow(LFN):
    """Recursively collect the files found below ``LFN``.

    Entries returned by eostools.matchingFiles whose path contains "histo"
    are skipped. An entry whose path contains no '.' is treated as a
    directory and searched recursively; every other entry is returned as a
    file.

    Returns a flat list of file paths.
    """
    # NOTE(review): "[!histo]*." reads like a glob rather than a regular
    # expression - confirm what eostools.matchingFiles expects.
    entries = eostools.matchingFiles(LFN, "[!histo]*.")

    # Build a filtered list instead of deleting while iterating: the old loop
    # searched for the file name inside "histo" (arguments swapped) and would
    # have raised TypeError by indexing the list with a string.
    entries = [entry for entry in entries if "histo" not in entry]

    returnFiles = []
    for entry in entries:
        if "." in entry:
            returnFiles.append(entry)
        else:
            # No dot anywhere in the path: descend into it.
            returnFiles.extend(burrow(entry))

    return returnFiles
Ejemplo n.º 10
0
 def checkForLogger(self, sampleName, fileOwner):
     """Check the dataset directory on EOS for a Logger.tgz file.

     The directory is self.dataset_details['LFN']. When exactly one
     Logger.tgz entry is found there, or self._force is set, the temporary
     logger file is created with self.createLoggerTemporaryFile().

     'sampleName' and 'fileOwner' are part of the signature but are not
     read by this method.

     Returns True on success.
     Raises NameError when the Logger.tgz check fails and self._force is
     not set.
     """
     lfn = self.dataset_details['LFN']
     loggerFound = len(eostools.matchingFiles(lfn, "Logger.tgz")) == 1
     if not (loggerFound or self._force):
         # Adjacent literals instead of backslash continuations inside the
         # string, which used to embed the source indentation in the message.
         raise NameError(
             "ERROR: No Logger.tgz file found for this sample. "
             "If you would like to preceed anyway, "
             "please copy Logger.tgz from your local "
             "production directory to your production "
             "directory on eos.\n")
     self.createLoggerTemporaryFile()
     return True
Ejemplo n.º 11
0
 def checkForLogger(self, sampleName, fileOwner):
     """Check the dataset directory on EOS for a Logger.tgz file.

     The directory is self.dataset_details['LFN']. When exactly one
     Logger.tgz entry is found there, or self._force is set, the temporary
     logger file is created with self.createLoggerTemporaryFile().

     'sampleName' and 'fileOwner' are part of the signature but are not
     read by this method.

     Returns True on success.
     Raises NameError when the Logger.tgz check fails and self._force is
     not set.
     """
     lfn = self.dataset_details['LFN']
     loggerFound = len(eostools.matchingFiles(lfn, "Logger.tgz")) == 1
     if not (loggerFound or self._force):
         # Adjacent literals instead of backslash continuations inside the
         # string, which used to embed the source indentation in the message.
         raise NameError(
             "ERROR: No Logger.tgz file found for this sample. "
             "If you would like to preceed anyway, "
             "please copy Logger.tgz from your local "
             "production directory to your production "
             "directory on eos.\n")
     self.createLoggerTemporaryFile()
     return True
Ejemplo n.º 12
0
    def run(self, input):
        """Fetch the IntegrityCheck report for the directory found on castor.

        'input' must provide input['FindOnCastor']['Directory'].

        Returns a dict with two keys: 'MaskPresent' (True when a report was
        obtained) and 'Report' (the report, or None). For user 'CMS' a fixed
        answer is returned without looking anything up.
        """
        # skip for DBS
        if self.user == 'CMS':
            return {'MaskPresent':True,'Report':'Files taken from DBS'}

        targetDir = input['FindOnCastor']['Directory']
        mask = "IntegrityCheck"
        report = None

        # An options object without a 'check' attribute counts as "check on".
        checkWanted = not hasattr(self.options,'check') or self.options.check
        if checkWanted and castortools.matchingFiles(targetDir, '^%s_.*\.txt$' % mask):
            from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
            publisher = PublishToFileSystem(mask)
            report = publisher.get(targetDir)

        return {'MaskPresent':report is not None,'Report':report}
Ejemplo n.º 13
0
    def run(self, input):
        """Fetch the IntegrityCheck report for the directory found on castor.

        'input' must provide input['FindOnCastor']['Directory'].

        Returns a dict with two keys: 'MaskPresent' (True when a report was
        obtained) and 'Report' (the report, or None). For user 'CMS' a fixed
        answer is returned without looking anything up.
        """
        # skip for DBS
        if self.user == 'CMS':
            return {'MaskPresent':True,'Report':'Files taken from DBS'}

        targetDir = input['FindOnCastor']['Directory']
        mask = "IntegrityCheck"
        report = None

        # An options object without a 'check' attribute counts as "check on".
        checkWanted = not hasattr(self.options,'check') or self.options.check
        if checkWanted and castortools.matchingFiles(targetDir, '^%s_.*\.txt$' % mask):
            from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
            publisher = PublishToFileSystem(mask)
            report = publisher.get(targetDir)

        return {'MaskPresent':report is not None,'Report':report}
Ejemplo n.º 14
0
    def checkContiguity(self, targetDir):
        #GET ALL ROOT NAMES
        fileNames = castortools.matchingFiles(targetDir, ".*root")
        
        fileGroups =[]
        groupName = []
        # Loop while there are still filenames that do not belong to a file group
        while len(fileNames)>0:

            # Set filename for this pass as the current first element of the filename array
            filename = fileNames[0]

            # Create a new array to hold the names of the group
            fileGroup = []

            # Strip every filename (temporarily) of its file type, number and leading underscore, so that files from each
            # group (root set) have the same name
            for listItem in fileNames:
                # If names are of the same type (prevents a lot of unneccesary processing)
                if self.checkRootType(listItem) == self.checkRootType(filename):

                    # If item is from grid
                    if self.checkRootType(listItem):
                        #If items are the same
                        if self._stdNameFromGrid(listItem)==self._stdNameFromGrid(filename):
                            #print listItem
                            fileGroup.append(listItem)
                            

                    # If item is not from grid
                    elif listItem.rstrip("_[1234567890]*\.root")==filename.rstrip("_[1234567890]*\.root"):
                        # If the file name matches that of the first element in the fileNames array, they are of the same
                        # file group, so add to the fileGroup array
                        fileGroup.append(listItem)
                    

            # Remove the filenames that have been grouped, from the original filenames array,
            # so they do not get processed twice
            for item in fileGroup:
                fileNames.remove(item)

            # Add the new fileGroup to the array of fileGroups
            fileGroups.append(fileGroup)
            

        # Define a flag variable to check for incontiguous root sets
        groupFlag = True
        setFlag = True
        validity = []
        # Count through the groups
        for group in fileGroups:
            
            # Set name of group to be returned
            groupName = ""
            
            # Set an array for numbers
            numbers = []

            
            if self._checkIfNamed(group[0]):
                validity.append(group[0])
                print group[0]
            else:
                # Exract the filenumber from each file in the group and add it to the numbers array
                if self.checkRootType(group[0]):
                    for element in group:
                        num = element.split("_")[-3]
                        numbers.append(int(num))
                else:

                    for element in group:
                        num = element.rstrip(".root").split("_")[-1]
                        numbers.append(int(num))

                count = 0
                # Sort Numbers so that they are in ascending order
                numbers.sort()
                if numbers[0] == 1: count +=1
                # Check that all numbers are there and index every element
                for i in numbers:
                    # If an element is erroneous call up a flag and move on to the next set
                    if i != count:
                        groupFlag = False


                    count+=1
                # Create names for file groups
                if self.checkRootType(group[0]):
                    # Create name for grid type in format: name_[a-n]_identifier_XXX.root
                    arr = group[0].split("_")
                    arr[-1] = "XXX.root"
                    arr[-3] = "["+str(numbers[0])+"-" + str(numbers[-1])+"]"
                    groupName = "_".join(arr)
                    print groupName
                else:
                    # Create name for normal type in format name_[a-n].root
                    groupName = group[0].rstrip(str(numbers[0])+".root") +"["+str(numbers[0])+"-"+ str(numbers[-1])+"].root"
                    print groupName


                # Append group name with contiguity to return array
                if groupFlag==True:
                    validity.append(groupName+": CONTIGUOUS")
                else:
                    validity.append(groupName+": NON-CONTIGUOUS")
                    setFlag = False


        # If there are non-contiguous file sets, return false and print error message.
        # Otherwise return true

        if setFlag==False:
            print "There are non-contigious root files"
            validity.append("INVALID")
            
        else:
            print "Root files are all contiguous"
            validity.append("VALID")
        return validity
Ejemplo n.º 15
0
# Exactly two positional arguments are required: a directory and a regexp.
if len(args) != 2:
    parser.print_help()
    sys.exit(1)

dir = args[0]
regexp = args[1]


# Stop early when castortools reports that the directory does not exist.
exists = castortools.fileExists( dir )
if not exists:
    print 'sourceFileList: directory does not exist. Exiting'
    sys.exit(1)


# matchingFiles is given the directory and the user-supplied regexp.
files = castortools.matchingFiles( dir, regexp)

# With the check option set, look for IntegrityCheck_*.txt files in dir.
mask = "IntegrityCheck"
file_mask = []  
if options.check:
    file_mask = castortools.matchingFiles(dir, '^%s_.*\.txt$' % mask)
# bad_files maps a file name to the reason it is rejected.
bad_files = {}    
if options.check and file_mask:
    from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
    p = PublishToFileSystem(mask)
    report = p.get(dir)
    # Only a non-empty report is inspected.
    if report is not None and report:
        # NOTE(review): dup is not read in the lines visible here - check
        # whether the rest of the script uses it.
        dup = report.get('ValidDuplicates',{})
        for name, status in report['Files'].iteritems():
            # A false first status element marks the file as bad.
            if not status[0]:
                bad_files[name] = 'MarkedBad'
Ejemplo n.º 16
0
(options, args) = parser.parse_args()

# Exactly two positional arguments are required: a directory and a regexp.
if len(args) != 2:
    parser.print_help()
    sys.exit(1)

dir = args[0]
regexp = args[1]

# Stop early when castortools reports that the directory does not exist.
exists = castortools.fileExists(dir)
if not exists:
    print 'sourceFileList: directory does not exist. Exiting'
    sys.exit(1)

# matchingFiles is given the directory and the user-supplied regexp.
files = castortools.matchingFiles(dir, regexp)

# With the check option set, look for IntegrityCheck_*.txt files in dir.
mask = "IntegrityCheck"
file_mask = []
if options.check:
    file_mask = castortools.matchingFiles(dir, '^%s_.*\.txt$' % mask)
# bad_files maps a file name to the reason it is rejected.
bad_files = {}
if options.check and file_mask:
    from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
    p = PublishToFileSystem(mask)
    report = p.get(dir)
    # Only a non-empty report is inspected.
    if report is not None and report:
        # NOTE(review): dup is not read in the lines visible here - check
        # whether the rest of the script uses it.
        dup = report.get('ValidDuplicates', {})
        for name, status in report['Files'].iteritems():
            # A false first status element marks the file as bad.
            if not status[0]:
                bad_files[name] = 'MarkedBad'
Ejemplo n.º 17
0
 def buildListOfFiles(self, pattern='.*root'):
     '''Store in self.files the entries of self.castorDir that match pattern.

     The list comes from castortools.matchingFiles; the default pattern
     selects root files.'''
     matches = castortools.matchingFiles(self.castorDir, pattern)
     self.files = matches
Ejemplo n.º 18
0
 def buildListOfFiles(self, pattern='.*root'):
     '''Store in self.files the entries of self.castorDir that match pattern.

     The list comes from castortools.matchingFiles; the default pattern
     selects root files.'''
     matches = castortools.matchingFiles(self.castorDir, pattern)
     self.files = matches
Ejemplo n.º 19
0
# -f/--force: stored as options.force and handed to castortools.cmsStage below.
parser.add_option("-f", "--force", action="store_true",
                  dest="force",
                  help="force overwrite",
                  default=False)


(options,args) = parser.parse_args()

# Exactly three positional arguments are required:
# source directory, destination directory and a regexp.
if len(args)!=3:
    parser.print_help()
    sys.exit(1)

dir1 = args[0]
dir2 = args[1]
regexp = args[2]


# matchingFiles is given the source directory and the user-supplied regexp.
files = castortools.matchingFiles( dir1, regexp )

# With the negate option the file list is only printed; otherwise the files
# are also passed to castortools.cmsStage together with the destination.
if options.negate:
    print 'NOT copying ',  
    pprint.pprint(files)
else:
    print 'Copying ',  
    pprint.pprint(files)

    castortools.cmsStage( dir2, files, options.force) 
    
# Source and destination are printed in both modes.
print 'from:', dir1
print 'to  :', dir2
Ejemplo n.º 20
0
Example (just try, the -n option negates the command!):\ncmsStageAll.py /store/cmst3/user/cbern/CMG/HT/Run2011A-PromptReco-v1/AOD/PAT_CMG /tmp '.*\.root' -n\n\nIMPORTANT NOTE: castor directories must be provided as logical file names (LFN), starting by /store."""

# -n/--negate: only list the files, do not stage them.
# -f/--force: stored as options.force and handed to castortools.cmsStage below.
parser.add_option("-n", "--negate", action="store_true", dest="negate", help="do not proceed", default=False)
parser.add_option("-f", "--force", action="store_true", dest="force", help="force overwrite", default=False)


(options, args) = parser.parse_args()

# Exactly three positional arguments are required:
# source directory, destination directory and a regexp.
if len(args) != 3:
    parser.print_help()
    sys.exit(1)

dir1 = args[0]
dir2 = args[1]
regexp = args[2]


# matchingFiles is given the source directory and the user-supplied regexp.
files = castortools.matchingFiles(dir1, regexp)

# With the negate option the file list is only printed; otherwise the files
# are also passed to castortools.cmsStage together with the destination.
if options.negate:
    print "NOT copying ",
    pprint.pprint(files)
else:
    print "Copying ",
    pprint.pprint(files)

    castortools.cmsStage(dir2, files, options.force)

# Source and destination are printed in both modes.
print "from:", dir1
print "to  :", dir2