def buildListOfBadFiles(self):
    '''Fills the list of bad files from the IntegrityCheck log.

    Reads the latest IntegrityCheck_*.txt report published in
    self.castorDir and classifies every file listed in it:
      - integrity status flag false -> self.bad_files[name] = 'MarkedBad'
      - listed as a valid duplicate -> self.bad_files[name] = 'ValidDup'
      - otherwise                   -> appended to self.good_files
    When the integrity check file is not available, files are
    considered as good (both containers stay empty).

    Raises IntegrityCheckError when a mask file exists on castor but
    no usable report could be read from it.
    '''
    mask = "IntegrityCheck"
    self.bad_files = {}
    self.good_files = []

    file_mask = castortools.matchingFiles(self.castorDir, '^%s_.*\.txt$' % mask)
    if file_mask:
        # deferred import: only needed when a report actually exists
        from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
        p = PublishToFileSystem(mask)
        report = p.get(self.castorDir)
        if report is not None and report:
            self.maskExists = True
            self.report = report
            dup = report.get('ValidDuplicates', {})
            for name, status in report['Files'].iteritems():
                if not status[0]:
                    self.bad_files[name] = 'MarkedBad'
                # FIX: dict.has_key() is deprecated; 'in' is the
                # equivalent membership test.
                elif name in dup:
                    self.bad_files[name] = 'ValidDup'
                else:
                    self.good_files.append(name)
        else:
            raise IntegrityCheckError("ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found")
def buildListOfBadFiles(self):
    '''Fills the list of bad files from the IntegrityCheck log.

    Parses the most recent IntegrityCheck_*.txt report found in
    self.castorDir. Files flagged bad become bad_files[name] =
    'MarkedBad', valid duplicates become 'ValidDup', everything else
    goes to good_files. When the integrity check file is not
    available, files are considered as good.

    Raises IntegrityCheckError if a mask file is present but the
    report cannot be read.
    '''
    mask = "IntegrityCheck"
    self.bad_files = {}
    self.good_files = []

    file_mask = castortools.matchingFiles(self.castorDir, '^%s_.*\.txt$' % mask)
    if file_mask:
        # import here so samples without a report never pay for it
        from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
        p = PublishToFileSystem(mask)
        report = p.get(self.castorDir)
        if report is not None and report:
            self.maskExists = True
            self.report = report
            dup = report.get('ValidDuplicates', {})
            for name, status in report['Files'].iteritems():
                if not status[0]:
                    self.bad_files[name] = 'MarkedBad'
                # FIX: replaced deprecated dict.has_key(name) with the
                # equivalent 'name in dup' membership test.
                elif name in dup:
                    self.bad_files[name] = 'ValidDup'
                else:
                    self.good_files.append(name)
        else:
            raise IntegrityCheckError("ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found")
def __init__(self, dirLocalOrTgzDirOnCastor, castorTgz, dbsAPI): #self.dbAPI = DatabaseAPI.DatabaseAPI('/afs/cern.ch/user/p/pmeckiff/public/bookkeeping.db') self.dirLocal = None self.tgzDirOnCastor = None self.dirOnCastor = None self.setName = dirLocalOrTgzDirOnCastor self.dbsAPI = dbsAPI # Set Directory name if local local = dirLocalOrTgzDirOnCastor.rstrip('/') castorTgz = castortools.castorToLFN(castorTgz) print castorTgz # Check if local first (obviously) if self.isDirLocal(local ): print "File is on local machine: " + local self.dirLocal = local #if found set class attribute # Check if on castor next elif self.isTgzDirOnCastor(castorTgz): print "File is directory on Castor" self.tgzDirOnCastor = castorTgz # if found set class attribute for i in castortools.matchingFiles(castorTgz.rstrip("/Logger.tgz"), ".*tgz"): print i # If logger is not present but directory exists elif self.isDirOnCastor(castorTgz.rstrip("/Logger.tgz")): print "Directory is valid on Castor, but no logger file is present." self.dirOnCastor = castorTgz.rstrip("/Logger.tgz") # If neither then raise an exception else: raise ValueError( dirLocalOrTgzDirOnCastor + ' is neither a tgz directory on castor (provide a LFN!) nor a local directory')
def get(self, dir):
    """Find the latest report file published in *dir* and read it.

    Returns None when no file matching '<parent>_*.txt' exists.
    """
    pattern = '^%s_.*\.txt$' % self.parent
    candidates = castortools.matchingFiles(dir, pattern)
    # sort on (basename, full path) so the newest timestamped name wins
    keyed = sorted((os.path.basename(path), path) for path in candidates)
    if not keyed:
        return None
    _, latest = keyed[-1]
    return self.read(latest)
def get(self, dir):
    """Read the latest '<parent>_*.txt' report found in *dir*.

    Returns None when the directory holds no matching file.
    """
    regex = '^%s_.*\.txt$' % self.parent
    found = castortools.matchingFiles(dir, regex)
    if not found:
        return None
    # pair every path with its basename and sort; last entry is newest
    ordered = sorted([(os.path.basename(path), path) for path in found])
    return self.read(ordered[-1][1])
def checkForLogger(dataset_lfn_dir):
    """Check the EOS directory for a Logger.tgz file; escape if absent.

    'sampleName' takes the name of the sample as a string
    'fileOwner' takes the file owner on EOS as a string
    """
    loggers = eostools.matchingFiles(dataset_lfn_dir, "Logger.tgz")
    # guard clause: bail out unless exactly one logger is present
    if len(loggers) != 1:
        raise NameError("ERROR: No Logger.tgz file found for this sample. If you would like to preceed anyway, please copy Logger.tgz from your local production directory to your production directory on eos.\n")
    return createLoggerTemporaryFile(dataset_lfn_dir)
def checkForLogger(dataset_lfn_dir):
    """Look for exactly one Logger.tgz under *dataset_lfn_dir* on EOS.

    'sampleName' takes the name of the sample as a string
    'fileOwner' takes the file owner on EOS as a string
    """
    matches = eostools.matchingFiles(dataset_lfn_dir, "Logger.tgz")
    if len(matches) == 1:
        return createLoggerTemporaryFile(dataset_lfn_dir)
    # no (or ambiguous) logger: abort with an explanatory error
    raise NameError(
        "ERROR: No Logger.tgz file found for this sample. If you would like to preceed anyway, please copy Logger.tgz from your local production directory to your production directory on eos.\n"
    )
def burrow(LFN):
    """Recursively collect file names under the EOS path *LFN*.

    Entries whose name contains 'histo' are skipped; entries without a
    '.' in their name are treated as directories and descended into.
    Returns a flat list of the remaining file names.
    """
    returnFiles = []
    entries = eostools.matchingFiles(LFN, "[!histo]*.")
    # FIX: the original called re.search(i, "histo") with the pattern
    # and string arguments swapped, and removed matches via
    # del(files[i]) — indexing the list with a *name* — while iterating
    # that same list.  Build a filtered list instead.
    entries = [entry for entry in entries if not re.search("histo", entry)]
    for entry in entries:
        if not re.search("\.", entry):
            # no dot in the name: assume a directory and recurse
            returnFiles.extend(burrow(entry))
        else:
            returnFiles.append(entry)
    return returnFiles
def burrow(LFN):
    """Walk the EOS path *LFN* and return every file name found.

    Names containing 'histo' are excluded; names without a '.' are
    assumed to be directories and are recursed into.
    """
    returnFiles = []
    found = eostools.matchingFiles(LFN, "[!histo]*.")
    # FIX: re.search takes (pattern, string) — the original had the
    # arguments reversed — and del(files[i]) used a filename as a list
    # index while mutating the list being iterated.  Filter safely.
    found = [name for name in found if not re.search("histo", name)]
    for name in found:
        if not re.search("\.", name):
            # directory-like entry: descend and gather its contents
            for sub in burrow(name):
                returnFiles.append(sub)
        else:
            returnFiles.append(name)
    return returnFiles
def checkForLogger(self, sampleName, fileOwner):
    """Checks the EOS directory for a Logger.tgz file, if not found, escapes

    'sampleName' takes the name of the sample as a string
    'fileOwner' takes the file owner on EOS as a string
    """
    # NOTE(review): sampleName and fileOwner are not used by the body;
    # the sample's recorded LFN determines where to look — confirm
    # whether the parameters are kept only for interface compatibility.
    lfn = self.dataset_details['LFN']
    # Accept when exactly one Logger.tgz is present, or when the user
    # forced the operation (self._force short-circuits the check).
    if len(eostools.matchingFiles(lfn, "Logger.tgz")) == 1 or self._force:
        self.createLoggerTemporaryFile()
        return True
    else:
        # No logger and not forced: abort with an explanatory error.
        raise NameError("ERROR: No Logger.tgz file found for\ this sample. If you would like to preceed anyway, \ please copy Logger.tgz from your local \ production directory to your production \ directory on eos.\n")
def checkForLogger(self, sampleName, fileOwner):
    """Checks the EOS directory for a Logger.tgz file, if not found, escapes

    'sampleName' takes the name of the sample as a string
    'fileOwner' takes the file owner on EOS as a string
    """
    # NOTE(review): neither sampleName nor fileOwner is referenced
    # below; the LFN stored in dataset_details drives the lookup.
    lfn = self.dataset_details['LFN']
    # Pass when exactly one Logger.tgz exists, or when --force was
    # requested (self._force bypasses the check entirely).
    if len(eostools.matchingFiles(lfn,"Logger.tgz"))==1 or self._force:
        self.createLoggerTemporaryFile()
        return True
    else:
        # Missing logger without force: fail loudly.
        raise NameError("ERROR: No Logger.tgz file found for\ this sample. If you would like to preceed anyway, \ please copy Logger.tgz from your local \ production directory to your production \ directory on eos.\n")
def run(self, input):
    """Look for an IntegrityCheck report in the directory found earlier.

    Returns a dict with 'MaskPresent' (bool) and 'Report' (the parsed
    report, or None when no mask file exists or checking is disabled).
    """
    # DBS-hosted samples carry no castor mask: nothing to check.
    if self.user == 'CMS':
        return {'MaskPresent': True, 'Report': 'Files taken from DBS'}

    dir = input['FindOnCastor']['Directory']
    mask = "IntegrityCheck"
    file_mask = []
    report = None
    # Run the check unless options explicitly carries check=False;
    # a missing 'check' attribute means "check by default".
    if getattr(self.options, 'check', True):
        file_mask = castortools.matchingFiles(dir, '^%s_.*\.txt$' % mask)
    if file_mask:
        from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
        publisher = PublishToFileSystem(mask)
        report = publisher.get(dir)
    return {'MaskPresent': report is not None, 'Report': report}
def checkContiguity(self, targetDir):
    """Group the root files in *targetDir* into sets and check that each
    set's file numbers form a contiguous run.

    Returns a list of per-group name strings suffixed with
    ': CONTIGUOUS' / ': NON-CONTIGUOUS', terminated by a final
    'VALID' or 'INVALID' marker.
    """
    #GET ALL ROOT NAMES
    fileNames = castortools.matchingFiles(targetDir, ".*root")
    fileGroups =[]
    groupName = []
    # Loop while there are still filenames that do not belong to a file group
    while len(fileNames)>0:
        # Set filename for this pass as the current first element of the filename array
        filename = fileNames[0]
        # Create a new array to hold the names of the group
        fileGroup = []
        # Strip every filename (temporarily) of its file type, number and leading underscore, so that files from each
        # group (root set) have the same name
        for listItem in fileNames:
            # If names are of the same type (prevents a lot of unneccesary processing)
            if self.checkRootType(listItem) == self.checkRootType(filename):
                # If item is from grid
                if self.checkRootType(listItem):
                    #If items are the same
                    if self._stdNameFromGrid(listItem)==self._stdNameFromGrid(filename):
                        #print listItem
                        fileGroup.append(listItem)
                # If item is not from grid
                # NOTE(review): rstrip strips a character *set*, not a
                # suffix — this comparison may over-strip trailing
                # characters of the stem; verify on real file names.
                elif listItem.rstrip("_[1234567890]*\.root")==filename.rstrip("_[1234567890]*\.root"):
                    # If the file name matches that of the first element in the fileNames array, they are of the same
                    # file group, so add to the fileGroup array
                    fileGroup.append(listItem)
        # Remove the filenames that have been grouped, from the original filenames array,
        # so they do not get processed twice
        for item in fileGroup:
            fileNames.remove(item)
        # Add the new fileGroup to the array of fileGroups
        fileGroups.append(fileGroup)
    # Define a flag variable to check for incontiguous root sets
    # NOTE(review): groupFlag is never reset inside the per-group loop,
    # so one broken group marks every later group non-contiguous —
    # confirm whether this is intended.
    groupFlag = True
    setFlag = True
    validity = []
    # Count through the groups
    for group in fileGroups:
        # Set name of group to be returned
        groupName = ""
        # Set an array for numbers
        numbers = []
        if self._checkIfNamed(group[0]):
            validity.append(group[0])
            print group[0]
        else:
            # Exract the filenumber from each file in the group and add it to the numbers array
            if self.checkRootType(group[0]):
                for element in group:
                    num = element.split("_")[-3]
                    numbers.append(int(num))
            else:
                for element in group:
                    num = element.rstrip(".root").split("_")[-1]
                    numbers.append(int(num))
            count = 0
            # Sort Numbers so that they are in ascending order
            numbers.sort()
            if numbers[0] == 1:
                count +=1
            # Check that all numbers are there and index every element
            for i in numbers:
                # If an element is erroneous call up a flag and move on to the next set
                if i != count:
                    groupFlag = False
                count+=1
            # Create names for file groups
            if self.checkRootType(group[0]):
                # Create name for grid type in format: name_[a-n]_identifier_XXX.root
                arr = group[0].split("_")
                arr[-1] = "XXX.root"
                arr[-3] = "["+str(numbers[0])+"-" + str(numbers[-1])+"]"
                groupName = "_".join(arr)
                print groupName
            else:
                # Create name for normal type in format name_[a-n].root
                # NOTE(review): rstrip with a computed character set —
                # same over-stripping caveat as above.
                groupName = group[0].rstrip(str(numbers[0])+".root") +"["+str(numbers[0])+"-"+ str(numbers[-1])+"].root"
                print groupName
            # Append group name with contiguity to return array
            if groupFlag==True:
                validity.append(groupName+": CONTIGUOUS")
            else:
                validity.append(groupName+": NON-CONTIGUOUS")
                setFlag = False
    # If there are non-contiguous file sets, return false and print error message.
    # Otherwise return true
    if setFlag==False:
        print "There are non-contigious root files"
        validity.append("INVALID")
    else:
        print "Root files are all contiguous"
        validity.append("VALID")
    return validity
# Script body: expects exactly two positional arguments, a castor
# directory (LFN) and a regexp to match files against.
if len(args) != 2:
    parser.print_help()
    sys.exit(1)

dir = args[0]
regexp = args[1]

# Abort early when the directory does not exist on castor.
exists = castortools.fileExists( dir )
if not exists:
    print 'sourceFileList: directory does not exist. Exiting'
    sys.exit(1)

files = castortools.matchingFiles( dir, regexp)

# Optionally consult the IntegrityCheck report to classify files.
mask = "IntegrityCheck"
file_mask = []
if options.check:
    file_mask = castortools.matchingFiles(dir, '^%s_.*\.txt$' % mask)

bad_files = {}
if options.check and file_mask:
    from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
    p = PublishToFileSystem(mask)
    report = p.get(dir)
    if report is not None and report:
        dup = report.get('ValidDuplicates',{})
        # Files whose status flag is false are marked bad.
        # NOTE(review): this chunk appears truncated — the branches
        # handling duplicates/good files are not visible here.
        for name, status in report['Files'].iteritems():
            if not status[0]:
                bad_files[name] = 'MarkedBad'
# Script body: parse the command line, expecting a castor directory
# (LFN) and a file-matching regexp as the two positional arguments.
(options, args) = parser.parse_args()

if len(args) != 2:
    parser.print_help()
    sys.exit(1)

dir = args[0]
regexp = args[1]

# Refuse to continue when the target directory is absent.
exists = castortools.fileExists(dir)
if not exists:
    print 'sourceFileList: directory does not exist. Exiting'
    sys.exit(1)

files = castortools.matchingFiles(dir, regexp)

# When --check is given, look for an IntegrityCheck report to
# classify the matched files.
mask = "IntegrityCheck"
file_mask = []
if options.check:
    file_mask = castortools.matchingFiles(dir, '^%s_.*\.txt$' % mask)

bad_files = {}
if options.check and file_mask:
    from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
    p = PublishToFileSystem(mask)
    report = p.get(dir)
    if report is not None and report:
        dup = report.get('ValidDuplicates', {})
        # NOTE(review): chunk appears truncated — only the 'MarkedBad'
        # branch of the classification loop is visible here.
        for name, status in report['Files'].iteritems():
            if not status[0]:
                bad_files[name] = 'MarkedBad'
def buildListOfFiles(self, pattern='.*root'):
    '''Populate self.files with every file in the castor directory
    whose name matches *pattern* (all root files by default).'''
    matches = castortools.matchingFiles(self.castorDir, pattern)
    self.files = matches
def buildListOfFiles(self, pattern='.*root'):
    '''Fill the file list from the castor directory.

    Keeps every file whose name matches *pattern*; the default
    pattern selects all root files.
    '''
    self.files = castortools.matchingFiles(self.castorDir, pattern)
# --force allows overwriting files that already exist at destination.
parser.add_option("-f", "--force", action="store_true",
                  dest="force",
                  help="force overwrite",
                  default=False)

(options,args) = parser.parse_args()

# Expect three positional arguments: source dir, destination dir, regexp.
if len(args)!=3:
    parser.print_help()
    sys.exit(1)

dir1 = args[0]
dir2 = args[1]
regexp = args[2]

files = castortools.matchingFiles( dir1, regexp )
# With --negate only report what would be copied; otherwise stage the
# matched files from dir1 to dir2 (pprint prints the list as a side
# effect inside the print statement).
if options.negate:
    print 'NOT copying ', pprint.pprint(files)
else:
    print 'Copying ', pprint.pprint(files)
    castortools.cmsStage( dir2, files, options.force)
print 'from:', dir1
print 'to :', dir2
Example (just try, the -n option negates the command!):\ncmsStageAll.py /store/cmst3/user/cbern/CMG/HT/Run2011A-PromptReco-v1/AOD/PAT_CMG /tmp '.*\.root' -n\n\nIMPORTANT NOTE: castor directories must be provided as logical file names (LFN), starting by /store."""
# -n performs a dry run; -f overwrites existing destination files.
parser.add_option("-n", "--negate", action="store_true",
                  dest="negate",
                  help="do not proceed",
                  default=False)
parser.add_option("-f", "--force", action="store_true",
                  dest="force",
                  help="force overwrite",
                  default=False)

(options, args) = parser.parse_args()

# Expect three positional arguments: source dir, destination dir, regexp.
if len(args) != 3:
    parser.print_help()
    sys.exit(1)

dir1 = args[0]
dir2 = args[1]
regexp = args[2]

files = castortools.matchingFiles(dir1, regexp)
# Dry run just lists the matches; otherwise stage them to dir2
# (pprint prints the file list as a side effect of the print statement).
if options.negate:
    print "NOT copying ", pprint.pprint(files)
else:
    print "Copying ", pprint.pprint(files)
    castortools.cmsStage(dir2, files, options.force)
print "from:", dir1
print "to :", dir2