def processFiles( regexp, files ): global chunkNumber if len(files) == 0: print 'processFiles: no file in input' sys.exit(2) print 'Processing files:' pprint.pprint( files ) process = cms.Process("COPY") process.source = cms.Source( "PoolSource", fileNames = cms.untracked.vstring( files ), noEventSort = cms.untracked.bool(True), duplicateCheckMode = cms.untracked.string('noDuplicateCheck') ) # build output file name file = os.path.basename( files[0] ) (prefix, index) = castortools.filePrefixAndIndex( regexp, file) tmpRootFile = '/tmp/%s_chunk%d.root' % (prefix,chunkNumber) print ' destination: ', tmpRootFile process.aod = cms.OutputModule( "PoolOutputModule", fileName = cms.untracked.string( tmpRootFile ), outputCommands = cms.untracked.vstring( 'keep *' ) ) process.outpath = cms.EndPath(process.aod) outFile = open("tmpConfig.py","w") outFile.write("import FWCore.ParameterSet.Config as cms\n") outFile.write(process.dumpPython()) outFile.close() chunkNumber = chunkNumber+1 if options.negate == True: return chunkDir = castortools.createSubDir( castorDir, 'Chunks' ) os.system("cmsRun tmpConfig.py") print 'done.' rfcp = "rfcp %s %s" % (tmpRootFile, chunkDir) print rfcp,'...' os.system( rfcp ) os.system("rm %s" % tmpRootFile) print 'temporary files removed.'
def processFiles(regexp, files): global chunkNumber if len(files) == 0: print 'processFiles: no file in input' sys.exit(2) print 'Processing files:' pprint.pprint(files) process = cms.Process("COPY") process.source = cms.Source( "PoolSource", fileNames=cms.untracked.vstring(files), noEventSort=cms.untracked.bool(True), duplicateCheckMode=cms.untracked.string('noDuplicateCheck')) # build output file name file = os.path.basename(files[0]) (prefix, index) = castortools.filePrefixAndIndex(regexp, file) tmpRootFile = '/tmp/%s_chunk%d.root' % (prefix, chunkNumber) print ' destination: ', tmpRootFile process.aod = cms.OutputModule( "PoolOutputModule", fileName=cms.untracked.string(tmpRootFile), outputCommands=cms.untracked.vstring('keep *')) process.outpath = cms.EndPath(process.aod) outFile = open("tmpConfig.py", "w") outFile.write("import FWCore.ParameterSet.Config as cms\n") outFile.write(process.dumpPython()) outFile.close() chunkNumber = chunkNumber + 1 if options.negate == True: return chunkDir = castortools.createSubDir(castorDir, 'Chunks') os.system("cmsRun tmpConfig.py") print 'done.' rfcp = "rfcp %s %s" % (tmpRootFile, chunkDir) print rfcp, '...' os.system(rfcp) os.system("rm %s" % tmpRootFile) print 'temporary files removed.'
castorDir = args[0] regexp1 = args[1] regexp2 = args[2] (clean1, dirty1) = castortools.cleanFiles( castorDir, regexp1, options.cleanTolerance) (clean2, dirty2) = castortools.cleanFiles( castorDir, regexp2, options.cleanTolerance) print 'dirty files, 1: ' pprint.pprint( dirty1 ) print 'dirty files, 2: ' pprint.pprint( dirty2 ) if options.removeDirty: trash = 'Dirty' absTrash = castortools.createSubDir( trash ) castortools.remove( absTrash, dirty1 ) castortools.remove( absTrash, dirty2 ) elif len(dirty1) or len(dirty2): print 'to remove dirty files in both collections, run again with option -d' single = castortools.sync( regexp1, clean1, regexp2, clean2 ) print 'single files:' pprint.pprint( single ) if options.removeSingle: trash = 'Single' absTrash = castortools.createSubDir( trash ) castortools.remove( absTrash, single ) elif len(single):
parser.add_option("-n", "--negate", action="store_true", dest="negate", help="do not proceed", default=False) parser.add_option("-k", "--kill", action="store_true", dest="kill", help="really remove the files", default=False) (options,args) = parser.parse_args() if len(args)!=2: parser.print_help() sys.exit(1) castorDir = args[0] regexp = args[1] files = castortools.matchingFiles( castorDir, regexp ) if options.negate: print 'NOT removing ', pprint.pprint(files) else: if options.kill == False: pprint.pprint(files) trash = castortools.createSubDir( castorDir, 'Trash') castortools.move( trash, files ) else: castortools.remove( files )
parser.usage = "%prog <castor dir> <regexp pattern>: place all empty files in a trash. This script is based on edmFileUtil, so it's pretty slow. The files will be moved to the trash only at the end of the processing. Does anybody know of a fast way to get the number of events in an EDM file? If yes contact Colin.\n\nExample (just try, the -n option negates the command!):\nremoveEmptyFiles.py /castor/cern.ch/user/c/cbern/CMSSW312/SinglePions '.*\.root' -n" parser.add_option("-n", "--negate", action="store_true", dest="negate", help="do not proceed", default=False) (options,args) = parser.parse_args() if len(args)!=2: parser.print_help() sys.exit(1) castorDir = args[0] regexp = args[1] if options.negate: print 'files will NOT be removed' files = castortools.emptyFiles( castorDir, regexp, castortools.isCastorDir(castorDir) ) if options.negate: print 'NOT removing ', pprint.pprint(files) else: print 'Removing ', pprint.pprint(files) trash = castortools.createSubDir( castorDir, 'Trash') print trash castortools.move( trash, files )
regexp2 = args[2] (clean1, dirty1) = castortools.cleanFiles(castorDir, regexp1, options.cleanTolerance) (clean2, dirty2) = castortools.cleanFiles(castorDir, regexp2, options.cleanTolerance) print 'dirty files, 1: ' pprint.pprint(dirty1) print 'dirty files, 2: ' pprint.pprint(dirty2) if options.removeDirty: trash = 'Dirty' absTrash = castortools.createSubDir(trash) castortools.remove(absTrash, dirty1) castortools.remove(absTrash, dirty2) elif len(dirty1) or len(dirty2): print 'to remove dirty files in both collections, run again with option -d' single = castortools.sync(regexp1, clean1, regexp2, clean2) print 'single files:' pprint.pprint(single) if options.removeSingle: trash = 'Single' absTrash = castortools.createSubDir(trash) castortools.remove(absTrash, single) elif len(single): print 'to remove single files in both collections, run again with option -s'
import sys, os, re, pprint import castortools parser = OptionParser() parser.usage = "%prog <castor dir> <regexp pattern>: place all empty files in a trash. This script is based on edmFileUtil, so it's pretty slow. The files will be moved to the trash only at the end of the processing. Does anybody know of a fast way to get the number of events in an EDM file? If yes contact Colin.\n\nExample (just try, the -n option negates the command!):\nremoveEmptyFiles.py /castor/cern.ch/user/c/cbern/CMSSW312/SinglePions '.*\.root' -n" parser.add_option("-n", "--negate", action="store_true", dest="negate", help="do not proceed", default=False) (options, args) = parser.parse_args() if len(args) != 2: parser.print_help() sys.exit(1) castorDir = args[0] regexp = args[1] if options.negate: print "files will NOT be removed" files = castortools.emptyFiles(castorDir, regexp, castortools.isCastorDir(castorDir)) if options.negate: print "NOT removing ", pprint.pprint(files) else: print "Removing ", pprint.pprint(files) trash = castortools.createSubDir(castorDir, "Trash") print trash castortools.move(trash, files)