Example #1
0
def processFiles( regexp, files ):
    
    global chunkNumber

    if len(files) == 0:
        print 'processFiles: no file in input'
        sys.exit(2)

    print 'Processing files:'
    pprint.pprint( files )
    
    process = cms.Process("COPY")

    process.source = cms.Source(
        "PoolSource",
        fileNames = cms.untracked.vstring( files ),
        noEventSort = cms.untracked.bool(True),
        duplicateCheckMode = cms.untracked.string('noDuplicateCheck')
        )
    
    # build output file name
    file = os.path.basename( files[0] )
    (prefix, index) = castortools.filePrefixAndIndex( regexp, file)
    
    tmpRootFile = '/tmp/%s_chunk%d.root' % (prefix,chunkNumber)

    print '  destination: ', tmpRootFile
    process.aod = cms.OutputModule(
        "PoolOutputModule",
        fileName = cms.untracked.string( tmpRootFile ),
        outputCommands = cms.untracked.vstring( 'keep *' )
        )
    

    process.outpath = cms.EndPath(process.aod)

    outFile = open("tmpConfig.py","w")
    outFile.write("import FWCore.ParameterSet.Config as cms\n")
    outFile.write(process.dumpPython())
    outFile.close()

    chunkNumber = chunkNumber+1

    if options.negate == True:
        return

    chunkDir = castortools.createSubDir( castorDir, 'Chunks' )
    
    os.system("cmsRun tmpConfig.py")
    print 'done.'
    rfcp = "rfcp %s %s" % (tmpRootFile, chunkDir)
    print rfcp,'...'
    os.system( rfcp )
    os.system("rm %s" % tmpRootFile)
    print 'temporary files removed.'
Example #2
0
def processFiles(regexp, files):

    global chunkNumber

    if len(files) == 0:
        print 'processFiles: no file in input'
        sys.exit(2)

    print 'Processing files:'
    pprint.pprint(files)

    process = cms.Process("COPY")

    process.source = cms.Source(
        "PoolSource",
        fileNames=cms.untracked.vstring(files),
        noEventSort=cms.untracked.bool(True),
        duplicateCheckMode=cms.untracked.string('noDuplicateCheck'))

    # build output file name
    file = os.path.basename(files[0])
    (prefix, index) = castortools.filePrefixAndIndex(regexp, file)

    tmpRootFile = '/tmp/%s_chunk%d.root' % (prefix, chunkNumber)

    print '  destination: ', tmpRootFile
    process.aod = cms.OutputModule(
        "PoolOutputModule",
        fileName=cms.untracked.string(tmpRootFile),
        outputCommands=cms.untracked.vstring('keep *'))

    process.outpath = cms.EndPath(process.aod)

    outFile = open("tmpConfig.py", "w")
    outFile.write("import FWCore.ParameterSet.Config as cms\n")
    outFile.write(process.dumpPython())
    outFile.close()

    chunkNumber = chunkNumber + 1

    if options.negate == True:
        return

    chunkDir = castortools.createSubDir(castorDir, 'Chunks')

    os.system("cmsRun tmpConfig.py")
    print 'done.'
    rfcp = "rfcp %s %s" % (tmpRootFile, chunkDir)
    print rfcp, '...'
    os.system(rfcp)
    os.system("rm %s" % tmpRootFile)
    print 'temporary files removed.'
Example #3
0
castorDir = args[0]
regexp1 = args[1]
regexp2 = args[2]

(clean1, dirty1) = castortools.cleanFiles( castorDir, regexp1, options.cleanTolerance)
(clean2, dirty2) = castortools.cleanFiles( castorDir, regexp2, options.cleanTolerance)

print 'dirty files, 1: '
pprint.pprint( dirty1 )

print 'dirty files, 2: '
pprint.pprint( dirty2 )

if options.removeDirty:
    trash = 'Dirty'
    absTrash = castortools.createSubDir( trash )
    castortools.remove( absTrash, dirty1 )
    castortools.remove( absTrash, dirty2 )
elif len(dirty1) or len(dirty2):
    print 'to remove dirty files in both collections, run again with option -d'

single = castortools.sync( regexp1, clean1, regexp2, clean2 )
print 'single files:'
pprint.pprint( single )


# Act on the `single` list only when options.removeSingle is set.
# NOTE(review): createSubDir receives only the sub-directory name and remove
# receives (directory, files) -- confirm both signatures against castortools.
# NOTE(review): the elif branch has no body in this excerpt.
if options.removeSingle:
    trash = 'Single'
    absTrash = castortools.createSubDir( trash )
    castortools.remove( absTrash, single )
elif len(single):
Example #4
0
parser.add_option("-n", "--negate", action="store_true",
                  dest="negate",
                  help="do not proceed",
                  default=False)
parser.add_option("-k", "--kill", action="store_true",
                  dest="kill",
                  help="really remove the files",
                  default=False)

(options,args) = parser.parse_args()

if len(args)!=2:
    parser.print_help()
    sys.exit(1)

castorDir = args[0]
regexp = args[1]

files = castortools.matchingFiles( castorDir, regexp )

if options.negate:
    print 'NOT removing ',  
    pprint.pprint(files)
else:
    if options.kill == False:
        pprint.pprint(files)
        trash = castortools.createSubDir( castorDir, 'Trash')
        castortools.move( trash, files )
    else:
        castortools.remove( files )
Example #5
0
parser.usage = "%prog <castor dir> <regexp pattern>: place all empty files in a trash. This script is based on edmFileUtil, so it's pretty slow. The files will be moved to the trash only at the end of the processing. Does anybody know of a fast way to get the number of events in an EDM file? If yes contact Colin.\n\nExample (just try, the -n option negates the command!):\nremoveEmptyFiles.py  /castor/cern.ch/user/c/cbern/CMSSW312/SinglePions '.*\.root' -n"
parser.add_option("-n", "--negate", action="store_true",
                  dest="negate",
                  help="do not proceed",
                  default=False)

(options,args) = parser.parse_args()

if len(args)!=2:
    parser.print_help()
    sys.exit(1)

castorDir = args[0]
regexp = args[1]

if options.negate:
    print 'files will NOT be removed'

files = castortools.emptyFiles( castorDir, regexp,
                                castortools.isCastorDir(castorDir) )

if options.negate:
    print 'NOT removing ',  
    pprint.pprint(files)
else:
    print 'Removing ',  
    pprint.pprint(files)
    trash = castortools.createSubDir( castorDir, 'Trash')
    print trash
    castortools.move( trash, files )
Example #6
0
regexp2 = args[2]

(clean1, dirty1) = castortools.cleanFiles(castorDir, regexp1,
                                          options.cleanTolerance)
(clean2, dirty2) = castortools.cleanFiles(castorDir, regexp2,
                                          options.cleanTolerance)

print 'dirty files, 1: '
pprint.pprint(dirty1)

print 'dirty files, 2: '
pprint.pprint(dirty2)

if options.removeDirty:
    trash = 'Dirty'
    absTrash = castortools.createSubDir(trash)
    castortools.remove(absTrash, dirty1)
    castortools.remove(absTrash, dirty2)
elif len(dirty1) or len(dirty2):
    print 'to remove dirty files in both collections, run again with option -d'

single = castortools.sync(regexp1, clean1, regexp2, clean2)
print 'single files:'
pprint.pprint(single)

if options.removeSingle:
    trash = 'Single'
    absTrash = castortools.createSubDir(trash)
    castortools.remove(absTrash, single)
elif len(single):
    print 'to remove single files in both collections, run again with option -s'
Example #7
0
import sys, os, re, pprint
import castortools

parser = OptionParser()
parser.usage = "%prog <castor dir> <regexp pattern>: place all empty files in a trash. This script is based on edmFileUtil, so it's pretty slow. The files will be moved to the trash only at the end of the processing. Does anybody know of a fast way to get the number of events in an EDM file? If yes contact Colin.\n\nExample (just try, the -n option negates the command!):\nremoveEmptyFiles.py  /castor/cern.ch/user/c/cbern/CMSSW312/SinglePions '.*\.root' -n"
parser.add_option("-n", "--negate", action="store_true", dest="negate", help="do not proceed", default=False)

(options, args) = parser.parse_args()

if len(args) != 2:
    parser.print_help()
    sys.exit(1)

castorDir = args[0]
regexp = args[1]

if options.negate:
    print "files will NOT be removed"

files = castortools.emptyFiles(castorDir, regexp, castortools.isCastorDir(castorDir))

if options.negate:
    print "NOT removing ",
    pprint.pprint(files)
else:
    print "Removing ",
    pprint.pprint(files)
    trash = castortools.createSubDir(castorDir, "Trash")
    print trash
    castortools.move(trash, files)