def publish(self, report): """Publish a file""" for path in report['PathList']: _, name = tempfile.mkstemp('.txt', text=True) json.dump(report, file(name, 'w'), sort_keys=True, indent=4) fname = '%s_%s.txt' % (self.parent, report['DateCreated']) #rename the file locally - TODO: This is a potential problem nname = os.path.join(os.path.dirname(name), fname) os.rename(name, nname) castor_path = castortools.lfnToCastor(path) new_name = '%s/%s' % (castor_path, fname) castortools.xrdcp(nname, path) time.sleep(1) if castortools.fileExists(new_name): #castortools.move(old_name, new_name) #castortools.chmod(new_name, '644') print "File published: '%s'" % castortools.castorToLFN( new_name) os.remove(nname) else: pathhash = path.replace('/', '.') hashed_name = 'PublishToFileSystem-%s-%s' % (pathhash, fname) shutil.move(nname, hashed_name) print >> sys.stderr, "Cannot write to directory '%s' - written to local file '%s' instead." % ( castor_path, hashed_name)
def publish(self, report): """Publish a file""" for path in report['PathList']: _, name = tempfile.mkstemp('.txt', text=True) json.dump(report, file(name,'w'), sort_keys=True, indent=4) fname = '%s_%s.txt' % (self.parent, report['DateCreated']) #rename the file locally - TODO: This is a potential problem nname = os.path.join(os.path.dirname(name),fname) os.rename(name, nname) castor_path = castortools.lfnToCastor(path) new_name = '%s/%s' % (castor_path, fname) castortools.xrdcp(nname,path) time.sleep(1) if castortools.fileExists(new_name): #castortools.move(old_name, new_name) #castortools.chmod(new_name, '644') print "File published: '%s'" % castortools.castorToLFN(new_name) os.remove(nname) else: pathhash = path.replace('/','.') hashed_name = 'PublishToFileSystem-%s-%s' % (pathhash, fname) shutil.move(nname, hashed_name) print >> sys.stderr, "Cannot write to directory '%s' - written to local file '%s' instead." % (castor_path, hashed_name)
def test(self, previous=None, timeout=-1): if not castortools.fileExists(self.directory): raise Exception( "The top level directory '%s' for this dataset does not exist" % self.directory) self.query() test_results = {} #support updating to speed things up prev_results = {} if previous is not None: for name, status in previous['Files'].iteritems(): prev_results[name] = status filesToTest = self.sortByBaseDir(self.listRootFiles(self.directory)) for dir, filelist in filesToTest.iteritems(): filemask = {} #apply a UNIX wildcard if specified filtered = filelist if self.options.wildcard is not None: filtered = fnmatch.filter(filelist, self.options.wildcard) if not filtered: print >> sys.stderr, "Warning: The wildcard '%s' does not match any files in '%s'. Please check you are using quotes." % ( self.options.wildcard, self.directory) count = 0 for ff in filtered: fname = os.path.join(dir, ff) lfn = castortools.castorToLFN(fname) #try to update from the previous result if available if lfn in prev_results and prev_results[lfn][0]: if self.options.printout: print '[%i/%i]\t Skipping %s...' % ( count, len(filtered), fname), OK, num = prev_results[lfn] else: if self.options.printout: print '[%i/%i]\t Checking %s...' % ( count, len(filtered), fname), OK, num = self.testFileTimeOut(lfn, timeout) filemask[ff] = (OK, num) if self.options.printout: print(OK, num) if OK: self.eventsSeen += num count += 1 test_results[castortools.castorToLFN(dir)] = filemask self.test_result = test_results self.duplicates, self.bad_jobs, sum_dup = self.stripDuplicates() #remove duplicate entries from the event count self.eventsSeen -= sum_dup
def run(self, input):
    """Resolve the dataset directory for self.user and self.dataset.

    For the special user 'CMS' nothing is looked up and both entries of
    the result are None. Otherwise the directory is
    ``<topdir>/<self.dataset>``; when it does not exist and the instance
    has a true ``create`` attribute, it is created first.

    input -- not used by this step.

    Returns {'Topdir': ..., 'Directory': ...}.
    Raises Exception when the directory is not a directory afterwards.
    """
    if self.user == 'CMS':
        return {'Topdir': None, 'Directory': None}

    topdir = castortools.lfnToCastor(castorBaseDir(user=self.user))
    # the path is used exactly as formatted; '//' is not collapsed here
    directory = '%s/%s' % (topdir, self.dataset)

    missing = not castortools.fileExists(directory)
    if missing and hasattr(self, 'create') and self.create:
        castortools.createCastorDir(directory)

    if castortools.isDirectory(directory):
        return {'Topdir': topdir, 'Directory': directory}
    raise Exception("Dataset directory '%s' does not exist or could not be created" % directory)
def run(self, input):
    """Resolve the dataset directory for self.user and self.dataset.

    For the special user 'CMS' nothing is looked up and both entries of
    the result are None. Otherwise the directory is
    ``<topdir>/<self.dataset>`` with every '//' replaced by '/'; when it
    does not exist and the instance has a true ``create`` attribute, it
    is created first.

    input -- not used by this step.

    Returns {'Topdir': ..., 'Directory': ...}.
    Raises Exception when the directory is not a directory afterwards.
    """
    if self.user == 'CMS':
        return {'Topdir': None, 'Directory': None}

    topdir = castortools.lfnToCastor(castorBaseDir(user=self.user))
    joined = '%s/%s' % (topdir, self.dataset)
    # collapse doubled separators introduced by the join
    directory = joined.replace('//', '/')

    missing = not castortools.fileExists(directory)
    if missing and hasattr(self, 'create') and self.create:
        castortools.createCastorDir(directory)

    if castortools.isDirectory(directory):
        return {'Topdir': topdir, 'Directory': directory}
    raise Exception("Dataset directory '%s' does not exist or could not be created" % directory)
def test(self, previous = None, timeout = -1): if not castortools.fileExists(self.directory): raise Exception("The top level directory '%s' for this dataset does not exist" % self.directory) self.query() test_results = {} #support updating to speed things up prev_results = {} if previous is not None: for name, status in previous['Files'].iteritems(): prev_results[name] = status filesToTest = self.sortByBaseDir(self.listRootFiles(self.directory)) for dir, filelist in filesToTest.iteritems(): filemask = {} #apply a UNIX wildcard if specified filtered = filelist if self.options.wildcard is not None: filtered = fnmatch.filter(filelist, self.options.wildcard) if not filtered: print >> sys.stderr, "Warning: The wildcard '%s' does not match any files in '%s'. Please check you are using quotes." % (self.options.wildcard,self.directory) count = 0 for ff in filtered: fname = os.path.join(dir, ff) lfn = castortools.castorToLFN(fname) #try to update from the previous result if available if lfn in prev_results and prev_results[lfn][0]: if self.options.printout: print '[%i/%i]\t Skipping %s...' % (count, len(filtered),fname), OK, num = prev_results[lfn] else: if self.options.printout: print '[%i/%i]\t Checking %s...' % (count, len(filtered),fname), OK, num = self.testFileTimeOut(lfn, timeout) filemask[ff] = (OK,num) if self.options.printout: print (OK, num) if OK: self.eventsSeen += num count += 1 test_results[castortools.castorToLFN(dir)] = filemask self.test_result = test_results self.duplicates, self.bad_jobs, sum_dup = self.stripDuplicates() #remove duplicate entries from the event count self.eventsSeen -= sum_dup
def run(self, input):
    """Check that the directory is writable.

    For the special user 'CMS' no check is made. Otherwise the directory
    is taken from ``input['FindOnCastor']['Directory']`` and, when
    ``self.options.check`` is set, a small temporary file is staged into
    it with ``cmsStage``, looked up with ``castortools.fileExists`` and
    removed again.

    input -- mapping holding the 'FindOnCastor' step output with its
             'Directory' entry (not read for user 'CMS').

    Returns {'Directory': <directory or None>, 'WriteAccess': True}.
    Raises Exception when cmsStage succeeded but the staged file cannot
    be found in the directory.
    """
    if self.user == 'CMS':
        return {'Directory': None, 'WriteAccess': True}

    directory = input['FindOnCastor']['Directory']
    if self.options.check:
        # mkstemp returns an open descriptor: write through it and close
        # it instead of leaking it and opening the file a second time.
        fd, name = tempfile.mkstemp('.txt', text=True)
        try:
            with os.fdopen(fd, 'w') as testFile:
                testFile.write('Test file')

            store = castortools.castorToLFN(directory)
            # this is bad, but castortools is giving me problems
            # NOTE(review): a non-zero cmsStage exit status is not treated
            # as a failure here; WriteAccess is still reported as True.
            if not os.system('cmsStage %s %s' % (name, store)):
                fname = '%s/%s' % (directory, os.path.basename(name))
                if castortools.fileExists(fname):
                    castortools.rm(fname)
                else:
                    raise Exception("Failed to write to directory '%s'" % directory)
        finally:
            # always clean up the local test file, also on failure
            os.remove(name)
    return {'Directory': directory, 'WriteAccess': True}
"--force", dest="force", action="store_true", default=False, help= "Skip files or directories that don't contain trees, without raising an error" ) (options, args) = parser.parse_args() if len(args) < 2: raise RuntimeError, 'Expecting at least two arguments' locdir = args[0] remdir = os.path.join(args[1], os.path.basename(locdir)) if not eostools.isEOS(remdir): raise RuntimeError, 'Remote directory should be on EOS.' if (not eostools.fileExists(locdir)) or eostools.isFile(locdir): raise RuntimeError, 'The local directory that should contain the trees does not exist.' # check removed to allow for top-up of tree productions # if eostools.fileExists('%s/%s' % (remdir,locdir)): # raise RuntimeError, 'The remote EOS directory where the trees should be archived already exists.' alldsets = [p for p in glob.glob(locdir + "/*") if os.path.isdir(p)] if not options.allowSymlinks: symlinks = [d for d in alldsets if os.path.islink(d)] if symlinks: print "The following directories are symlinks and will not be considered (run with --allowSymlinks to include them): ", ", ".join( map(os.path.basename, symlinks)) alldsets = [d for d in alldsets if not os.path.islink(d)] dsets = [ d for d in alldsets if [
""" parser = OptionParser(usage=usage) parser.add_option("-t", dest="treeproducername", type='string', default="myTreeProducer", help='Name of the tree producer module') parser.add_option("-f", dest="friendtreestring", type='string', default="evVarFriend", help='String identifying friend trees (must be contained in the root file name)') parser.add_option("-T", dest="treename", type='string', default="tree.root", help='Name of the tree file') parser.add_option("--dset", dest="dset", type='string', default=None, help='Name of the dataset to process') (options, args) = parser.parse_args() if len(args)<2: raise RuntimeError, 'Expecting at least two arguments' locdir = args[0] remdir = args[1] if not eostools.isEOS(remdir): raise RuntimeError, 'Remote directory should be on EOS.' if (not eostools.fileExists(locdir)) or eostools.isFile(locdir): raise RuntimeError, 'The local directory that should contain the trees does not exist.' # check removed to allow for top-up of tree productions # if eostools.fileExists('%s/%s' % (remdir,locdir)): # raise RuntimeError, 'The remote EOS directory where the trees should be archived already exists.' alldsets = eostools.ls(locdir) dsets = [d for d in alldsets if [ fname for fname in eostools.ls(d) if options.friendtreestring in fname]==[] ] if options.dset: dsets = [d for d in dsets if options.dset in d] friends = [d for d in alldsets if d not in dsets] if options.dset: friends = [d for d in friends if options.dset in d] tocopy = [] for d in dsets: if eostools.isFile(d): raise RuntimeError, 'File found in local directory.'
'String identifying friend trees (must be contained in the root file name)' ) parser.add_option("-T", dest="treename", type='string', default="tree.root", help='Name of the tree file') (options, args) = parser.parse_args() if len(args) < 2: raise RuntimeError, 'Expecting at least two arguments' locdir = args[0] remdir = args[1] if not eostools.isEOS(remdir): raise RuntimeError, 'Remote directory should be on EOS.' if (not eostools.fileExists(locdir)) or eostools.isFile(locdir): raise RuntimeError, 'The local directory that should contain the trees does not exist.' if eostools.fileExists('%s/%s' % (remdir, locdir)): raise RuntimeError, 'The remote EOS directory where the trees should be archived already exists.' alldsets = eostools.ls(locdir) dsets = [ d for d in alldsets if [ fname for fname in eostools.ls(d) if options.friendtreestring in fname ] == [] ] friends = [d for d in alldsets if d not in dsets] tocopy = [] for d in dsets:
pattern = fnmatch.translate( options.wildcard ) # preparing castor dir ----------------- import CMGTools.Production.castorBaseDir as castorBaseDir try: cdir = castorBaseDir.castorBaseDir( options.user ) except: print 'user does not have a castor base dir' sys.exit(1) cdir += sampleName if not castortools.fileExists( cdir ): print 'Directory ', cdir, 'does not exist' print 'Please check the sample name, and the user. You can specify a different user using the -u option' sys.exit(2) # making local source directory --------- ldir = "./"+sampleName mkdir = 'mkdir -p ' + ldir print mkdir if not options.negate: os.system( mkdir ) # copy
dest = args[1] if eostools.isDirectory(dest): dest = os.path.join(dest, os.path.basename(args[0])) sleep_lengths = [1, 10, 60, 600, 1800] return_code = 0 for i in xrange(5): #sleep for a while before running time.sleep(sleep_lengths[i]) try: #run cmsStage print 'cmsStage %s [%d/5]' % (' '.join(argv), i + 1) main(argv) except SystemExit, e: print "cmsStage exited with code '%s'. Retrying... [%d/5]" % ( str(e), i + 1) return_code = e.code #sleep again before checking time.sleep(3) if eostools.fileExists(dest) and eostools.isFile(dest): if source.size() == destination.size(): return_code = 0 break sys.exit(return_code)
#find the destination LFN dest = args[1] if eostools.isDirectory(dest): dest = os.path.join(dest,os.path.basename(args[0])) sleep_lengths = [1,10,60,600,1800] return_code = 0 for i in xrange(5): #sleep for a while before running time.sleep(sleep_lengths[i]) try: #run cmsStage print 'cmsStage %s [%d/5]' % (' '.join(argv) , i+1) main(argv) except SystemExit, e: print "cmsStage exited with code '%s'. Retrying... [%d/5]" % ( str(e), i+1 ) return_code = e.code #sleep again before checking time.sleep(3) if eostools.fileExists(dest) and eostools.isFile(dest): if source.size() == destination.size(): return_code = 0 break sys.exit(return_code)
def isDirOnCastor(self, file):
    """Return True when castortools.fileExists reports that `file` exists.

    NOTE(review): despite the name, only existence is checked here, not
    whether the path is a directory - confirm this is what callers expect.
    """
    # bool() keeps the strict True/False result of the former if/else
    return bool(castortools.fileExists(file))
parser = OptionParser() parser.usage = "%prog <dir> <regexp> : format a set of root files matching a regexp in a directory, as an input to the PoolSource. \n\nExample (just try!):\nsourceFileList.py /castor/cern.ch/user/c/cbern/CMSSW312/SinglePions '.*\.root'" parser.add_option("-c", "--check", dest="check", default=False, action='store_true',help='Check filemask if available') (options,args) = parser.parse_args() if len(args) != 2: parser.print_help() sys.exit(1) dir = args[0] regexp = args[1] exists = castortools.fileExists( dir ) if not exists: print 'sourceFileList: directory does not exist. Exiting' sys.exit(1) files = castortools.matchingFiles( dir, regexp) mask = "IntegrityCheck" file_mask = [] if options.check: file_mask = castortools.matchingFiles(dir, '^%s_.*\.txt$' % mask) bad_files = {} if options.check and file_mask: from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem p = PublishToFileSystem(mask)
"--check", dest="check", default=False, action='store_true', help='Check filemask if available') (options, args) = parser.parse_args() if len(args) != 2: parser.print_help() sys.exit(1) dir = args[0] regexp = args[1] exists = castortools.fileExists(dir) if not exists: print 'sourceFileList: directory does not exist. Exiting' sys.exit(1) files = castortools.matchingFiles(dir, regexp) mask = "IntegrityCheck" file_mask = [] if options.check: file_mask = castortools.matchingFiles(dir, '^%s_.*\.txt$' % mask) bad_files = {} if options.check and file_mask: from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem p = PublishToFileSystem(mask) report = p.get(dir)
safe_name = safe_name[:-1] config.add_section(safe_name) directory = '%s/%s' % (topdir, d) if opts.tier: directory = os.path.join(directory, opts.tier) directory = directory.replace('//', '/') config.set(safe_name, 'CMSSW.datasetpath', d) lfn = castortools.castorToLFN(directory) config.set(safe_name, 'USER.user_remote_dir', lfn) output_dirs.append(lfn) #create the directory on EOS if not castortools.fileExists(directory): castortools.createCastorDir(directory) castortools.chmod(directory, '775') if not castortools.isDirectory(directory): raise Exception( "Dataset directory '%s' does not exist or could not be created" % directory) config.write(file(opts.output, 'wb')) from logger import logger logDir = 'Logger' os.mkdir(logDir) log = logger(logDir) log.logCMSSW() log.addFile(os.path.join(os.getcwd(), opts.cfg))
default="tree.root", help='Name of the tree file') parser.add_option("--dset", dest="dset", type='string', default=None, help='Name of the dataset to process') (options, args) = parser.parse_args() if len(args) < 2: raise RuntimeError, 'Expecting at least two arguments' locdir = args[0] remdir = args[1] if not eostools.isEOS(remdir): raise RuntimeError, 'Remote directory should be on EOS.' if (not eostools.fileExists(locdir)) or eostools.isFile(locdir): raise RuntimeError, 'The local directory that should contain the trees does not exist.' # check removed to allow for top-up of tree productions # if eostools.fileExists('%s/%s' % (remdir,locdir)): # raise RuntimeError, 'The remote EOS directory where the trees should be archived already exists.' alldsets = eostools.ls(locdir) dsets = [ d for d in alldsets if [ fname for fname in eostools.ls(d) if options.friendtreestring in fname ] == [] ] if options.dset: dsets = [d for d in dsets if options.dset in d] friends = [d for d in alldsets if d not in dsets]
safe_name = safe_name[:-1] config.add_section(safe_name) directory = '%s/%s' % (topdir,d) if opts.tier: directory = os.path.join(directory,opts.tier) directory = directory.replace('//','/') config.set(safe_name,'CMSSW.datasetpath',d) lfn = castortools.castorToLFN(directory) config.set(safe_name,'USER.user_remote_dir',lfn) output_dirs.append(lfn) #create the directory on EOS if not castortools.fileExists(directory): castortools.createCastorDir(directory) castortools.chmod(directory,'775') if not castortools.isDirectory(directory): raise Exception("Dataset directory '%s' does not exist or could not be created" % directory) config.write(file(opts.output,'wb')) from logger import logger logDir = 'Logger' os.mkdir(logDir) log = logger( logDir ) log.logCMSSW() log.addFile( os.path.join( os.getcwd(), opts.cfg) ) log.addFile( os.path.join( os.getcwd(), opts.output) )
"--output", dest="output", help="Output file name.", default="source_cff.py") (options, args) = parser.parse_args() if len(args) != 1: parser.print_help() sys.exit(1) sampleName = args[0].rstrip('/') # checking castor dir ----------------- import CMGTools.Production.castorBaseDir as castorBaseDir cdir = castortools.lfnToCastor(castorBaseDir.castorBaseDir(options.user)) cdir += sampleName pattern = fnmatch.translate(options.wildcard) if not castortools.fileExists(cdir): print 'importNewSource: castor directory does not exist. Exit!' sys.exit(1) # sourceFileList = 'sourceFileList.py -c %s "%s" > %s' % (cdir, pattern, sourceFile) from CMGTools.Production.doImportNewSource import doImportNewSource doImportNewSource(sampleName, 'sourceFileList.py -c %s "%s"' % (cdir, pattern), options.output)