def allSampleInfo( sampleName, listLevel ): if listLevel == 3: contents = castortools.ls(castorDir) for c in contents: print c #os.system("rfdir %s | awk '{print \"%s/\"$9}'" % (castorDir,castorDir) ) return print sampleName if listLevel>0: print '------------------------------------------------------------------------------------------------------------' print 'PFN:' print castorDir print 'LFN:' print castortools.castorToLFN(castorDir) if listLevel>1: contents = castortools.ls(castorDir) for c in contents: print c if listLevel>0 and localDir!=None: print 'local:' print localDir if os.path.isdir( localDir ): if listLevel>1: os.system('ls -l ' + localDir ) # print localDir + '*.root' else: if listLevel>0: print 'TO BE IMPORTED' if listLevel>0: print print
def allSampleInfo(sampleName, listLevel): if listLevel == 3: contents = castortools.ls(castorDir) for c in contents: print c #os.system("rfdir %s | awk '{print \"%s/\"$9}'" % (castorDir,castorDir) ) return print sampleName if listLevel > 0: print '------------------------------------------------------------------------------------------------------------' print 'PFN:' print castorDir print 'LFN:' print castortools.castorToLFN(castorDir) if listLevel > 1: contents = castortools.ls(castorDir) for c in contents: print c if listLevel > 0 and localDir != None: print 'local:' print localDir if os.path.isdir(localDir): if listLevel > 1: os.system('ls -l ' + localDir) # print localDir + '*.root' else: if listLevel > 0: print 'TO BE IMPORTED' if listLevel > 0: print print
def test(self, previous=None, timeout=-1): if not castortools.fileExists(self.directory): raise Exception( "The top level directory '%s' for this dataset does not exist" % self.directory) self.query() test_results = {} #support updating to speed things up prev_results = {} if previous is not None: for name, status in previous['Files'].iteritems(): prev_results[name] = status filesToTest = self.sortByBaseDir(self.listRootFiles(self.directory)) for dir, filelist in filesToTest.iteritems(): filemask = {} #apply a UNIX wildcard if specified filtered = filelist if self.options.wildcard is not None: filtered = fnmatch.filter(filelist, self.options.wildcard) if not filtered: print >> sys.stderr, "Warning: The wildcard '%s' does not match any files in '%s'. Please check you are using quotes." % ( self.options.wildcard, self.directory) count = 0 for ff in filtered: fname = os.path.join(dir, ff) lfn = castortools.castorToLFN(fname) #try to update from the previous result if available if lfn in prev_results and prev_results[lfn][0]: if self.options.printout: print '[%i/%i]\t Skipping %s...' % ( count, len(filtered), fname), OK, num = prev_results[lfn] else: if self.options.printout: print '[%i/%i]\t Checking %s...' % ( count, len(filtered), fname), OK, num = self.testFileTimeOut(lfn, timeout) filemask[ff] = (OK, num) if self.options.printout: print(OK, num) if OK: self.eventsSeen += num count += 1 test_results[castortools.castorToLFN(dir)] = filemask self.test_result = test_results self.duplicates, self.bad_jobs, sum_dup = self.stripDuplicates() #remove duplicate entries from the event count self.eventsSeen -= sum_dup
def test(self, previous = None, timeout = -1): if not castortools.fileExists(self.directory): raise Exception("The top level directory '%s' for this dataset does not exist" % self.directory) self.query() test_results = {} #support updating to speed things up prev_results = {} if previous is not None: for name, status in previous['Files'].iteritems(): prev_results[name] = status filesToTest = self.sortByBaseDir(self.listRootFiles(self.directory)) for dir, filelist in filesToTest.iteritems(): filemask = {} #apply a UNIX wildcard if specified filtered = filelist if self.options.wildcard is not None: filtered = fnmatch.filter(filelist, self.options.wildcard) if not filtered: print >> sys.stderr, "Warning: The wildcard '%s' does not match any files in '%s'. Please check you are using quotes." % (self.options.wildcard,self.directory) count = 0 for ff in filtered: fname = os.path.join(dir, ff) lfn = castortools.castorToLFN(fname) #try to update from the previous result if available if lfn in prev_results and prev_results[lfn][0]: if self.options.printout: print '[%i/%i]\t Skipping %s...' % (count, len(filtered),fname), OK, num = prev_results[lfn] else: if self.options.printout: print '[%i/%i]\t Checking %s...' % (count, len(filtered),fname), OK, num = self.testFileTimeOut(lfn, timeout) filemask[ff] = (OK,num) if self.options.printout: print (OK, num) if OK: self.eventsSeen += num count += 1 test_results[castortools.castorToLFN(dir)] = filemask self.test_result = test_results self.duplicates, self.bad_jobs, sum_dup = self.stripDuplicates() #remove duplicate entries from the event count self.eventsSeen -= sum_dup
def publish(self, report): """Publish a file""" for path in report['PathList']: _, name = tempfile.mkstemp('.txt', text=True) json.dump(report, file(name,'w'), sort_keys=True, indent=4) fname = '%s_%s.txt' % (self.parent, report['DateCreated']) #rename the file locally - TODO: This is a potential problem nname = os.path.join(os.path.dirname(name),fname) os.rename(name, nname) castor_path = castortools.lfnToCastor(path) new_name = '%s/%s' % (castor_path, fname) castortools.xrdcp(nname,path) time.sleep(1) if castortools.fileExists(new_name): #castortools.move(old_name, new_name) #castortools.chmod(new_name, '644') print "File published: '%s'" % castortools.castorToLFN(new_name) os.remove(nname) else: pathhash = path.replace('/','.') hashed_name = 'PublishToFileSystem-%s-%s' % (pathhash, fname) shutil.move(nname, hashed_name) print >> sys.stderr, "Cannot write to directory '%s' - written to local file '%s' instead." % (castor_path, hashed_name)
def __init__(self, dirLocalOrTgzDirOnCastor, castorTgz, dbsAPI): #self.dbAPI = DatabaseAPI.DatabaseAPI('/afs/cern.ch/user/p/pmeckiff/public/bookkeeping.db') self.dirLocal = None self.tgzDirOnCastor = None self.dirOnCastor = None self.setName = dirLocalOrTgzDirOnCastor self.dbsAPI = dbsAPI # Set Directory name if local local = dirLocalOrTgzDirOnCastor.rstrip('/') castorTgz = castortools.castorToLFN(castorTgz) print castorTgz # Check if local first (obviously) if self.isDirLocal(local ): print "File is on local machine: " + local self.dirLocal = local #if found set class attribute # Check if on castor next elif self.isTgzDirOnCastor(castorTgz): print "File is directory on Castor" self.tgzDirOnCastor = castorTgz # if found set class attribute for i in castortools.matchingFiles(castorTgz.rstrip("/Logger.tgz"), ".*tgz"): print i # If logger is not present but directory exists elif self.isDirOnCastor(castorTgz.rstrip("/Logger.tgz")): print "Directory is valid on Castor, but no logger file is present." self.dirOnCastor = castorTgz.rstrip("/Logger.tgz") # If neither then raise an exception else: raise ValueError( dirLocalOrTgzDirOnCastor + ' is neither a tgz directory on castor (provide a LFN!) nor a local directory')
def publish(self, report): """Publish a file""" for path in report['PathList']: _, name = tempfile.mkstemp('.txt', text=True) json.dump(report, file(name, 'w'), sort_keys=True, indent=4) fname = '%s_%s.txt' % (self.parent, report['DateCreated']) #rename the file locally - TODO: This is a potential problem nname = os.path.join(os.path.dirname(name), fname) os.rename(name, nname) castor_path = castortools.lfnToCastor(path) new_name = '%s/%s' % (castor_path, fname) castortools.xrdcp(nname, path) time.sleep(1) if castortools.fileExists(new_name): #castortools.move(old_name, new_name) #castortools.chmod(new_name, '644') print "File published: '%s'" % castortools.castorToLFN( new_name) os.remove(nname) else: pathhash = path.replace('/', '.') hashed_name = 'PublishToFileSystem-%s-%s' % (pathhash, fname) shutil.move(nname, hashed_name) print >> sys.stderr, "Cannot write to directory '%s' - written to local file '%s' instead." % ( castor_path, hashed_name)
def run(self, input): find = FindOnCastor(self.dataset, self.options.batch_user, self.options) find.create = True out = find.run({}) full = input['ExpandConfig']['ExpandedFullCFG'] jobdir = input['CreateJobDirectory']['JobDir'] sampleDir = os.path.join(out['Directory'], self.options.tier) sampleDir = castortools.castorToLFN(sampleDir) cmd = [ 'cmsBatch.py', str(self.options.nInput), os.path.basename(full), '-o', '%s_Jobs' % self.options.tier, '--force' ] cmd.extend(['-r', sampleDir]) if self.options.run_batch: jname = "%s/%s" % (self.dataset, self.options.tier) jname = jname.replace("//", "/") user_group = '' if self.options.group is not None: user_group = '-G %s' % self.options.group cmd.extend([ '-b', "'bsub -q %s -J %s -u [email protected] %s < ./batchScript.sh | tee job_id.txt'" % (self.options.queue, jname, user_group) ]) print " ".join(cmd) pwd = os.getcwd() error = None try: os.chdir(jobdir) returncode = os.system(" ".join(cmd)) if returncode != 0: error = "Running cmsBatch failed. Return code was %i." % returncode finally: os.chdir(pwd) if error is not None: raise Exception(error) return { 'SampleDataset': "%s/%s" % (self.dataset, self.options.tier), 'BatchUser': self.options.batch_user, 'SampleOutputDir': sampleDir, 'LSFJobsTopDir': os.path.join(jobdir, '%s_Jobs' % self.options.tier) }
def isTgzDirOnCastor(self, file):
    """Return True if *file* resolves to a castor LFN ending in '.tgz'.

    Non-castor paths are first mapped through castorToLFN; anything
    that is not a valid LFN afterwards yields False.
    """
    if not castortools.isCastorDir(file):
        file = castortools.castorToLFN(file)
    if not castortools.isLFN(file):
        return False
    # FIX: raw string for the regex and a direct boolean return
    # instead of the verbose 'if m: return True else: return False'.
    return re.match(r'.*\.tgz$', file) is not None
def isTgzDirOnEOS(self, file):
    '''Checks if file is a .tgz file in an eos dir'''
    if not castortools.isCastorDir(file):
        file = castortools.castorToLFN(file)
    if not castortools.isLFN(file):
        return False
    # FIX: raw string for the regex and a direct boolean return
    # instead of branching to literal True/False.
    return re.match(r'.*\.tgz$', file) is not None
def isTgzDirOnCastor(self, file):
    """Return True if *file* resolves to a castor LFN ending in '.tgz'."""
    if not castortools.isCastorDir(file):
        file = castortools.castorToLFN(file)
    if not castortools.isLFN(file):
        return False
    # FIX: raw regex string; return the match test directly rather
    # than the verbose if/else over literal True/False.
    return re.match(r'.*\.tgz$', file) is not None
def run(self, input): find = FindOnCastor(self.dataset,self.options.batch_user,self.options) find.create = True out = find.run({}) full = input['ExpandConfig']['ExpandedFullCFG'] jobdir = input['CreateJobDirectory']['JobDir'] sampleDir = os.path.join(out['Directory'],self.options.tier) sampleDir = castortools.castorToLFN(sampleDir) cmd = ['cmsBatch.py',str(self.options.nInput),os.path.basename(full),'-o','%s_Jobs' % self.options.tier,'--force'] cmd.extend(['-r',sampleDir]) if self.options.run_batch: jname = "%s/%s" % (self.dataset,self.options.tier) jname = jname.replace("//","/") user_group = '' if self.options.group is not None: user_group = '-G %s' % self.options.group cmd.extend(['-b',"'bsub -q %s -J %s -u [email protected] %s < ./batchScript.sh | tee job_id.txt'" % (self.options.queue,jname,user_group)]) print " ".join(cmd) pwd = os.getcwd() error = None try: os.chdir(jobdir) returncode = os.system(" ".join(cmd)) if returncode != 0: error = "Running cmsBatch failed. Return code was %i." % returncode finally: os.chdir(pwd) if error is not None: raise Exception(error) return {'SampleDataset':"%s/%s" % (self.dataset,self.options.tier),'BatchUser':self.options.batch_user, 'SampleOutputDir':sampleDir,'LSFJobsTopDir':os.path.join(jobdir,'%s_Jobs' % self.options.tier)}
def run(self, input):
    """Check that the directory is writable.

    For the 'CMS' user write access is assumed.  Otherwise, when
    options.check is set, a scratch file is staged with cmsStage and
    its existence on the storage element is verified.

    Returns {'Directory': <dir or None>, 'WriteAccess': True}.
    Raises Exception when the staged file never appeared.
    """
    if self.user == 'CMS':
        # Central production area: assume write access.
        return {'Directory': None, 'WriteAccess': True}
    # FIX: 'directory' instead of 'dir' -- don't shadow the builtin.
    directory = input['FindOnCastor']['Directory']
    if self.options.check:
        # FIX: keep the fd mkstemp returns and write through it; the
        # original leaked the fd and the file() handle it opened.
        fd, name = tempfile.mkstemp('.txt', text=True)
        testFile = os.fdopen(fd, 'w')
        testFile.write('Test file')
        testFile.close()
        # This is bad, but castortools is giving me problems.
        store = castortools.castorToLFN(directory)
        try:
            # os.system returns 0 on success.
            if not os.system('cmsStage %s %s' % (name, store)):
                fname = '%s/%s' % (directory, os.path.basename(name))
                write = castortools.fileExists(fname)
                if write:
                    castortools.rm(fname)
                else:
                    raise Exception("Failed to write to directory '%s'" % directory)
        finally:
            # FIX: remove the local scratch file on the failure path too.
            os.remove(name)
    return {'Directory': directory, 'WriteAccess': True}
# Build a crab config section for dataset 'd' under 'topdir', create the
# output directory on EOS, then write the config and prepare logging.
# NOTE(review): this chunk appears to be the body of a per-dataset loop
# whose header is outside this view -- indentation reconstructed.
safe_name = d.replace('/', '_')
if safe_name.startswith('_'):
    safe_name = safe_name[1:]
if safe_name.endswith('_'):
    safe_name = safe_name[:-1]
config.add_section(safe_name)
directory = '%s/%s' % (topdir, d)
if opts.tier:
    directory = os.path.join(directory, opts.tier)
directory = directory.replace('//', '/')
config.set(safe_name, 'CMSSW.datasetpath', d)
lfn = castortools.castorToLFN(directory)
config.set(safe_name, 'USER.user_remote_dir', lfn)
output_dirs.append(lfn)
# Create the directory on EOS if it is not there yet.
if not castortools.fileExists(directory):
    castortools.createCastorDir(directory)
    castortools.chmod(directory, '775')
if not castortools.isDirectory(directory):
    raise Exception("Dataset directory '%s' does not exist or could not be created" % directory)
# FIX: close the output file handle instead of leaking the one
# file(opts.output,'wb') used to open.
cfg_out = open(opts.output, 'wb')
try:
    config.write(cfg_out)
finally:
    cfg_out.close()
from logger import logger
logDir = 'Logger'
os.mkdir(logDir)
# Copy the pset/json referenced by the crab.cfg line into the local job
# directory, append the generated [CMSSW]/[USER] sections, then stage
# out the logs.  NOTE(review): this chunk starts mid-loop (uses 'match'
# and 'line' from code outside this view) -- indentation reconstructed.
newPSet = ldir + "/" + match.group(1)
os.system('cp %s %s' % (pset, newPSet))
# Find and copy json file.
match = patternLumiMask.match(line)
# FIX: compare against None with 'is not None', not '!='.
if match is not None:
    json = match.group(1)  # NOTE(review): shadows the json module name
    newJson = ldir + "/" + match.group(1)
    os.system('cp %s %s' % (json, newJson))
newCrab.write(line)
newCrab.write('[CMSSW]\n')
newCrab.write('datasetpath = ' + sampleName + '\n')
newCrab.write('[USER]\n')
newCrab.write('user_remote_dir = %s\n' % castortools.castorToLFN(cdir))
addToDatasets(sampleNameDir, user=options.user)
from logger import logger
oldPwd = os.getcwd()
os.chdir(ldir)
logDir = 'Logger'
os.system('mkdir ' + logDir)
log = logger(logDir)
log.logCMSSW()
#COLIN not so elegant... but tar is behaving in a strange way.
log.addFile(oldPwd + '/' + pset)
log.addFile(oldPwd + '/' + 'crab.cfg')
log.stageOut(cdir)
# Build a crab config section for dataset 'd' under 'topdir', create the
# output directory on EOS, then write the config file.
# NOTE(review): this chunk appears to be the body of a per-dataset loop
# whose header is outside this view -- indentation reconstructed.
safe_name = d.replace('/', '_')
if safe_name.startswith('_'):
    safe_name = safe_name[1:]
if safe_name.endswith('_'):
    safe_name = safe_name[:-1]
config.add_section(safe_name)
directory = '%s/%s' % (topdir, d)
if opts.tier:
    directory = os.path.join(directory, opts.tier)
directory = directory.replace('//', '/')
config.set(safe_name, 'CMSSW.datasetpath', d)
lfn = castortools.castorToLFN(directory)
config.set(safe_name, 'USER.user_remote_dir', lfn)
output_dirs.append(lfn)
# Create the directory on EOS if it is not there yet.
if not castortools.fileExists(directory):
    castortools.createCastorDir(directory)
    castortools.chmod(directory, '775')
if not castortools.isDirectory(directory):
    raise Exception(
        "Dataset directory '%s' does not exist or could not be created" % directory)
# FIX: close the output file handle instead of leaking the one
# file(opts.output, 'wb') used to open.
cfg_out = open(opts.output, 'wb')
try:
    config.write(cfg_out)
finally:
    cfg_out.close()
from logger import logger
# Copy the pset/json referenced by the crab.cfg line into the local job
# directory, append the generated [CMSSW]/[USER] sections, then stage
# out the logs.  NOTE(review): this chunk starts mid-loop (uses 'match'
# and 'line' from code outside this view) -- indentation reconstructed.
newPSet = ldir + "/" + match.group(1)
os.system('cp %s %s' % (pset, newPSet))
# Find and copy json file.
match = patternLumiMask.match(line)
# FIX: identity comparison with None ('is not None'), not '!='.
if match is not None:
    json = match.group(1)  # NOTE(review): shadows the json module name
    newJson = ldir + "/" + match.group(1)
    os.system('cp %s %s' % (json, newJson))
newCrab.write(line)
newCrab.write('[CMSSW]\n')
newCrab.write('datasetpath = ' + sampleName + '\n')
newCrab.write('[USER]\n')
newCrab.write('user_remote_dir = %s\n' % castortools.castorToLFN(cdir))
addToDatasets(sampleNameDir, user=options.user)
from logger import logger
oldPwd = os.getcwd()
os.chdir(ldir)
logDir = 'Logger'
os.system('mkdir ' + logDir)
log = logger(logDir)
log.logCMSSW()
#COLIN not so elegant... but tar is behaving in a strange way.
log.addFile(oldPwd + '/' + pset)
log.addFile(oldPwd + '/' + 'crab.cfg')
log.stageOut(cdir)