def __init__(self, cache_dir, executable=None, arglist=None, arguments=None, nper=1):
    super(SimpleSubmission, self).__init__(cache_dir + '/submission.pkl')
    self.cache_dir = cache_dir
    if executable is not None:
        self.executable = executable
        self.arguments = arguments
        self.arglist = arglist
        self.nper = nper
    else:
        # No arguments provided: recover the configuration of the last
        # submission from the pickle cache.
        try:
            pkl = pickle.load(open(self.cache_filepath))
            last_sub = pkl[-1]
            self.executable = last_sub.executable
            self.arguments = last_sub.arguments
            self.arglist = last_sub.arglist
            self.nper = last_sub.nper
        except Exception:
            PError(self.__class__.__name__ + '.__init__',
                   'Must provide a valid cache or arguments!')
            raise RuntimeError
    self.cmssw = getenv('CMSSW_BASE')
    self.workdir = cache_dir + '/workdir/'
    self.logdir = cache_dir + '/logdir/'
    for d in [self.workdir, self.logdir]:
        system('mkdir -p ' + d)
    if isinstance(self.arglist, list):
        # Serialize the argument list to a file and index jobs by line number.
        with open(cache_dir + '/workdir/args.list', 'w') as fargs:
            fargs.write('\n'.join(self.arglist))
        if not self.arguments:
            self.arguments = range(1, len(self.arglist) + 1)
        self.arglist = cache_dir + '/workdir/args.list'

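# A minimal usage sketch for SimpleSubmission. The cache directory,
# executable, and argument strings below are hypothetical, and the submit
# step (not shown in this section) is assumed to set self.cluster_id before
# query_status() or kill() is called:
#
#   sub = SimpleSubmission('/tmp/USER/my_task',
#                          executable='exec.sh',
#                          arglist=['arg_for_job_1', 'arg_for_job_2'],
#                          nper=1)
#   # ... submit, then poll:
#   print(sub.query_status())
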
def query_status(self):
    if not self.cluster_id:
        PError(self.__class__.__name__ + '.query_status',
               'This submission has not been executed yet (ClusterId not set)')
        raise RuntimeError
    results = self.schedd.query('ClusterId =?= %i' % (self.cluster_id))
    jobs = {x: [] for x in ['T3', 'T2', 'idle', 'held', 'other']}
    for job in results:
        proc_id = int(job['ProcId'])
        status = job['JobStatus']
        try:
            if isinstance(self.arguments, dict):
                samples = [self.arguments[self.proc_ids[proc_id]]]
            else:
                samples = self.proc_ids[proc_id].split()
        except KeyError:
            # Sometimes one extra dummy job is created and not tracked, oh well.
            continue
        if job_status[status] == 'running':
            # Split running jobs by site based on the worker hostname.
            try:
                remote_host = job['RemoteHost']
                if '@T3' in remote_host:
                    status = job_status_rev['T3']
                else:
                    status = job_status_rev['T2']
            except KeyError:
                # Job is probably moving between states; call it idle.
                status = 1
        if job_status[status] in jobs:
            jobs[job_status[status]] += samples
        else:
            jobs['other'] += samples
    return jobs

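# query_status() relies on module-level lookup tables that are not part of
# this section. A plausible sketch, assuming the standard HTCondor JobStatus
# codes plus custom codes for the site-split 'running' states:
#
#   job_status = {1: 'idle', 2: 'running', 3: 'removed', 4: 'completed',
#                 5: 'held', 6: 'transferring output',
#                 7: 'T3', 8: 'T2'}  # 7/8 are assumed custom site labels
#   job_status_rev = {v: k for k, v in job_status.items()}
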
def read_branches(filenames, tree, branches, cut, treename="events", xkwargs={}):
    # Exactly one of filenames and tree must be provided.
    if not (filenames or tree) or (filenames and tree):
        PError("root_interface.read_branches",
               "Exactly one of filenames and tree should be specified!")
        return None
    if branches:
        branches_ = list(set(branches))  # remove duplicates
    else:
        branches_ = None
    if filenames:
        return rnp.root2array(filenames=filenames, treename=treename,
                              branches=branches_, selection=cut, **xkwargs)
    else:
        return rnp.tree2array(tree=tree, branches=branches_,
                              selection=cut, **xkwargs)

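# Usage sketch for read_branches; the file, tree, and branch names are
# hypothetical. The two call modes are mutually exclusive, matching the
# check above:
#
#   arr = read_branches(filenames=['skim.root'], tree=None,
#                       branches=['met', 'nJet'], cut='met>200',
#                       treename='events')
#   # or, given an already-open TTree t:
#   arr = read_branches(filenames=None, tree=t,
#                       branches=['met', 'nJet'], cut='met>200')
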
def kill(self):
    if not self.cluster_id:
        PError(self.__class__.__name__ + '.kill',
               'This submission has not been executed yet (ClusterId not set)')
        raise RuntimeError
    N = self.schedd.act(htcondor.JobAction.Remove,
                        ['%s.%s' % (self.cluster_id, p) for p in self.proc_ids])['TotalSuccess']
    if N:
        PInfo(self.__class__.__name__ + '.kill',
              'Killed %i jobs in ClusterId=%i' % (N, self.cluster_id))

def query_status(self):
    if not self.cluster_id:
        PError('Submission.status',
               'This submission has not been executed yet (ClusterId not set)')
        raise RuntimeError
    results = self.schedd.query('Owner =?= "%s" && ClusterId =?= %i'
                                % (getenv('USER'), self.cluster_id))
    jobs = {x: [] for x in ['running', 'idle', 'held', 'other']}
    for job in results:
        proc_id = job['ProcId']
        status = job['JobStatus']
        sample = self.sample_config[self.proc_ids[proc_id]]
        if job_status[status] in jobs:
            jobs[job_status[status]].append(sample)
        else:
            jobs['other'].append(sample)
    return jobs

def setup_schedd(config='T3'):
    global pool_server, schedd_server, base_job_properties, should_spool, query_owner
    if config == 'T3' or config is None:
        base_job_properties = {
            "Iwd": "WORKDIR",
            "Cmd": "WORKDIR/exec.sh",
            "WhenToTransferOutput": "ON_EXIT",
            "ShouldTransferFiles": "YES",
            #"Requirements":
            #    classad.ExprTree('UidDomain == "mit.edu" && Arch == "X86_64" && OpSysAndVer == "SL6"'),
            #"AcctGroup": acct_grp_t3,
            #"AccountingGroup": '%s.USER' % (acct_grp_t3),
            "X509UserProxy": "/tmp/x509up_uUID",
            "OnExitHold": classad.ExprTree("( ExitBySignal == true ) || ( ExitCode != 0 )"),
            "In": "/dev/null",
            "TransferInput": "WORKDIR/cmssw.tgz,WORKDIR/skim.py,WORKDIR/x509up",
        }
        pool_server = None
        schedd_server = getenv('HOSTNAME')
        should_spool = False
        query_owner = getenv('USER')
    elif config == 'T2':
        base_job_properties = {
            "Iwd": "WORKDIR",
            "Cmd": "WORKDIR/exec.sh",
            "WhenToTransferOutput": "ON_EXIT",
            "ShouldTransferFiles": "YES",
            "X509UserProxy": "/tmp/x509up_uUID",
            #"OnExitHold": classad.ExprTree("( ExitBySignal == true ) || ( ExitCode != 0 )"),
            "RequestMemory": 2500,
            "In": "/dev/null",
            "TransferInput": "WORKDIR/cmssw.tgz,WORKDIR/skim.py,WORKDIR/x509up",
        }
        pool_server = None
        schedd_server = getenv('HOSTNAME')
        should_spool = False
        query_owner = getenv('USER')
    elif config == 'SubMIT':
        base_job_properties = {
            "Iwd": "WORKDIR",
            "Cmd": "WORKDIR/exec.sh",
            "WhenToTransferOutput": "ON_EXIT",
            "ShouldTransferFiles": "YES",
            "Requirements":
                classad.ExprTree('Arch == "X86_64" && ( isUndefined(IS_GLIDEIN) || ( OSGVO_OS_STRING == "RHEL 6" && \
HAS_CVMFS_cms_cern_ch == True ) || GLIDEIN_REQUIRED_OS == "rhel6" || ( Has_CVMFS_cms_cern_ch == True && \
(BOSCOGroup == "bosco_cms" || BOSCOGroup == "paus") ) ) && (isUndefined(GLIDEIN_Entry_Name) || \
!stringListMember(GLIDEIN_Entry_Name, "CMS_T2_US_Nebraska_Red,CMS_T2_US_Nebraska_Red_op,CMS_T2_US_Nebraska_Red_gw1,\
CMS_T2_US_Nebraska_Red_gw1_op,CMS_T2_US_Nebraska_Red_gw2,CMS_T2_US_Nebraska_Red_gw2_op,CMS_T3_MX_Cinvestav_proton_work,\
CMS_T3_US_Omaha_tusker,CMSHTPC_T1_FR_CCIN2P3_cccreamceli01_multicore,CMSHTPC_T1_FR_CCIN2P3_cccreamceli02_multicore,\
CMSHTPC_T1_FR_CCIN2P3_cccreamceli03_multicore,CMSHTPC_T1_FR_CCIN2P3_cccreamceli04_multicore,\
CMSHTPC_T2_FR_CCIN2P3_cccreamceli01_multicore,CMSHTPC_T2_FR_CCIN2P3_cccreamceli02_multicore,\
CMSHTPC_T2_FR_CCIN2P3_cccreamceli03_multicore,CMSHTPC_T3_US_Omaha_tusker,Engage_US_MWT2_iut2_condce,\
Engage_US_MWT2_iut2_condce_mcore,Engage_US_MWT2_osg_condce,Engage_US_MWT2_osg_condce_mcore,Engage_US_MWT2_uct2_condce,\
Engage_US_MWT2_uct2_condce_mcore,Glow_US_Syracuse_condor,Glow_US_Syracuse_condor-ce01,Gluex_US_NUMEP_grid1,HCC_US_BNL_gk01,\
HCC_US_BNL_gk02,HCC_US_BU_atlas-net2,HCC_US_BU_atlas-net2_long,HCC_US_SWT2_gk01,IceCube_US_Wisconsin_osg-ce,\
OSG_US_Clemson-Palmetto_condce,OSG_US_Clemson-Palmetto_condce_mcore,OSG_US_FIU_HPCOSGCE,OSG_US_Hyak_osg,OSG_US_IIT_iitgrid_rhel6,\
OSG_US_MWT2_mwt2_condce,OSG_US_MWT2_mwt2_condce_mcore,OSG_US_UConn_gluskap,OSG_US_SMU_mfosgce", ",")) && (isUndefined(GLIDEIN_Site)\
|| !stringListMember(GLIDEIN_Site, "HOSTED_BOSCO_CE", ","))'),
            "AcctGroup": "analysis",
            "AccountingGroup": "analysis.USER",
            "X509UserProxy": "/tmp/x509up_u2268",
            "OnExitHold": classad.ExprTree("( ExitBySignal == true ) || ( ExitCode != 0 )"),
            "In": "/dev/null",
            "TransferInput": "WORKDIR/cmssw.tgz,WORKDIR/skim.py,WORKDIR/x509up",
            "ProjectName": "CpDarkMatterSimulation",
            "Rank": "Mips",
            "SubMITOwner": "USER",
        }
        pool_server = 'submit.mit.edu:9615'
        schedd_server = 'submit.mit.edu'
        query_owner = getenv('USER')
        should_spool = False
    else:
        PError('job_management.setup_schedd', 'Unknown config %s' % config)
        raise ValueError

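# Usage sketch: setup_schedd() only fills in the module-level globals;
# building the htcondor.Schedd handle from them is assumed to happen
# elsewhere, roughly like:
#
#   setup_schedd('T3')
#   if pool_server:
#       coll = htcondor.Collector(pool_server)
#       ad = coll.locate(htcondor.DaemonTypes.Schedd, schedd_server)
#       schedd = htcondor.Schedd(ad)
#   else:
#       schedd = htcondor.Schedd()  # local schedd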