def make(self):
    """Generate the file listing, the per-dataset jobs and the merge script.

    Steps, in order:

    1. Call ``self.mkdir()``.
    2. Write every path returned by ``myfun.findfile`` for each entry of
       ``self.dsts`` to ``<self.log>list.txt``, one path per line.  An
       entry for which nothing is found is reported with a warning and
       skipped.
    3. Call ``self.mkdir()`` again (kept from the original flow).
    4. For each entry of ``self.dsts`` call ``self.ajob`` with the label
       that ``name.getname(self.dsts)`` provides at the same index, then
       call ``self.mkadd`` once per registered cut.
    5. Write ``<self.mode>hadd.sh``: one ``hadd -f ... && rm ...`` line per
       entry of ``self.name``; the script is then made executable.
    6. Log the count reported by ``ls jobs/*/*.txt -1 | wc -l``.

    Both output files are written inside ``with`` blocks so that they are
    closed even if one of the helpers raises part-way through.
    """
    self.mkdir()

    with open(self.log + 'list.txt', 'w') as listing:
        for dst in self.dsts:
            found = myfun.findfile(dst)
            if not found:
                logger.warning("no dst in this file, " + dst)
                continue
            listing.writelines(path + '\n' for path in found)

    self.mkdir()

    dstname = name.getname(self.dsts)
    for idx, dst in enumerate(self.dsts):
        # Index (rather than zip) so a short label list still raises
        # IndexError instead of silently dropping datasets.
        label = dstname[idx]
        self.ajob(dst, label)
        for k, cut in enumerate(self.cut):
            self.mkadd(self.tree[k], cut, self.name[k] + '.' + label, label)

    script = self.mode + "hadd.sh"
    with open(script, "w") as out:
        out.write("#!/bin/bash\n")
        for tag in self.name:
            out.write("hadd -f " + tag + "_all.root " + tag
                      + ".*root && rm " + tag + ".*root\n")
    do("chmod 755 " + script)

    logger.info("Total: {} {}".format(
        do("ls jobs/*/*.txt -1 | wc -l").split()[0], "jobs"))
def ajob(self, dst, name):
    """Set up a ``subjobs.subjobs`` object for one dst directory and run it.

    Parameters:
        dst:  directory passed to ``ls``/``du`` and to ``setdstpath``.
        name: suffix appended to ``self.job`` and ``self.rawpth`` to form
              the job and root output directories (both are created via
              ``util.mkdir``).

    The job count starts from ``self._num``.  If that is smaller than the
    size-based estimate, ``estimate + 1`` is used instead; the result is
    then capped at the number of listed entries.
    """
    logger.info("Process " + dst)
    logger.info('each job contain about {} {}'.format(self.size, 'G dsts'))

    # Count the `ls -F` lines that survive the grep filter (entries marked
    # with a trailing '/' are dropped).
    n_entries = int(do('ls -1 -F ' + dst + r' | grep -v [/$] | wc -l'))
    logger.debug("total dsts: {}".format(n_entries))

    # First field of `du` output, scaled by 1024^2 and by self.size.
    # presumably 1K blocks -> GB -> number of self.size-GB jobs; TODO confirm
    du_blocks = int(do('du ' + dst).split()[0])
    size_estimate = int(du_blocks / 1024. / 1024. / self.size)

    job_dir = self.job + name
    root_dir = self.rawpth + name
    util.mkdir(job_dir)
    util.mkdir(root_dir)

    builder = subjobs.subjobs()
    builder.setbody(self.body)

    # NOTE(review): the two adjustments are applied one after the other.
    jobnum = size_estimate + 1 if self._num < size_estimate else self._num
    jobnum = min(jobnum, n_entries)

    builder.setjobnum(jobnum)
    builder.setjobname("jobs_")
    builder.setname(self.rootnm)
    builder.setdstpath(dst)
    builder.setjobpath(job_dir)
    builder.setProcesser(20)
    builder.drop(self._drop)
    builder.setrootpath(root_dir)
    builder.jobs()
def hepsub(files):
    """Submit the job scripts next to ``files[0]`` with ``hep_sub``.

    Parameters:
        files: sequence of job file paths.  Only the directory of the first
               element and the length of the sequence are used.

    Returns:
        The last whitespace-separated token on the second-to-last line of
        the ``hep_sub`` output, which is printed as the cluster id.
    """
    job_dir = os.path.dirname(files[0])

    # Make every shell script in the job directory executable first.
    do("cd {PATH}; chmod +x *.sh".format(PATH=job_dir))

    submit_cmd = "cd {PATH}; hep_sub -g physics {ProcId} -n {NUM}".format(
        PATH=job_dir,
        NUM=len(files),
        # Passed through literally; "%{ProcId}" is left for hep_sub itself.
        ProcId=r"jobs_%{ProcId}.sh",
    )
    output_lines = do(submit_cmd).split("\n")

    job_Id = output_lines[-2].split()[-1]
    print("submitted to cluster {}".format(job_Id))
    return job_Id
def __init__(self):
    """Initialise empty job settings and the directory layout.

    All directories are derived from the first token of the ``pwd`` output
    at construction time, with a trailing ``/`` appended.
    """
    base = do('pwd').split()[0] + '/'

    # Directory layout, all below the working directory.
    self.wkpth = base
    self.job = base + 'jobs/'
    self.log = base + 'log/'
    self.mode = base + 'merged/'
    self.rawpth = base + 'rawFile/'
    self.cxxpth = base + 'hadd/'

    # Job configuration defaults.
    self.body = ''
    self.rootnm = 'FILE'
    self.size = 1.0
    self._num = 1

    # Per-instance lists; parallel entries of tree/cut/name are read
    # together by index in make().
    self.dsts = []
    self.tree = []
    self.cut = []
    self.name = []
    self._drop = []
def addJpsi(self, date):
    """Add the J/psi dst directories of one data-taking period.

    Parameters:
        date: one of 2009, 2012, 2017 or 2018; any other value raises
              ``KeyError``.

    Prints a short hint for the chosen period, lists the entries below the
    matching dst directory and passes each of them to ``ana.addst``, except
    those whose path contains ``"2019jpsi"``.
    """
    dst_roots = {
        2009: '/besfs3/offline/data/703-1/jpsi/round02/dst',
        2012: '/besfs3/offline/data/703-1/jpsi/round05/dst',
        2017: '/bes3fs/offline/data/704-1/jpsi/round11/dst',
        2018: '/bes3fs/offline/data/704-1/jpsi/round12/dst'
    }
    hints = {
        2009: 'add 2009 Jpsi data(0.2 billion)',
        2012: 'add 2012 Jpsi data(1.1 billion)',
        2017: 'add 2017-2018 Jpsi data(4.6 billion)',
        2018: 'add 2018-2019 Jpsi data(4.1 billion)'
    }

    print(hints[date])
    root = dst_roots[date]

    for entry in do('ls %s/* -d' % root).split():
        if "2019jpsi" not in entry:
            ana.addst(self, entry)
phys_ana.setjobnum(5)
# Some users use `FILE` here, while others prefer `FILE1`.
phys_ana.setrootname("FILE")

################################################################
# addst:
#   Add a directory that directly contains the `.dst` files.
#   When many directories have to be added, the following pattern is
#   recommended:
#
#   dirList = do("ls /besfs3/offline/data/664p03/psip/12mc/* -d").split()
#   dirList += do("ls /besfs3/offline/data/664p03/psip/09mc/* -d").split()
#   for dstDir in dirList:
#       phys_ana.addst(dstDir)
################################################################
# 09 psi(3686) data Dst
dirList = do("ls /bes3fs/offline/data/664-1/psip/dst/* -d").split()
# 12 psi(3686) data Dst
dirList += do("ls /bes3fs/offline/data/664p03/psip/dst/* -d").split()
# The loop variable is deliberately not called `dir`, which would shadow
# the builtin for the rest of the script.
for dstDir in dirList:
    phys_ana.addst(dstDir)

# addcut(`the input tree name`, `the cut`, `the output tree name`)
# It is suggested to keep the output tree name the same as the input one.
phys_ana.addcut('sig', "1==1", 'sig')
phys_ana.addcut('mc', "1==1", 'mc')

phys_ana.make()

# Uncomment the next line once you have decided to submit all jobs.
# phys_ana.sub()
def MaxJobs():
    """Return 10000 minus the line count reported by ``hep_q -u | wc -l``.

    An empty command output is treated as a count of zero.
    """
    raw_count = do(
        "/afs/ihep.ac.cn/soft/common/sysgroup/hep_job/bin/hep_q -u | wc -l")
    used = 0 if raw_count == "" else int(raw_count)
    return 10000 - used