def __init__(self):
    '''
    Create the empty bookkeeping containers and the logger of a new instance.
    '''
    # Logger handle obtained from Logger(...).getlog(); created last, after
    # every container attribute exists.
    self.files = dict()
    self.allocDirectories = list()
    self.directories = set()
    self.steps = list()
    self.services = list()
    self.notifications = list()
    workflow_logger = Logger('log.txt', '2', 'workflow', True)
    self.log = workflow_logger.getlog()
def run(args, state):
    '''
    Run the requested analysis steps of one sample, one after another.

    ``args.jobs`` is a comma separated list of step names and
    ``args.sampleName`` selects the per-sample script of every step from
    ``state.results[step]['script']``.  Each script is started with ``sh``,
    its outcome is decided by ``check_log`` and, on success, its captured
    stdout is written to ``<script>.o``.

    Returns True when no executed step failed, False otherwise.
    '''
    analysisDict = state.analysisDict
    sampleName = args.sampleName
    logger = Logger(os.path.join(state.scriptsDir, 'log'), '1', 'gaeaJobMonitor', False).getlog()
    isComplete = bundle()
    all_done = True

    jobList = args.jobs.split(',')
    # A leading 'init' step is dropped when no script was recorded for it.
    if jobList[0] == 'init':
        if not state.results['init'].get('script'):
            jobList = jobList[1:]

    for num, step in enumerate(jobList):
        # Steps whose platform is 'S' are not run by this function.
        if analysisDict[step].platform == 'S':
            continue

        # Step number used in log messages; shifted by one when the
        # analysis list does not start with 'init'.
        n = state.analysisList.index(step)
        if state.analysisList[0] != 'init':
            n += 1

        script = state.results[step]['script'][sampleName]

        # From the second job on, a step is marked failed as soon as one of
        # its dependencies is not recorded as complete.
        if num > 0:
            for depStep in analysisDict[step].depend:
                if not isComplete[depStep]:
                    isComplete[step] = False
                    break

        if step in isComplete and isComplete[step] == False:
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            continue

        printtime('step: %s start...' % step)
        p = subprocess.Popen('sh %s' % script, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        isComplete[step] = check_log(p, script, sampleName, n, step)

        # 'alignment' is treated as complete whatever check_log reported.
        if isComplete[step] or step == 'alignment':
            if step == 'alignment':
                isComplete[step] = True
            printtime("step: %s complete" % step)
            logger.info('%s - step %d: %s complete' % (sampleName, n, step))
            # The context manager closes the .o file even if reading fails;
            # the original left the handle open.
            with open(script + '.o', 'w') as out_fh:
                for line in p.stdout.readlines():
                    # Strip only a real trailing newline (the original
                    # line[:-1] also ate the last character of an
                    # unterminated final line).
                    if line.endswith('\n'):
                        line = line[:-1]
                    out_fh.write(line + '\n')
            p.wait()
        else:
            all_done = False
            printtime("%s failed" % step)
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            # Still running after being judged failed: stop it.
            if p.returncode is None:
                p.kill()

    return all_done
def main():
    '''
    Command line entry point: load a saved state and re-schedule jobs.

    Options:
        -s/--state  path of the state file (required, must exist)
        -r/--rerun  path of the rerun file (required, must exist)

    Returns 0 after the scheduler finished and the ``success`` marker was
    written into ``state.stateDir``; returns 1 when one of the two input
    files is missing.  Exits with status 1 when called without arguments.
    '''
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0} Created by huangzhibo on {1}. Last updated on {2}. Copyright 2017 BGI bigData. All rights reserved. USAGE'''.format(
        " v".join([program_name, __version__]), str(__date__), str(__updated__))

    parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-s", "--state", dest="state", help="state file,[default: %(default)s]", required=True)
    parser.add_argument("-r", "--rerun", dest="rerun", help="rerun file,[default: %(default)s]", required=True)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Process arguments
    args = parser.parse_args()

    if not os.path.exists(args.state):
        printtime('ERROR: (--state: %s) - No such file or directory' % args.state)
        return 1

    if not os.path.exists(args.rerun):
        # Bug fix: this message used to name --state and print args.state,
        # hiding which file was actually missing.
        printtime('ERROR: (--rerun: %s) - No such file or directory' % args.rerun)
        return 1

    state = ParseConfig(args.state).parseState()

    # exclusive_task is set to the *string* 'False' on these steps.
    if 'bamSort' in state:
        state.bamSort.exclusive_task = 'False'
    if 'bamSort_M' in state:
        state.bamSort_M.exclusive_task = 'False'
    # NOTE(review): the source layout was flattened; this assignment is
    # assumed to be unconditional - confirm against the original file.
    state.init.exclusive_task = 'False'
    if 'bamindex' in state:
        state.bamindex.exclusive_task = 'False'

    logger = Logger(os.path.join(state.scriptsDir, 'log'), '1', 'job_scheduler', False).getlog()
    state.logger = logger

    sched = Scheduler(state)
    sched.parse_rerun(args.rerun)
    sched.start()

    # Marker file written once the scheduler returned.
    with open(os.path.join(state.stateDir, 'success'), 'w') as f:
        f.write('done!')
    return 0
def __init__(self):
    '''
    Initialise an instance with empty collections and a 'workflow' logger.
    '''
    # All containers start out empty.
    self.files = dict()
    self.allocDirectories = list()
    self.directories = set()
    self.steps = list()
    self.services = list()
    self.notifications = list()
    # The logger object is built first, then its log handle is stored.
    log_factory = Logger('log.txt', '2', 'workflow', True)
    self.log = log_factory.getlog()
def run(args, state):
    '''
    Run the requested analysis steps of one sample, one after another.

    ``args.jobs`` is a comma separated list of step names and
    ``args.sampleName`` selects the per-sample script of every step from
    ``state.results[step]['script']``.  Each script is started with ``sh``,
    its outcome is decided by ``check_log`` and, on success, its captured
    stdout is written to ``<script>.o``.

    Returns True when no executed step failed, False otherwise.
    '''
    analysisDict = state.analysisDict
    sampleName = args.sampleName
    logger = Logger(os.path.join(state.scriptsDir, 'log'), '1', 'gaeaJobMonitor', False).getlog()
    isComplete = bundle()
    all_done = True

    jobList = args.jobs.split(',')
    # A leading 'init' step is dropped when no script was recorded for it.
    if jobList[0] == 'init':
        if not state.results['init'].get('script'):
            jobList = jobList[1:]

    for num, step in enumerate(jobList):
        # Steps whose platform is 'S' are not run by this function.
        if analysisDict[step].platform == 'S':
            continue

        # Step number used in log messages; shifted by one when the
        # analysis list does not start with 'init'.
        n = state.analysisList.index(step)
        if state.analysisList[0] != 'init':
            n += 1

        script = state.results[step]['script'][sampleName]

        # From the second job on, a step is marked failed as soon as one of
        # its dependencies is not recorded as complete.
        if num > 0:
            for depStep in analysisDict[step].depend:
                if not isComplete[depStep]:
                    isComplete[step] = False
                    break

        if step in isComplete and isComplete[step] == False:
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            continue

        printtime('step: %s start...' % step)
        p = subprocess.Popen('sh %s' % script, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        isComplete[step] = check_log(p, script, sampleName, n, step)

        # 'alignment' is treated as complete whatever check_log reported.
        if isComplete[step] or step == 'alignment':
            if step == 'alignment':
                isComplete[step] = True
            printtime("step: %s complete" % step)
            logger.info('%s - step %d: %s complete' % (sampleName, n, step))
            # The context manager closes the .o file even if reading fails;
            # the original left the handle open.
            with open(script + '.o', 'w') as out_fh:
                for line in p.stdout.readlines():
                    # Strip only a real trailing newline (the original
                    # line[:-1] also ate the last character of an
                    # unterminated final line).
                    if line.endswith('\n'):
                        line = line[:-1]
                    out_fh.write(line + '\n')
            p.wait()
        else:
            all_done = False
            printtime("%s failed" % step)
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            # Still running after being judged failed: stop it.
            if p.returncode is None:
                p.kill()

    return all_done
from gaeautils import bundle, Logger import glob import os logger = Logger('log.txt','2',"mode6",True).getlog() def parse_sample(sampleList): total_number = 0 sampleInfo = bundle() with open(sampleList,'r') as f: for line in f: fq1s = [] line = line.strip() field = line.split() rg_LB = field[2] rg_PU = field[3] sampleName = field[1] if field[3].find(',') != -1: fq1s.append(field[3].split(',')[0]) rg_LB = field[1] rg_PU = field[2] sampleName = field[0] else: fq_dir = field[-1].strip() fq1s = glob.glob("%s/*1.fq.gz" % fq_dir) if not fq1s: fq1s = glob.glob("%s/*/*1.fq.gz" % fq_dir) if len(fq1s) == 0 or not os.path.exists(fq1s[0]): logger.error("fq1 under %s don't exists." % sampleName)
class impl(object):
    '''
    Workflow helper: registers files and directories, adjusts command line
    parameter strings and renders command templates into script files.
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self.files = {}
        self.allocDirectories = []
        self.directories = set()
        self.steps = []
        self.services = []
        self.notifications = []
        self.log = Logger('log.txt', '2', 'workflow', True).getlog()

    # def delete_HDFS_dir(self, impl, hdfs_dir):
    #     cmdStr = "%s %s\n" % (impl.fs_cmd.exist, hdfs_dir)
    #     cmdStr += "if [ $? -ne 0 ]; then\n"
    #     cmdStr += " %s %s\n" % (impl.fs_cmd.delete, hdfs_dir)
    #     cmdStr += "fi\n"
    #     return cmdStr

    @staticmethod
    def _writeln(fh, text=''):
        '''Write ``text`` followed by a newline, like ``print >>fh, text``.'''
        fh.write(text + '\n')

    def splitext(self, p):
        '''
        Return ``(root, ext)`` of the base name of ``p``, or False when the
        path does not exist.
        '''
        if not os.path.exists(p):
            return False
        return os.path.splitext(os.path.basename(p))

    def expath(self, path, name, chechExists=''):
        '''
        Locate ``name`` directly or inside one of the ':' separated
        directories of ``path``.

        Returns the first existing path, or False when nothing is found.
        When ``chechExists`` is non-empty a RuntimeError mentioning it is
        raised instead of returning False.
        '''
        if not name:
            if chechExists:
                raise RuntimeError('Program is not exists: %s' % chechExists)
            return False

        if os.path.exists(name):
            return name

        for p in path.split(':'):
            candidate = os.path.join(p, name)
            if os.path.exists(candidate):
                return candidate

        if chechExists:
            raise RuntimeError('Program is not exists: %s' % chechExists)
        return False

    def mkdir(self, *args):
        '''Join ``args`` into one path, remember it in ``self.directories`` and return it.'''
        p = _join_abs_file_name(*args)
        self.directories.add(p)
        return p

    def mkfile(self, name, data):
        '''
        Register ``data`` under the normalised absolute path ``name``.

        Raises RuntimeError when ``name`` is not absolute; returns the
        normalised path.
        '''
        if not os.path.isabs(name):
            raise RuntimeError('not an absolute path: %s' % name)
        name = os.path.normpath(name)
        self.files[name] = data
        return name

    def paramRectify(self, paramStr, paramElem, mustBe=True):
        '''
        Force ``paramElem`` to be present in (``mustBe`` true) or absent
        from (``mustBe`` false) ``paramStr`` and return the result.
        '''
        if mustBe:
            if paramStr.find(paramElem) == -1:
                paramStr += " %s " % paramElem
        elif paramStr.find(paramElem) != -1:
            return paramStr.replace(paramElem, '')
        return paramStr

    def paramCheck(self, mustBe, paramStr, paramElem, paramValue=None):
        '''
        Like ``paramRectify`` but optionally enforces a value.

        With ``mustBe`` true a missing ``paramElem`` is appended (followed
        by ``paramValue`` when given); a present one followed by a different
        word is rewritten to ``paramValue`` and a warning is logged.  With
        ``mustBe`` false every occurrence of ``paramElem`` is removed.
        '''
        if mustBe:
            if paramStr.find(paramElem) == -1:
                if paramValue is not None:
                    paramStr += " %s %s " % (paramElem, paramValue)
                else:
                    paramStr += " %s " % paramElem
            elif paramValue is not None:
                # paramElem is interpolated into the pattern unescaped, as
                # in the original code.
                m = re.match(r'.*%s\s+(\w+).*' % paramElem, paramStr)
                if m and m.group(1) != paramValue:
                    strinfo = re.compile(r'%s\s+\w+' % paramElem)
                    newParamStr = strinfo.sub(
                        '%s %s' % (paramElem, paramValue), paramStr)
                    self.log.warning("Rectify parameter '%s' to '%s'" %
                                     (paramStr, newParamStr))
                    return newParamStr
        elif paramStr.find(paramElem) != -1:
            return paramStr.replace(paramElem, '')
        return paramStr

    def hasParam(self, paramStr, paramElem):
        '''Return True when ``paramElem`` occurs in ``paramStr``.'''
        return paramStr.find(paramElem) != -1

    def fileAppend(self, fh, commands, JobParamList={}):
        '''Write the ``commands`` template once per entry of ``JobParamList`` to ``fh``.'''
        t = _generate_template(commands)
        for param in JobParamList:
            self._writeln(fh, t.safe_substitute(param))

    def write_file(self, fileName, scriptsdir, commands, JobParamList=None, paramDict={}, addShellHeader=False):
        '''
        Render ``commands`` into one or several files below ``scriptsdir``.

        When ``fileName`` contains a ``${KEY}`` placeholder and
        ``JobParamList`` is non-empty, one file per entry is written (the
        entry is first updated with ``paramDict``).  Otherwise a single
        file is written, optionally wrapped in the bash start/end header.

        Returns a bundle whose ``script`` list holds the written paths.
        '''
        scriptDict = bundle()
        scriptDict.script = []
        t = _generate_template(commands)
        m = re.match(r'.*\$\{(\S+)\}.*', fileName)

        if JobParamList and m:
            key = m.group(1)
            for d in JobParamList:
                if key not in d:
                    self.log.error(
                        "Wrong about impl.write_file paramter: fileName. No %s in JobParamList." % key)
                if paramDict:
                    d.update(paramDict)
                file_name = _generate_template(fileName).safe_substitute(d)
                scriptFile = os.path.join(scriptsdir, file_name)
                scriptDict["script"].append(scriptFile)
                # Each file is closed before the next one is opened; the
                # original only closed the last handle explicitly.
                with open(scriptFile, 'w') as script:
                    self._writeln(script, t.safe_substitute(d))
        else:
            scriptFile = os.path.join(scriptsdir, fileName)
            scriptDict["script"].append(scriptFile)
            with open(scriptFile, 'w') as script:
                if addShellHeader:
                    title = os.path.splitext(fileName)[0]
                    self._writeln(script, '#!/bin/bash')
                    self._writeln(script, "echo ==========start %s at : `date` ==========" % title)
                    _script_append(script, t, JobParamList, paramDict)
                    self._writeln(script, "echo ==========end %s at : `date` ========== " % title)
                else:
                    _script_append(script, t, JobParamList, paramDict)
        return scriptDict

    def write_shell(self, name, scriptsdir, commands, JobParamList=[], paramDict={}):
        '''
        Write ``<scriptsdir>/<name>.sh`` containing the bash header, the
        rendered ``commands`` and the end marker; return the script path.
        '''
        t = _generate_template(commands)
        scriptFile = os.path.join(scriptsdir, name + '.sh')
        with open(scriptFile, 'w') as script:
            self._writeln(script, '#!/bin/bash')
            self._writeln(script, "echo ==========start %s at : `date` ==========" % name)
            _script_append(script, t, JobParamList, paramDict)
            self._writeln(script, "")
            self._writeln(script, "echo ==========end %s at : `date` ========== " % name)
        return scriptFile

    def write_scripts(self, name, commands, JobParamList=[], paramDict={}):
        '''
        Write one ``<SCRDIR>/<name>.sh`` per entry of ``JobParamList``.

        Every entry must provide ``SCRDIR`` and ``SAMPLE``; otherwise an
        error is logged and the process exits with status 1.  Returns a
        bundle mapping sample name to script path.
        '''
        scriptDict = bundle()
        t = _generate_template(commands)
        if paramDict:
            # Shared parameters are substituted once, up front.
            t = Template(t.safe_substitute(paramDict))

        for d in JobParamList:
            scriptsdir = d.get('SCRDIR')
            sampleName = d.get('SAMPLE')
            if not scriptsdir or not sampleName:
                self.log.error(
                    "Error in step (%s) JobParamList(no SMAPLE or SCRDIR)." % name)
                # Same effect as exit(1) without relying on the site builtin.
                raise SystemExit(1)

            scriptDict[sampleName] = os.path.join(scriptsdir, name + '.sh')
            with open(scriptDict[sampleName], 'w') as script:
                self._writeln(script, '#!/bin/bash')
                self._writeln(script, "echo ==========start %s at : `date` ==========" % name)
                _script_append(script, t, paramDict=d)
                self._writeln(script, "")
                self._writeln(script, "echo ==========end %s at : `date` ========== " % name)
        return scriptDict

    def write_Scripts(self, name, scriptsdir, commands, JobParamList=[], paramDict={}, reducer=True):
        '''
        Write one ``<name>_<tag>.sh`` per entry of ``JobParamList``.

        The tag is the entry's ``DATATAG`` when set, else its index.  With
        ``reducer`` true only the rendered commands are written; otherwise
        they are wrapped in the bash start/end header.  Returns a bundle
        whose ``script`` list holds the written paths.
        '''
        scriptDict = bundle()
        t = _generate_template(commands)
        scriptDict["script"] = []

        for n, d in enumerate(JobParamList):
            if paramDict:
                d.update(paramDict)
            dataTag = str(n)
            if d.get('DATATAG'):
                dataTag = d.get('DATATAG')
            scriptFile = os.path.join(scriptsdir, name + '_' + dataTag + '.sh')
            scriptDict["script"].append(scriptFile)

            with open(scriptFile, 'w') as script:
                if reducer:
                    self._writeln(script, t.safe_substitute(d))
                else:
                    # Bug fix: these two format strings carried a second,
                    # unmatched '%s', so this branch always raised
                    # TypeError.  The stray placeholder is dropped, giving
                    # the same header/footer as write_shell/write_scripts.
                    # NOTE(review): confirm no extra tag was intended here.
                    self._writeln(script, '#!/bin/bash')
                    self._writeln(script, "echo ==========start %s at : `date` ========== " % name)
                    self._writeln(script, t.safe_substitute(d))
                    # print >>script, "\n"
                    self._writeln(script, "echo ==========end %s at : `date` ========== " % name)
        return scriptDict
class CLIError(Exception):
    '''Generic exception to raise and log different fatal errors.'''

    def __init__(self, msg):
        # Bug fix: ``super(CLIError).__init__(type(self))`` created an
        # unbound super object and re-initialised *that* object, so
        # Exception.__init__ was never called.  The two-argument form
        # initialises the exception itself with the message.
        super(CLIError, self).__init__(msg)
        # Message returned by str()/unicode(), prefixed with "E: ".
        self.msg = "E: %s" % msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        return self.msg


logger = Logger('log.txt', '2', 'Gaea', True).getlog()


def createWorkflowObject(workflowClass, state):
    wf = workflowClass(state)
    return wf


def writeRunShell(gaeaShell, state):
    out = open(gaeaShell, 'w')
    print >> out, '#!/bin/sh'
    print >> out, "source %s/bin/activate" % state.GAEA_HOME
    print >> out, 'runtype=$*'
    print >> out, 'Usage()\n{'
    print >> out, '\techo "Usage: ./run.sh <options>"'
    print >> out, '\techo " Run Tasks (Local) : ./run.sh"'
class parseApp(object):
    '''
    Workflow helper: registers files and directories, adjusts command line
    parameter strings and renders command templates into script files.
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self.files = {}
        self.allocDirectories = []
        self.directories = set()
        self.steps = []
        self.services = []
        self.notifications = []
        self.log = Logger('log.txt','2','workflow',True).getlog()

    @staticmethod
    def _writeln(fh, text=''):
        '''Write ``text`` followed by a newline, like ``print >>fh, text``.'''
        fh.write(text + '\n')

    def delete_HDFS_dir(self, impl, hdfs_dir):
        '''
        Build a shell snippet from ``impl.fs_cmd.exist`` and
        ``impl.fs_cmd.delete`` for ``hdfs_dir`` and return it as a string.

        The delete command runs when the exist command's exit status is
        non-zero.  NOTE(review): whether that condition is the intended one
        depends on what ``fs_cmd.exist`` returns - confirm.
        '''
        cmdStr = "%s %s\n" % (impl.fs_cmd.exist, hdfs_dir)
        cmdStr += "if [ $? -ne 0 ]; then\n"
        cmdStr += " %s %s\n" % (impl.fs_cmd.delete, hdfs_dir)
        cmdStr += "fi\n"
        return cmdStr

    def splitext(self, p):
        '''
        Return ``(root, ext)`` of the base name of ``p``, or False when the
        path does not exist.
        '''
        if not os.path.exists(p):
            return False
        return os.path.splitext(os.path.basename(p))

    def mkdir(self, *args):
        '''Join ``args`` into one path, remember it in ``self.directories`` and return it.'''
        p = _join_abs_file_name(*args)
        self.directories.add(p)
        return p

    def mkfile(self, name, data):
        '''
        Register ``data`` under the normalised absolute path ``name``.

        Raises RuntimeError when ``name`` is not absolute; returns the
        normalised path.
        '''
        if not os.path.isabs(name):
            raise RuntimeError('not an absolute path: %s' % name)
        name = os.path.normpath(name)
        self.files[name] = data
        return name

    def paramRectify(self, paramStr, paramElem, mustBe=True):
        '''
        Force ``paramElem`` to be present in (``mustBe`` true) or absent
        from (``mustBe`` false) ``paramStr`` and return the result.
        '''
        if mustBe:
            if paramStr.find(paramElem) == -1:
                paramStr += " %s " % paramElem
        elif paramStr.find(paramElem) != -1:
            # Bug fix: the original removed the hard-coded '-I' here, so
            # any other paramElem was detected but never removed.
            return paramStr.replace(paramElem, '')
        return paramStr

    def fileAppend(self, fh, commands, JobParamList={}):
        '''Write the ``commands`` template once per entry of ``JobParamList`` to ``fh``.'''
        t = _generate_template(commands)
        for param in JobParamList:
            self._writeln(fh, t.safe_substitute(param))

    def write_file(self, fileName, scriptsdir, commands, JobParamList=None, paramDict={}, addShellHeader=False):
        '''
        Render ``commands`` into one or several files below ``scriptsdir``.

        When ``fileName`` contains a ``${KEY}`` placeholder and
        ``JobParamList`` is non-empty, one file per entry is written (the
        entry is first updated with ``paramDict``).  Otherwise a single
        file is written, optionally wrapped in the bash start/end header.

        Returns a bundle whose ``script`` list holds the written paths.
        '''
        scriptDict = bundle()
        scriptDict.script = []
        t = _generate_template(commands)
        m = re.match(r'.*\$\{(\S+)\}.*', fileName)

        if JobParamList and m:
            key = m.group(1)
            for d in JobParamList:
                if key not in d:
                    self.log.error("Wrong about impl.write_file paramter: fileName. No %s in JobParamList." % key)
                if paramDict:
                    d.update(paramDict)
                file_name = _generate_template(fileName).safe_substitute(d)
                scriptFile = os.path.join(scriptsdir, file_name)
                scriptDict["script"].append(scriptFile)
                # Each file is closed before the next one is opened; the
                # original only closed the last handle explicitly.
                with open(scriptFile, 'w') as script:
                    self._writeln(script, t.safe_substitute(d))
        else:
            scriptFile = os.path.join(scriptsdir, fileName)
            scriptDict["script"].append(scriptFile)
            with open(scriptFile, 'w') as script:
                if addShellHeader:
                    title = os.path.splitext(fileName)[0]
                    self._writeln(script, '#!/bin/bash')
                    self._writeln(script, "echo ==========start %s at : `date` ==========" % title)
                    _script_append(script, t, JobParamList, paramDict)
                    self._writeln(script, "echo ==========end %s at : `date` ========== " % title)
                else:
                    _script_append(script, t, JobParamList, paramDict)
        return scriptDict

    def write_shell(self, name, scriptsdir, commands, JobParamList=[], paramDict={}):
        '''
        Write ``<scriptsdir>/<name>.sh`` containing the bash header, the
        rendered ``commands`` and the end marker; return the script path.
        '''
        t = _generate_template(commands)
        scriptFile = os.path.join(scriptsdir, name + '.sh')
        with open(scriptFile, 'w') as script:
            self._writeln(script, '#!/bin/bash')
            self._writeln(script, "echo ==========start %s at : `date` ==========" % name)
            _script_append(script, t, JobParamList, paramDict)
            self._writeln(script, "")
            self._writeln(script, "echo ==========end %s at : `date` ========== " % name)
        return scriptFile

    def write_scripts(self, name, commands, JobParamList=[], paramDict={}):
        '''
        Write one ``<SCRDIR>/<name>.sh`` per entry of ``JobParamList``.

        Every entry must provide ``SCRDIR`` and ``SAMPLE``; otherwise an
        error is logged and the process exits with status 1.  Valid entries
        are updated with ``paramDict`` before rendering.  Returns a bundle
        mapping sample name to script path.
        '''
        scriptDict = bundle()
        t = _generate_template(commands)

        for d in JobParamList:
            scriptsdir = d.get('SCRDIR')
            sampleName = d.get('SAMPLE')
            if not scriptsdir or not sampleName:
                self.log.error("Error in step (%s) JobParamList(no SMAPLE or SCRDIR)." % name)
                # Same effect as exit(1) without relying on the site builtin.
                raise SystemExit(1)
            if paramDict:
                d.update(paramDict)

            scriptDict[sampleName] = os.path.join(scriptsdir, name + '.sh')
            with open(scriptDict[sampleName], 'w') as script:
                self._writeln(script, '#!/bin/bash')
                self._writeln(script, "echo ==========start %s at : `date` ==========" % name)
                _script_append(script, t, paramDict=d)
                self._writeln(script, "")
                self._writeln(script, "echo ==========end %s at : `date` ========== " % name)
        return scriptDict

    def write_Scripts(self, name, scriptsdir, commands, JobParamList=[], paramDict={}, reducer=True):
        '''
        Write one ``<name>_<tag>.sh`` per entry of ``JobParamList``.

        The tag is the entry's ``DATATAG`` when set, else its index.  With
        ``reducer`` true only the rendered commands are written; otherwise
        they are wrapped in the bash start/end header.  Returns a bundle
        whose ``script`` list holds the written paths.
        '''
        scriptDict = bundle()
        t = _generate_template(commands)
        scriptDict["script"] = []

        for n, d in enumerate(JobParamList):
            if paramDict:
                d.update(paramDict)
            dataTag = str(n)
            if d.get('DATATAG'):
                dataTag = d.get('DATATAG')
            scriptFile = os.path.join(scriptsdir, name + '_' + dataTag + '.sh')
            scriptDict["script"].append(scriptFile)

            with open(scriptFile, 'w') as script:
                if reducer:
                    self._writeln(script, t.safe_substitute(d))
                else:
                    # Bug fix: these two format strings carried a second,
                    # unmatched '%s', so this branch always raised
                    # TypeError.  The stray placeholder is dropped, giving
                    # the same header/footer as write_shell/write_scripts.
                    # NOTE(review): confirm no extra tag was intended here.
                    self._writeln(script, '#!/bin/bash')
                    self._writeln(script, "echo ==========start %s at : `date` ========== " % name)
                    self._writeln(script, t.safe_substitute(d))
                    # print >>script, "\n"
                    self._writeln(script, "echo ==========end %s at : `date` ========== " % name)
        return scriptDict
#!/usr/local/bin/python2.7
# encoding: utf-8
'''
Created on January 18, 2016

@author: huangzhibo
'''
from gaeautils import Logger, clean
from gaeautils.bundle import bundle
import json
import os
from configobj import ConfigObj

logger = Logger('log.txt','2',"parseConfig",True).getlog()


def bundle_rcopy(cfg):
    '''
    Copy the mapping ``cfg`` into a new bundle, entry by entry.

    Nested dicts are converted recursively, lists and tuples are duplicated
    so the result does not share them with ``cfg``; every other value is
    stored as is.
    '''
    duplicate = bundle()
    for key in cfg:
        value = cfg[key]
        if isinstance(value, dict):
            duplicate[key] = bundle_rcopy(value)
            continue
        if isinstance(value, list):
            # a new list rather than a reference to the original one
            duplicate[key] = list(value)
            continue
        if isinstance(value, tuple):
            # a new tuple rather than a reference to the original one
            duplicate[key] = tuple(value)
            continue
        duplicate[key] = value
    return duplicate


def getAnalysisDict(analysis_flow):
    graph = bundle(init=bundle())
#!/usr/bin/env python # encoding: utf-8 from gaeautils import bundle, Logger, search_mod import os logger = Logger('log.txt', '2', "parseSampleList", True).getlog() class ParseSampleList(object): ''' This class is used to parse sample list ''' config = bundle() def __init__(self, sampleList, config): ''' Constructor ''' self.sampleList = sampleList self.config = config def rectify_gender(self, gender): if gender == 'F' or gender == 'female': return 'female' else: return 'male' def check_gender(self, sampleinfo, sampleName): sampleGender = '' for dataTag in sampleinfo: