def update_db_state(sample, status_tag, message=None):
    db_update = "/hwfssz1/ST_HEALTH/WGS/F16ZQSB1SY2582/personalgenome/lib/genome_api_for_gaea.pl"
    message_param = ''
    if message:
        message_param = '-message {}'.format(message)
    cmd = 'ssh 192.168.60.11 /hwfssz1/ST_MCHRI/CLINIC/SOFTWARES/bin/perl {} -sample_no {} -status {} {}'.format(
        db_update, sample, status_tag, message_param)
    printtime('INFO: {}'.format(cmd))
    subprocess.call(cmd, shell=True)
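# A minimal sketch (hypothetical sample ID) of the command string update_db_state builds
# before handing it to the shell; the host and the perl/script paths are the ones
# hard-coded above.
#   example_cmd = update_db_state('17NB0001', 'done') would run roughly:
#   ssh 192.168.60.11 /hwfssz1/ST_MCHRI/CLINIC/SOFTWARES/bin/perl \
#       /hwfssz1/ST_HEALTH/WGS/F16ZQSB1SY2582/personalgenome/lib/genome_api_for_gaea.pl \
#       -sample_no 17NB0001 -status done
# Note: a -message value containing spaces is interpolated unquoted, so the remote shell
# would split it into separate words.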
def main():
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}

    Created by huangzhibo on {1}. Last updated on {2}.
    Copyright 2017 BGI bigData. All rights reserved.

    USAGE'''.format(" v".join([program_name, __version__]), str(__date__), str(__updated__))

    parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-s", "--state", dest="state", help="state file,[default: %(default)s]", required=True)
    parser.add_argument("-r", "--rerun", dest="rerun", help="rerun file,[default: %(default)s]", required=True)

    if len(sys.argv) == 1:
        parser.print_help()
        exit(1)

    # Process arguments
    args = parser.parse_args()

    if not os.path.exists(args.state):
        printtime('ERROR: (--state: %s) - No such file or directory' % args.state)
        return 1

    if not os.path.exists(args.rerun):
        printtime('ERROR: (--rerun: %s) - No such file or directory' % args.rerun)
        return 1

    state = ParseConfig(args.state).parseState()
    if 'bamSort' in state:
        state.bamSort.exclusive_task = 'False'
    if 'bamSort_M' in state:
        state.bamSort_M.exclusive_task = 'False'
    state.init.exclusive_task = 'False'
    if 'bamindex' in state:
        state.bamindex.exclusive_task = 'False'

    logger = Logger(os.path.join(state.scriptsDir, 'log'), '1', 'job_scheduler', False).getlog()
    state.logger = logger

    sched = Scheduler(state)
    sched.parse_rerun(args.rerun)
    sched.start()

    with open(os.path.join(state.stateDir, 'success'), 'w') as f:
        f.write('done!')
    return 0
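# For orientation: the rerun file passed via -r is the tab-separated list written by the
# script-generation step (see the rerun_fh.write calls further below), one line per sample:
# a sample name, a tab, then the comma-separated job list. Hypothetical example line:
#   sampleA	init,alignment,bamSort,bamindex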
def main():
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}

    Created by huangzhibo on {1}. Last updated on {2}.
    Copyright 2017 BGI bigData. All rights reserved.

    USAGE'''.format(" v".join([program_name, __version__]), str(__date__), str(__updated__))

    parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-b", "--bedlist", help="sample_name,[default: %(default)s]", required=True)
    parser.add_argument("-o", "--output", help="output file path,[default: %(default)s]", required=True)
    parser.add_argument("-g", "--gvcf_out", help="gvcf output file path,[default: %(default)s]")
    parser.add_argument("-p", "--part_vcf_dir", help="part_vcf_dir,[default: %(default)s]", required=True)

    if len(sys.argv) == 1:
        parser.print_help()
        exit(1)

    # Process arguments
    args = parser.parse_args()

    if not os.path.exists(args.bedlist):
        printtime('ERROR: (--bedlist: %s) - No such file or directory' % args.bedlist)
        return 1

    if not os.path.exists(args.part_vcf_dir):
        printtime('ERROR: (--part_vcf_dir: %s) - No such file or directory' % args.part_vcf_dir)
        return 1

    bed_prefix = []
    with open(args.bedlist, 'r') as beds:
        for bed in beds:
            bed_prefix.append(os.path.splitext(os.path.basename(bed))[0])

    # Poll up to 30 times (5 s apart) for the part VCFs to appear
    status = False
    i = 30
    while i:
        status = check_part_vcf(bed_prefix, args.part_vcf_dir)
        if status:
            break
        time.sleep(5)
        i -= 1

    if status:
        print "part_vcf_dir is good!"
        merge_vcf(bed_prefix, args.part_vcf_dir, args.output)
        if args.gvcf_out:
            merge_vcf(bed_prefix, args.part_vcf_dir, args.gvcf_out, True)
    else:
        print "part_vcf_dir is bad!"

    return 0
def check_out_sdn(p, failFile, is_at_TH=False):
    JobId = ''
    for line in p.stdout.readlines():
        printtime(line[:-1])
        if is_at_TH:
            jobInfo = re.match(r'^Submitted batch job (\d+)$', line)
        else:
            jobInfo = re.match(r'^Your job (\d+) \("(.*?)"\) has been submitted$', line)
        if jobInfo:
            JobId = jobInfo.group(1)
        else:
            writefail("err happened when submit (qsub/sbatch). ", failFile)
    for line in p.stderr.readlines():
        printtime('ERROR: %s' % line[:-1])
    return JobId
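# Quick sketch (hypothetical job IDs) of the two submission formats check_out_sdn parses:
# Slurm's sbatch (is_at_TH=True) and SGE's qsub report job IDs differently.
#
#     import re
#     sge_line = 'Your job 12345 ("gaea.sh") has been submitted'
#     slurm_line = 'Submitted batch job 12345'
#     print(re.match(r'^Your job (\d+) \("(.*?)"\) has been submitted$', sge_line).group(1))   # 12345
#     print(re.match(r'^Submitted batch job (\d+)$', slurm_line).group(1))                     # 12345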
def merge_vcf(bed_prefix, part_vcf_dir, out, gvcf=False):
    part_vcf_list = os.path.join(part_vcf_dir, 'part_vcf.list')
    suffix = vcf_suffix
    if gvcf:
        suffix = gvcf_suffix
        part_vcf_list = os.path.join(part_vcf_dir, 'part_gvcf.list')

    with open(part_vcf_list, 'w') as wf:
        for p in bed_prefix:
            part_vcf = os.path.join(part_vcf_dir, p + suffix)
            wf.write(part_vcf)
            wf.write('\n')

    cmd = '/hwfssz1/BIGDATA_COMPUTING/software/bin/bcftools concat --threads 24 -O z -a -f {} -o {}'.format(
        part_vcf_list, out)
    printtime('INFO: {}'.format(cmd))
    subprocess.call(cmd, shell=True)
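# Illustrative call (hypothetical inputs): with bed prefixes ['chr1_0', 'chr1_1'] and a
# module-level vcf_suffix of '.hc.vcf.gz' (the real value is defined elsewhere in the
# module), merge_vcf writes part_vcf.list containing
#   /path/to/part/chr1_0.hc.vcf.gz
#   /path/to/part/chr1_1.hc.vcf.gz
# and then runs:
#   bcftools concat --threads 24 -O z -a -f /path/to/part/part_vcf.list -o merged.vcf.gz
#
#     merge_vcf(['chr1_0', 'chr1_1'], '/path/to/part', 'merged.vcf.gz')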
def main():
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}

    Created by huangzhibo on {1}. Last updated on {2}.
    Copyright 2017 BGI bigData. All rights reserved.

    USAGE'''.format(" v".join([program_name, __version__]), str(__date__), str(__updated__))

    parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-s", "--state", dest="state", help="state file,[default: %(default)s]", required=True)
    parser.add_argument("-n", "--sample_name", dest="sample_name", help="sample_name,[default: %(default)s]", required=True)
    parser.add_argument("-t", "--step", dest="step", help="step,[default: %(default)s]")
    parser.add_argument("-d", "--db_state", action="store_true", help="update db state,[default: %(default)s]")

    if len(sys.argv) == 1:
        parser.print_help()
        exit(1)

    # Process arguments
    args = parser.parse_args()

    if not os.path.exists(args.state):
        printtime('ERROR: (--state: %s) - No such file or directory' % args.state)
        return 1

    state = ParseConfig(args.state).parseState()
    state_dir = os.path.join(state.stateDir, 'sample_state')
    if not os.path.exists(state_dir):
        os.mkdir(state_dir)

    status_tag = 'done'
    if args.step:
        steps = args.step.split(',')
        for step in steps:
            status = result_check(args.sample_name, state, step)
            if not status:
                printtime('ERROR: (step: %s) - No such file or directory' % step)
                if args.db_state:
                    update_local_state(args.sample_name, state_dir, 'error', 'No results for step: {}'.format(step))
                    update_db_state(args.sample_name, 'error', 'No results for step: {}'.format(step))
                else:
                    update_local_state(args.sample_name, state_dir, 'error', 'No results for step: {}'.format(step))
                return 1
            else:
                printtime('INFO: (step: %s) - completed' % step)
    else:
        status_tag = 'running'

    print status_tag
    if args.db_state:
        update_local_state(args.sample_name, state_dir, status_tag)
        update_db_state(args.sample_name, status_tag)
    else:
        update_local_state(args.sample_name, state_dir, status_tag)
    return 0
def run(args, state):
    analysisDict = state.analysisDict
    sampleName = args.sampleName
    logger = Logger(os.path.join(state.scriptsDir, 'log'), '1', 'gaeaJobMonitor', False).getlog()
    isComplete = bundle()
    all_done = True

    jobList = args.jobs.split(',')
    if jobList[0] == 'init':
        if not state.results['init'].get('script'):
            jobList = jobList[1:]

    for num, step in enumerate(jobList):
        if analysisDict[step].platform == 'S':
            continue
        n = state.analysisList.index(step)
        if state.analysisList[0] != 'init':
            n += 1
        script = state.results[step]['script'][sampleName]

        # A step only runs when all of its dependencies completed
        if num > 0:
            for depStep in analysisDict[step].depend:
                if not isComplete[depStep]:
                    isComplete[step] = False
                    break

        if isComplete.has_key(step) and isComplete[step] == False:
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            continue

        printtime('step: %s start...' % step)
        p = subprocess.Popen('sh %s' % script, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        isComplete[step] = check_log(p, script, sampleName, n, step)

        if isComplete[step] or step == 'alignment':
            if step == 'alignment':
                isComplete[step] = True
            printtime("step: %s complete" % step)
            logger.info('%s - step %d: %s complete' % (sampleName, n, step))
            out_fh = open(script + '.o', 'w')
            for line in p.stdout.readlines():
                print >> out_fh, line[:-1]
            out_fh.close()
            p.wait()
        else:
            all_done = False
            printtime("%s failed" % step)
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            if p.returncode is None:
                p.kill()

    return all_done
def main(argv=None):  # IGNORE:C0111
    '''Command line options.'''

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_shortdesc = __import__('__main__').__doc__.split("\n")[1]
    program_license = '''%s

    Created by huangzhibo on %s.
    Copyright 2016 BGI_bigData. All rights reserved.

    USAGE
    ''' % (program_shortdesc, str(__date__))

    try:
        # Setup argument parser
        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
        parser.add_argument("-s", "--state", dest="state", help="state file,[default: %(default)s]", required=True)
        parser.add_argument("-r", "--rerun", dest="rerun", default='all', help="rerun file,[default: %(default)s]")
        # parser.add_argument("-s", "--submit", action="store_true", default=False, help="submit to SGE. if False , just generator gaea.sh. [default: %(default)s]")
        parser.add_argument("-t", "--type", dest="type", choices=['write', 'local', 'submit'], type=str, default="write",
                            help="1.write: just write run scripts; 2.local: run tasks on one local node; 3:submit: submit tasks to SGE [default: %(default)s]")
        parser.add_argument("-q", "--queue", dest="queue", help="the queue of the job. [default: %(default)s]")
        parser.add_argument("-p", "--partition", dest="partition", help="the job partition. [default: %(default)s]")
        parser.add_argument('-V', '--version', action='version', version=program_version_message)

        # Process arguments
        args = parser.parse_args()

        if not os.path.exists(args.state):
            printtime('ERROR: (--state: %s) - No such file or directory' % args.state)
            return 2

        state = ParseConfig(args.state).parseState()
        # if state.init.has_key("GAEA_HOME"):
        #     os.environ["GAEA_HOME"] = state.GAEA_HOME
        #     os.environ["PATH"] = os.environ["GAEA_HOME"] + ':' + os.environ["PATH"]

        state.failFile = os.path.join(state.stateDir, "failed")
        state.successFile = os.path.join(state.stateDir, "success")
        deleteFile(state.failFile)
        deleteFile(state.successFile)

        # Keep a backup of the previous log before starting a new run
        if os.path.exists(state.logfile):
            if not os.path.exists('%s.backup' % state.logfile):
                os.rename(state.logfile, '%s.backup' % state.logfile)
            else:
                subprocess.call("cat %s >>%s.backup" % (state.logfile, state.logfile), shell=True)
                deleteFile(state.logfile)

        if args.rerun == 'all':
            rerunInfo = parseRerun(state, False)
        elif args.rerun:
            rerunInfo = parseRerun(args.rerun)
            if not rerunInfo:
                rerunInfo = parseRerun(state, False)

        if args.type == 'submit' and state.hadoop.is_at_TH:
            args.type = 'local'

        state.hasSDNstep = False
        for l in rerunInfo:
            for s in l[1].split(','):
                if state.analysisDict[s].platform == 'S':
                    state.hasSDNstep = True

        if state.hasSDNstep:
            if args.type == 'local' and not state.hadoop.is_at_TH:
                writefail("Has standalone step, please submit tasks to SGE. (-t submit)", state.failFile)
            if not state.hadoop.is_at_TH:
                if not args.queue:  # or not args.partition:
                    writefail("Has standalone step, please set parameters: -q -P ", state.failFile)

        lastJobId = multi_run(args, state, rerunInfo[0])
        json.dump(state.results, open('%s/results.json' % state.stateDir, 'w'), indent=4)

        if args.type == 'submit':
            script = os.path.join(state.gaeaScriptsDir, "check_complete.sh")
            writeCheckShell(script, state, state.failFile, state.successFile)
            sh_err = '%s.e' % script
            sh_out = '%s.o' % script
            hold_jid = ''
            printtime("check end (%s) " % lastJobId)
            if re.match("\d+", lastJobId):
                hold_jid = lastJobId

            cmd = []
            if hold_jid:
                if state.hasSDNstep:
                    if args.partition:
                        cmd = ['qsub', '-cwd', '-l', 'vf=0.5g', '-hold_jid', hold_jid, '-q', args.queue, '-P', args.partition, '-e', sh_err, '-o', sh_out, script]
                    else:
                        cmd = ['qsub', '-cwd', '-l', 'vf=0.5g', '-hold_jid', hold_jid, '-q', args.queue, '-e', sh_err, '-o', sh_out, script]
                else:
                    cmd = ['qsub', '-cwd', '-l', 'vf=0.5g', '-hold_jid', hold_jid, '-q', 'gaea.q', '-P', 'hadoop', '-e', sh_err, '-o', sh_out, script]
            else:
                if state.hasSDNstep:
                    if args.partition:
                        cmd = ['qsub', '-cwd', '-l', 'vf=0.5g', '-q', args.queue, '-P', args.partition, '-e', sh_err, '-o', sh_out, script]
                    else:
                        cmd = ['qsub', '-cwd', '-l', 'vf=0.5g', '-q', args.queue, '-e', sh_err, '-o', sh_out, script]
                else:
                    cmd = ['qsub', '-cwd', '-l', 'vf=0.5g', '-q', 'gaea.q', '-P', 'hadoop', '-e', sh_err, '-o', sh_out, script]

            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            for line in p.stdout.readlines():
                printtime(line[:-1])
                jobInfo = re.match(r'^Your job (\d+) \("(.*?)"\) has been submitted$', line)
                if not jobInfo:
                    writefail("err happened when qsub. ", state.failFile)
            for line in p.stderr.readlines():
                printtime('ERROR: (check_complete.sh) %s' % line[:-1])
            p.wait()

        elif args.type == 'local':
            failed = False
            if os.path.exists(state.logfile):
                logFile = open(state.logfile, 'r')
                for line in logFile:
                    m = re.match('.*fail.*', line)
                    if m:
                        writefail("%s " % line, state.failFile)
                        failed = True
            else:
                writefail("no start", state.failFile)
                failed = True

            if not failed:
                stat = open(os.path.join(state.stateDir, 'success'), 'w')
                print >> stat, 'success'
                stat.close()

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
    except Exception, e:
        with open(os.path.join(state.stateDir, 'failed'), 'w') as f:
            f.write("Error in submit jobs!")
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        return 2
def multi_run(args, state, rerunInfo):
    failFile = state.failFile
    analysisDict = state.analysisDict
    lastJobId = []
    sampleName = rerunInfo[0]
    jobList = getJobList(analysisDict, rerunInfo[1].split(','))

    for n, job in enumerate(jobList):
        if len(jobList) > 1:
            shellName = 'gaea_%d' % n
        else:
            shellName = 'gaea'
        gaeaShell = os.path.join(state.gaeaScriptsDir, sampleName, '%s.sh' % shellName)
        writeGaeaShell(gaeaShell, state, job, sampleName)

        if args.type == 'local':
            p = subprocess.Popen('sh %s' % gaeaShell, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            check_out(p, gaeaShell)
            p.wait()
            stat = p.returncode
            if stat != 0:
                writefail('(gaea.sh) step failed with status %d' % stat, failFile)

            for step in job.split(','):
                if analysisDict[step].platform == 'H':
                    continue
                lastJobId = []
                scriptsDict = state.results[step]['script']
                jobIdDict = bundle()
                depStep = []
                for deps in analysisDict[step].depend:
                    if analysisDict[deps].platform == 'S':
                        depStep.append(deps)

                for sample_name in scriptsDict:
                    script = scriptsDict[sample_name]
                    sh_err = '%s.e' % script
                    sh_out = '%s.o' % script
                    cmd = []
                    if state.hadoop.is_at_TH:
                        # Slurm: dependencies chained as afterok:<id>:<id>
                        hold_jid = 'afterok'
                        for ds in depStep:
                            if state.results[ds].multiscript:
                                hold_jid = hold_jid + ':%s' % state.results[ds]['jobId'][state.option.multiSampleName]
                            elif state.results[step].multiscript:
                                for sample in state.results[ds].jobId:
                                    hold_jid = hold_jid + ':%s' % state.results[ds].jobId[sample]
                            else:
                                hold_jid = hold_jid + ':%s' % state.results[ds]['jobId'][sample_name]
                        if hold_jid == 'afterok':
                            cmd = ['sbatch', '-p', args.partition, '-e', sh_err, '-o', sh_out, script]
                        else:
                            cmd = ['sbatch', '-p', args.partition, '-d', hold_jid, '-e', sh_err, '-o', sh_out, script]
                        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                        jobIdDict[sample_name] = check_out_sdn(p, failFile, True)
                    else:
                        # SGE: dependencies as a comma-separated -hold_jid list
                        hold_jid = ''
                        for ds in depStep:
                            if state.results[ds].multiscript:
                                hold_jid = hold_jid + '%s,' % state.results[ds]['jobId'][state.option.multiSampleName]
                            elif state.results[step].multiscript:
                                for sample in state.results[ds].jobId:
                                    hold_jid = hold_jid + '%s,' % state.results[ds].jobId[sample]
                            else:
                                hold_jid = hold_jid + '%s,' % state.results[ds]['jobId'][sample_name]
                        vf = 'vf=5g'  # default when no mem is configured (mirrors the submit branch below)
                        if state[step].get('mem'):
                            vf = 'vf=%s' % state[step].mem
                        if hold_jid:
                            if args.partition:
                                cmd = ['qsub', '-cwd', '-l', vf, '-hold_jid', hold_jid, '-q', args.queue, '-P', args.partition, '-e', sh_err, '-o', sh_out, script]
                            else:
                                cmd = ['qsub', '-cwd', '-l', vf, '-hold_jid', hold_jid, '-q', args.queue, '-e', sh_err, '-o', sh_out, script]
                        else:
                            if args.partition:
                                cmd = ['qsub', '-cwd', '-l', vf, '-q', args.queue, '-P', args.partition, '-e', sh_err, '-o', sh_out, script]
                            else:
                                cmd = ['qsub', '-cwd', '-l', vf, '-q', args.queue, '-e', sh_err, '-o', sh_out, script]
                        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                        jobIdDict[sample_name] = check_out_sdn(p, failFile)
                    lastJobId.append(jobIdDict[sample_name])
                state.results[step]['jobId'] = jobIdDict

        elif args.type == 'submit':
            sh_err = '%s.e' % gaeaShell
            sh_out = '%s.o' % gaeaShell
            gaea_queue = 'gaea.q'
            gaea_partition = 'hadoop'
            if state.hadoop.cluster != 'cluster35':
                gaea_queue = args.queue
                gaea_partition = args.partition
            if gaea_partition:
                cmd = ['qsub', '-cwd', '-l', 'vf=2g', '-q', gaea_queue, '-P', gaea_partition, '-e', sh_err, '-o', sh_out, gaeaShell]
            else:
                cmd = ['qsub', '-cwd', '-l', 'vf=2g', '-q', gaea_queue, '-e', sh_err, '-o', sh_out, gaeaShell]

            hold_jid = ''
            if n > 0:
                for ds in analysisDict[job.split(',')[0]].depend:
                    if analysisDict[ds].platform == 'S':
                        for sampleName in state.results[ds].jobId:
                            hold_jid = hold_jid + '%s,' % state.results[ds].jobId[sampleName]
            if hold_jid:
                if gaea_partition:
                    cmd = ['qsub', '-cwd', '-l', 'vf=2g', '-hold_jid', hold_jid, '-q', gaea_queue, '-P', gaea_partition, '-e', sh_err, '-o', sh_out, gaeaShell]
                else:
                    cmd = ['qsub', '-cwd', '-l', 'vf=2g', '-hold_jid', hold_jid, '-q', gaea_queue, '-e', sh_err, '-o', sh_out, gaeaShell]

            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            for line in p.stderr.readlines():
                printtime('ERROR: (%s) %s' % (shellName, line[:-1]))
            for line in p.stdout.readlines():
                printtime(line[:-1])
                jobInfo = re.match(r'^Your job (\d+) \("(.*?)"\) has been submitted$', line)
                if jobInfo:
                    state.results.gaeaJobId = jobInfo.group(1)
                    lastJobId.append(jobInfo.group(1))
                else:
                    writefail("err happened when qsub. (%s) " % shellName, failFile)
                    exit(1)
            p.wait()

            for step in job.split(','):
                if analysisDict[step].platform == 'H':
                    continue
                jobIdDict = bundle()
                scriptsDict = state.results[step]['script']
                depStep = []
                depHDP = False
                for deps in analysisDict[step].depend:
                    if analysisDict[deps].platform == 'S':
                        depStep.append(deps)
                    else:
                        depHDP = True
                lastJobId = []

                for sample_name in scriptsDict:
                    script = scriptsDict[sample_name]
                    sh_err = '%s.e' % script
                    sh_out = '%s.o' % script
                    hold_jid = ''
                    for ds in depStep:
                        if state.results[ds].multiscript:
                            hold_jid = hold_jid + '%s,' % state.results[ds]['jobId'][state.option.multiSampleName]
                        elif state.results[step].multiscript:
                            for sample in state.results[ds].jobId:
                                hold_jid = hold_jid + '%s,' % state.results[ds].jobId[sample]
                        else:
                            hold_jid = hold_jid + '%s,' % state.results[ds]['jobId'][sample_name]
                    if depHDP:
                        hold_jid = hold_jid + '%s,' % state.results.gaeaJobId

                    vf = 'vf=5g'
                    if state[step].get('mem'):
                        vf = 'vf=%s' % state[step].mem
                    else:
                        printtime("Standalone Step %s: No set mem info for SGE. Default:'5G'. " % step)

                    if hold_jid:
                        if args.partition:
                            cmd = ['qsub', '-cwd', '-l', vf, '-hold_jid', hold_jid, '-q', args.queue, '-P', args.partition, '-e', sh_err, '-o', sh_out, script]
                        else:
                            cmd = ['qsub', '-cwd', '-l', vf, '-hold_jid', hold_jid, '-q', args.queue, '-e', sh_err, '-o', sh_out, script]
                    else:
                        if args.partition:
                            cmd = ['qsub', '-cwd', '-l', vf, '-q', args.queue, '-P', args.partition, '-e', sh_err, '-o', sh_out, script]
                        else:
                            cmd = ['qsub', '-cwd', '-l', vf, '-q', args.queue, '-e', sh_err, '-o', sh_out, script]
                    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    jobIdDict[sample_name] = check_out_sdn(p, failFile)
                    lastJobId.append(jobIdDict[sample_name])
                state.results[step]['jobId'] = jobIdDict

    allLastJobId = ','.join(lastJobId)
    return allLastJobId
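# As a reading aid, a minimal runnable sketch (hypothetical job IDs) of the two dependency
# strings multi_run assembles: Slurm chains hold IDs as afterok:<id>:<id> for `sbatch -d`,
# while SGE takes a comma-separated list for `qsub -hold_jid`.
#
#     dep_ids = ['10001', '10002']
#
#     hold_jid_slurm = 'afterok'
#     for jid in dep_ids:
#         hold_jid_slurm = hold_jid_slurm + ':%s' % jid
#
#     hold_jid_sge = ''
#     for jid in dep_ids:
#         hold_jid_sge = hold_jid_sge + '%s,' % jid
#
#     print(hold_jid_slurm)  # afterok:10001:10002
#     print(hold_jid_sge)    # 10001,10002,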
    if state.analysisList[0] == 'init':
        state.analysisList = state.analysisList[1:]
    jobs = ','.join(state.analysisList)
    rerun_fh.write('%s\t%s\n' % (state.option.multiSampleName, jobs))

    json.dump(state, open('%s/state.json' % usercfg.stateDir, 'w'), indent=4)
    sys.stdout.flush()
    sys.stderr.flush()

    # write run.sh
    runShell = os.path.join(usercfg.scriptsDir, 'run.sh')
    writeRunShell(runShell, state)
    os.chmod(runShell, stat.S_IRWXU + stat.S_IRGRP + stat.S_IXGRP + stat.S_IROTH + stat.S_IXOTH)
    printtime("\nPlease run scripts/run.sh to submit tasks.")
    subprocess.call("sh %s write" % runShell, shell=True)


def main(argv=None):  # IGNORE:C0111
    '''Command line options.'''

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version,
def result_check(data, size_threshold=4096):
    if not os.path.exists(data) or os.path.getsize(data) < size_threshold:
        printtime('ERROR: (data: %s) - Is incomplete!!!' % data)
        return False
    return True
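# Hypothetical usage of result_check: any missing output file, or one smaller than the
# 4096-byte default threshold, is treated as incomplete (path below is illustrative only).
#
#     if not result_check('/path/to/sampleA.sorted.bam'):
#         printtime('ERROR: sampleA output incomplete, marking step as failed')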
def main():
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}

    Created by huangzhibo on {1}. Last updated on {2}.
    Copyright 2017 BGI bigData. All rights reserved.

    USAGE'''.format(" v".join([program_name, __version__]), str(__date__), str(__updated__))

    parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-b", "--bedlist", help="the bed.list to run HC streaming [required]", required=True)
    parser.add_argument("-o", "--outdir", help="outdir of part_vcf's symbolic link [%(default)s]")
    parser.add_argument("-s", "--suffix", help="part_vcf file suffix [%(default)s]", default='.hc.vcf.gz')
    parser.add_argument("-p", "--part_vcf_dir", help="the tmpdir of part_vcf [required]", required=True)
    parser.add_argument("-i", "--index_check", action="store_true", default=False, help="check vcf index [%(default)s]")

    if len(sys.argv) == 1:
        parser.print_help()
        exit(1)

    # Process arguments
    args = parser.parse_args()

    if not os.path.exists(args.bedlist):
        printtime('ERROR: (--bedlist: %s) - No such file or directory' % args.bedlist)
        return 1

    if not os.path.exists(args.part_vcf_dir):
        printtime('ERROR: (--part_vcf_dir: %s) - No such file or directory' % args.part_vcf_dir)
        return 1

    vcf_basename = []
    with open(args.bedlist, 'r') as beds:
        for bed in beds:
            vcf_basename.append('{}{}'.format(os.path.splitext(os.path.basename(bed))[0], args.suffix))

    # Poll up to 30 times (5 s apart) for the part VCFs (and their indexes, if requested)
    status = False
    i = 30
    while i:
        status = check_part_vcf(vcf_basename, args.part_vcf_dir)
        if status:
            if args.index_check:
                status = check_part_vcf_index(vcf_basename, args.part_vcf_dir, '.tbi')
                if not status:
                    status = check_part_vcf_index(vcf_basename, args.part_vcf_dir, '.idx')
            break
        time.sleep(5)
        i -= 1

    link_vcf_dir = ''
    if args.outdir:
        link_vcf_dir = os.path.abspath(args.outdir)
        if os.path.exists(link_vcf_dir):
            printtime('ERROR: (--outdir: %s) - This directory already exists! Please remove it and check again!' % link_vcf_dir)
            return -1
        os.makedirs(link_vcf_dir)

    if status:
        print("part_vcf_dir is good!")
        if link_vcf_dir:
            link_vcf(vcf_basename, args.part_vcf_dir, link_vcf_dir)
    else:
        print("part_vcf_dir is bad!")
        return -1

    return 0
    else:
        if state.analysisList[0] == 'init' and state.analysisList[1] == 'filter':
            state.analysisList = state.analysisList[1:]
        jobs = ','.join(state.analysisList)
        rerun_fh.write('%s\t%s\n' % (state.option.multiSampleName, jobs))

    json.dump(state, open('%s/state.json' % usercfg.stateDir, 'w'), indent=4)
    sys.stdout.flush()
    sys.stderr.flush()
    # json.dump(state, open('%s/state.json' % usercfg.stateDir, 'w'), indent=4)

    # write run.sh
    runShell = os.path.join(usercfg.scriptsDir, 'run.sh')
    writeRunShell(runShell, state)
    os.chmod(runShell, stat.S_IRWXU + stat.S_IRGRP + stat.S_IXGRP + stat.S_IROTH + stat.S_IXOTH)
    printtime("\nPlease run scripts/run.sh to submit tasks.")
    subprocess.call("sh %s write" % runShell, shell=True)

    runShellTest = os.path.join(usercfg.scriptsDir, 'test.sh')
    with open(runShellTest, 'w') as f:
        state_file = os.path.join(state.stateDir, 'state.json')
        rerun_list_file = os.path.join(state.stateDir, 'rerun.list')
        f.write("#!/bin/sh\n")
        f.write("source {}/bin/activate\n".format(state.GAEA_HOME))
        f.write("job_scheduler.py -s {} -r {}\n".format(state_file, rerun_list_file))
    os.chmod(runShellTest, stat.S_IRWXU + stat.S_IRGRP + stat.S_IXGRP + stat.S_IROTH + stat.S_IXOTH)


def main(argv=None):  # IGNORE:C0111
    '''Command line options.'''

    if argv is None: