def phylogeny(args, partition_file, phylip):
    """Build a tree with IQ-TREE inside the ``4_phylogeny`` directory.

    The partition file and the alignment are symlinked into ``4_phylogeny``
    under their base names, and an IQ-TREE job is submitted unless the
    expected tree file is already present there.

    Args:
        args: Run arguments; not used by this function.
        partition_file: Path of the partition file (given to ``-spp``).
        phylip: Path of the PHYLIP alignment (given to ``-s``).

    Returns:
        The tree file path relative to the starting directory, i.e.
        ``'4_phylogeny/<partition file base name>.treefile'``.
    """
    basedir = os.getcwd()
    try:
        os.mkdir('4_phylogeny')
    except OSError:
        # Best effort: the directory normally exists from an earlier run.
        # If it is genuinely unusable, the chdir below raises.
        pass
    os.chdir('4_phylogeny')
    try:
        local_names = []
        for target in (partition_file, phylip):
            link_name = target.split('/')[-1]
            # A previous run leaves these links behind; recreating them
            # would raise, so only link what is not there yet.
            # NOTE(review): targets are linked verbatim, so a relative
            # path is resolved against 4_phylogeny - confirm that callers
            # pass absolute paths.
            if not os.path.lexists(link_name):
                os.symlink(target, link_name)
            local_names.append(link_name)
        partition_file, phylip = local_names

        tree = partition_file + '.treefile'
        if not os.path.isfile(tree):
            job = '{} -nt AUTO -s {} -m MFP -alrt 1000 -bb 1000 -spp {}'
            job = job.format(cfg.iqtree, phylip, partition_file)
            ID = submit(job,
                        partition=cfg.SLURMpartition,
                        account=cfg.SLURMaccount,
                        qos=cfg.SLURMqos,
                        time='24:0:0',
                        job_name='IQTree',
                        cpus_per_task=cfg.SLURMcpus,
                        mem_per_cpu=cfg.SLURMmem,
                        modules=cfg.modules)
            job_wait(ID)
            # Presumably the scheduler logs, named <job_name>_<ID>.
            err = 'IQTree_{}.err'.format(ID)
            out = 'IQTree_{}.out'.format(ID)
            cleanup(logs=[out, err])
    finally:
        # Always return to the starting directory, even when linking or
        # job submission fails, so later relative paths stay valid.
        os.chdir(basedir)
    return '4_phylogeny/' + tree
def orthology(args):
    """Find orthologous sequences between all genomes.

    First runs ``runmummer`` so that any queries not yet aligned to the
    reference get aligned. If the ``1_orthology/<output>.OS.txt`` file is
    missing, or ``args.force`` is set, this script file is re-run as a
    submitted job with ``<output>.args.pickle`` as its argument
    (presumably the pickled run arguments - confirm with the caller).

    Args:
        args: Run arguments; ``args.output`` and ``args.force`` are read
            here, and ``args`` is passed on to ``runmummer`` and
            ``get_segments``.

    Returns:
        The result of ``get_segments(args)``, or ``None`` when the
        ``.OS.txt`` file still does not exist after the job has finished.
    """
    # Align any queries to the reference that have not yet been aligned.
    # Only the side effects are needed; the returned file list is unused.
    runmummer(args)

    OS_file = '1_orthology/' + args.output + '.OS.txt'
    if os.path.isfile(OS_file) and not args.force:
        return get_segments(args)

    job = '{} {} {}'.format(sys.executable, __file__,
                            args.output + '.args.pickle')
    ID = submit(job,
                partition=cfg.SLURMpartition,
                account=cfg.SLURMaccount,
                qos=cfg.SLURMqos,
                time='12:0:0',
                job_name='py_ortho',
                cpus_per_task=cfg.SLURMcpus,
                mem_per_cpu=cfg.SLURMmem,
                modules=cfg.modules)
    job_wait(ID)
    out_file = 'py_ortho_' + str(ID) + '.out'
    err_file = 'py_ortho_' + str(ID) + '.err'

    # NOTE(review): with args.force and a stale .OS.txt from an earlier
    # run, a failed job is not detected by this check.
    failed = not os.path.isfile(OS_file)
    if failed:
        try:
            # Close the log promptly instead of leaking the handle.
            with open(err_file, 'r') as handle:
                err_text = handle.read()
        except (IOError, OSError):
            # A missing log must not hide the failure report itself.
            err_text = '(could not read {})\n'.format(err_file)
        exit_message = 'Orthology search failed.\n\n{}:\n{}'
        sys.stderr.write(exit_message.format(err_file, err_text))

    cleanup([out_file, err_file])
    if failed:
        return None
    return get_segments(args)
def submit_tiger2(args, alignment):
    """Submit the rate-partitioning job for ``alignment`` and wait for it.

    The requested wallclock time is estimated from the alignment size and
    capped at ``cfg.LARGEmaxtime * 60`` minutes, with a warning written to
    stderr when the cap applies.

    Args:
        args: Run arguments; ``args.genomes`` and ``args.output`` are read.
        alignment: Alignment file handed to the partitioning program.
    """
    # Rate partitioning usually takes under 0.5s per unique pattern
    # in the alignment. There is no fast way to know how many unique
    # patterns there will be in an alignment of a given size.
    # Empirical guess of a high patterns-per-site ratio: 0.05 for most,
    # 0.1 for the extremely diverse clade N. rajui.
    patterns_per_site = 0.1
    seconds_per_pattern = 0.5

    n_records = len(args.genomes)
    n_sites = int(total_length(alignment) / n_records)
    runtime_seconds = int(patterns_per_site * n_sites * seconds_per_pattern)
    minutes = int(runtime_seconds / 60) + 1

    wallclock_limit = cfg.LARGEmaxtime * 60
    if minutes > wallclock_limit:
        sys.stderr.write(
            "Warning: estimated partition time ({}) is greater than maximum wallclock time ({}).\n"
            .format(minutes, wallclock_limit))
        minutes = wallclock_limit

    command = '{} -in {} -a dna -out {} -f phylip -bt rota -b 4 -t 1'.format(
        cfg.tiger, alignment, args.output)
    options = dict(
        partition=cfg.LARGEpartition,
        account=cfg.LARGEaccount,
        qos=cfg.LARGEqos,
        time=str(minutes),
        job_name='rate_partitioning',
        cpus_per_task=cfg.LARGEcpus,
        mem_per_cpu=cfg.LARGEmem,
        modules=cfg.modules,
    )
    job_id = submit(command, **options)
    job_wait(job_id)

    log_stem = 'rate_partitioning_' + str(job_id)
    cleanup([log_stem + '.out', log_stem + '.err'])
def submit_alignment_batch(job):
    """Submit ``job`` as a 'mafft' job and block until it has finished.

    The job is submitted with the ``cfg.LARGE*`` settings and a 36-hour
    time limit.

    Args:
        job: The job passed straight through to ``submit``.

    Returns:
        The job ID returned by ``submit``.
    """
    options = dict(
        partition=cfg.LARGEpartition,
        account=cfg.LARGEaccount,
        qos=cfg.LARGEqos,
        time='36:0:0',
        job_name='mafft',
        cpus_per_task=cfg.LARGEcpus,
        mem_per_cpu=cfg.LARGEmem,
        modules=cfg.modules,
    )
    job_id = submit(job, **options)
    job_wait(job_id)
    return job_id
def submit_phylogeny(job):
    """Submit ``job`` as an 'ExaML' job and block until it has finished.

    The job is submitted with the ``cfg.SLURM*`` settings and a 12-hour
    time limit.

    Args:
        job: The job passed straight through to ``submit``.

    Returns:
        The job ID returned by ``submit``.
    """
    options = dict(
        partition=cfg.SLURMpartition,
        account=cfg.SLURMaccount,
        qos=cfg.SLURMqos,
        time='12:0:0',
        job_name='ExaML',
        cpus_per_task=cfg.SLURMcpus,
        mem_per_cpu=cfg.SLURMmem,
        modules=cfg.modules,
    )
    job_id = submit(job, **options)
    job_wait(job_id)
    return job_id
def runmummer(args):
    """Make sure every query has a mummer alignment against the reference.

    Works inside the ``1_orthology`` directory. Entries that
    ``find_mummer_files`` reports as absent are submitted together as one
    pooled ``nucmer`` job, after which they are treated as existing.

    Args:
        args: Run arguments; ``args.reference`` is read here and ``args``
            is passed on to ``find_mummer_files``.

    Returns:
        A tuple holding the fifth field (the filter file) of every entry,
        or an empty tuple when there are no entries at all.
    """
    basedir = os.getcwd()
    try:
        os.mkdir('1_orthology')
    except OSError:
        # Best effort: the directory normally exists from an earlier run.
        pass
    os.chdir('1_orthology')
    try:
        existing, absent = find_mummer_files(args)
        if absent:
            try:
                os.mkdir('logs')
            except OSError:
                pass
            jobs = [(nucmer, [args.reference] + absence)
                    for absence in absent]
            # estimate 4 hours per query
            esttime = '{}:0:0'.format(
                int(4 * (len(jobs) / cfg.SLURMcpus + 1)))
            ID = submit(jobs,
                        pool=True,
                        partition=cfg.SLURMpartition,
                        account=cfg.SLURMaccount,
                        qos=cfg.SLURMqos,
                        time=esttime,
                        job_name='mummer',
                        cpus_per_task=cfg.SLURMcpus,
                        mem_per_cpu=cfg.SLURMmem,
                        modules=cfg.modules)
            job_wait(ID)
            # all alignments should now exist
            existing.extend(absent)

        if not existing:
            # zip(*[]) yields nothing, so the unpacking below would raise
            # ValueError; with no queries there are simply no files.
            return ()

        # Entries are (query, prefix, deltafile, tempfile, filterfile);
        # only the last column is returned.
        _, _, _, _, filterfiles = zip(*existing)
        return filterfiles
    finally:
        # Restore the working directory even if a step above fails.
        os.chdir(basedir)