def phylogeny(args, partition_file, phylip):
    basedir = os.getcwd()
    try:
        os.mkdir('4_phylogeny')
    except OSError:
        pass
    os.chdir('4_phylogeny')

    # Link the inputs into the working directory and refer to them by basename.
    for path in (partition_file, phylip):
        link = os.path.basename(path)
        if not os.path.islink(link):
            os.symlink(path, link)
    partition_file = os.path.basename(partition_file)
    phylip = os.path.basename(phylip)

    tree = partition_file + '.treefile'

    if not os.path.isfile(tree):
        job = '{} -nt AUTO -s {} -m MFP -alrt 1000 -bb 1000 -spp {}'
        job = job.format(cfg.iqtree, phylip, partition_file)
        ID = submit(job,
                    partition=cfg.SLURMpartition,
                    account=cfg.SLURMaccount,
                    qos=cfg.SLURMqos,
                    time='24:0:0',
                    job_name='IQTree',
                    cpus_per_task=cfg.SLURMcpus,
                    mem_per_cpu=cfg.SLURMmem,
                    modules=cfg.modules)
        job_wait(ID)

        err = 'IQTree_{}.err'.format(ID)
        out = 'IQTree_{}.out'.format(ID)
        cleanup(logs=[out, err])

    os.chdir(basedir)
    return '4_phylogeny/' + tree
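
# A self-contained sketch of the "stage inputs into a work subdirectory"
# pattern used by phylogeny() above; the helper name (stage_inputs) and the
# use of absolute link targets are illustrative assumptions, not part of the
# pipeline. It assumes `import os` at module level, as the functions here do.
def stage_inputs(workdir, *paths):
    # Create the work directory if needed, symlink each input into it by
    # basename, and return the basenames to use from inside that directory.
    os.makedirs(workdir, exist_ok=True)
    names = []
    for path in paths:
        name = os.path.basename(path)
        link = os.path.join(workdir, name)
        if not os.path.islink(link):
            # Link to an absolute target so it resolves from inside workdir.
            os.symlink(os.path.abspath(path), link)
        names.append(name)
    return names
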
def orthology(args):
    # Find orthologous sequences between all genomes and write them to fasta files.

    # Align any queries to the reference that have not yet been aligned.
    filterfiles = runmummer(args)  # returned filter-file paths are not needed here

    OS_file = '1_orthology/' + args.output + '.OS.txt'
    if not os.path.isfile(OS_file) or args.force:
        job = '{} {} {}'.format(sys.executable, __file__,
                                args.output + '.args.pickle')

        ID = submit(job,
                    partition=cfg.SLURMpartition,
                    account=cfg.SLURMaccount,
                    qos=cfg.SLURMqos,
                    time='12:0:0',
                    job_name='py_ortho',
                    cpus_per_task=cfg.SLURMcpus,
                    mem_per_cpu=cfg.SLURMmem,
                    modules=cfg.modules)
        job_wait(ID)
        out_file = 'py_ortho_' + str(ID) + '.out'
        err_file = 'py_ortho_' + str(ID) + '.err'

        if not os.path.isfile(OS_file):
            with open(err_file) as err:
                exit_message = 'Orthology search failed.\n\n{}:\n{}'.format(
                    err_file, err.read())
            sys.stderr.write(exit_message)
            cleanup([out_file, err_file])
            return None

        cleanup([out_file, err_file])

    return get_segments(args)
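
# orthology() above resubmits this script to SLURM with a pickled argument
# namespace ('<output>.args.pickle'). A hedged sketch of how that pickle could
# be written and read back; the helper names and the use of argparse.Namespace
# are assumptions, not taken from the pipeline itself. Imports are shown here
# to keep the sketch self-contained.
import argparse
import pickle

def write_args_pickle(args):
    # Serialise the parsed arguments so the cluster job can reload them.
    path = args.output + '.args.pickle'
    with open(path, 'wb') as handle:
        pickle.dump(vars(args), handle)
    return path

def read_args_pickle(path):
    # Reconstruct the namespace on the worker side.
    with open(path, 'rb') as handle:
        return argparse.Namespace(**pickle.load(handle))
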
def submit_tiger2(args, alignment):
    # Rate partitioning usually takes under 0.5s per unique pattern
    # in the alignment. There is no fast way to know how many unique
    # patterns there will be in an alignment of a given size.
    # Empirical guess for patterns per site: ~0.05 for most datasets,
    # ~0.1 for an extremely diverse clade such as N. rajui.
    pps = 0.1
    records = len(args.genomes)
    sites = int(total_length(alignment) / records)
    estimated_patterns = pps * sites
    estimated_runtime = int(estimated_patterns * 0.5)
    minutes = int(estimated_runtime / 60) + 1
    if minutes > (cfg.LARGEmaxtime * 60):
        warning = ('Warning: estimated partition time ({} min) exceeds the '
                   'maximum wallclock time ({} min); capping at the maximum.\n')
        warning = warning.format(minutes, cfg.LARGEmaxtime * 60)
        sys.stderr.write(warning)
        minutes = cfg.LARGEmaxtime * 60

    command = '{} -in {} -a dna -out {} -f phylip -bt rota -b 4 -t 1'
    command = command.format(cfg.tiger, alignment, args.output)
    ID = submit(command,
                partition=cfg.LARGEpartition,
                account=cfg.LARGEaccount,
                qos=cfg.LARGEqos,
                time=str(minutes),
                job_name='rate_partitioning',
                cpus_per_task=cfg.LARGEcpus,
                mem_per_cpu=cfg.LARGEmem,
                modules=cfg.modules)
    job_wait(ID)

    outfile = 'rate_partitioning_' + str(ID) + '.out'
    errfile = 'rate_partitioning_' + str(ID) + '.err'
    cleanup([outfile, errfile])
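
# Worked example of the wallclock estimate in submit_tiger2(): for a
# hypothetical alignment of 2,000,000 characters across 20 genomes,
# sites = 100,000, estimated_patterns = 0.1 * 100,000 = 10,000, and
# estimated_runtime = 10,000 * 0.5 s = 5,000 s, so 84 minutes are requested.
# The helper name and its defaults are illustrative only.
def estimate_tiger_minutes(total_chars, n_genomes, pps=0.1, secs_per_pattern=0.5):
    sites = int(total_chars / n_genomes)
    return int(pps * sites * secs_per_pattern / 60) + 1

assert estimate_tiger_minutes(2000000, 20) == 84
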
def submit_alignment_batch(job):
    ID = submit(job,
                partition=cfg.LARGEpartition,
                account=cfg.LARGEaccount,
                qos=cfg.LARGEqos,
                time='36:0:0',
                job_name='mafft',
                cpus_per_task=cfg.LARGEcpus,
                mem_per_cpu=cfg.LARGEmem,
                modules=cfg.modules)
    job_wait(ID)
    return ID
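
# submit_alignment_batch() expects a ready-made shell command. A hedged sketch
# of how such a MAFFT command string might be assembled; cfg.mafft and the
# chosen flags (--auto, --thread) are assumptions about the config, analogous
# to cfg.iqtree and cfg.tiger used elsewhere in this file.
def build_mafft_job(fasta_in, fasta_out):
    return '{} --auto --thread {} {} > {}'.format(
        cfg.mafft, cfg.LARGEcpus, fasta_in, fasta_out)
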
def submit_phylogeny(job):
    ID = submit(job,
                partition=cfg.SLURMpartition,
                account=cfg.SLURMaccount,
                qos=cfg.SLURMqos,
                time='12:0:0',
                job_name='ExaML',
                cpus_per_task=cfg.SLURMcpus,
                mem_per_cpu=cfg.SLURMmem,
                modules=cfg.modules)
    job_wait(ID)
    return ID
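
# submit_alignment_batch() and submit_phylogeny() differ only in queue
# settings, walltime and job name. A hedged refactor sketch of a shared
# wrapper; the helper name (submit_and_wait) is an assumption, while the
# cfg.* attributes, submit() and job_wait() are those used above.
def submit_and_wait(job, time, job_name, large=False):
    ID = submit(job,
                partition=cfg.LARGEpartition if large else cfg.SLURMpartition,
                account=cfg.LARGEaccount if large else cfg.SLURMaccount,
                qos=cfg.LARGEqos if large else cfg.SLURMqos,
                time=time,
                job_name=job_name,
                cpus_per_task=cfg.LARGEcpus if large else cfg.SLURMcpus,
                mem_per_cpu=cfg.LARGEmem if large else cfg.SLURMmem,
                modules=cfg.modules)
    job_wait(ID)
    return ID
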
def runmummer(args):
    basedir = os.getcwd()
    try:
        os.mkdir('1_orthology')
    except OSError:
        pass
    os.chdir('1_orthology')

    existing, absent = find_mummer_files(args)
    if absent:
        try:
            os.mkdir('logs')
        except OSError:
            pass

        jobs = [(nucmer, [args.reference] + absence) for absence in absent]
        esttime = '{}:0:0'.format(int(
            4 * (len(jobs) / cfg.SLURMcpus + 1)))  # estimate 4 hours per query
        ID = submit(jobs,
                    pool=True,
                    partition=cfg.SLURMpartition,
                    account=cfg.SLURMaccount,
                    qos=cfg.SLURMqos,
                    time=esttime,
                    job_name='mummer',
                    cpus_per_task=cfg.SLURMcpus,
                    mem_per_cpu=cfg.SLURMmem,
                    modules=cfg.modules)
        job_wait(ID)

        # all alignments should now exist
        existing.extend(absent)

    queries, prefixes, deltafiles, tempfiles, filterfiles = list(
        zip(*existing))

    os.chdir(basedir)
    return filterfiles
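
# Worked example of the pool wall-time request in runmummer(), assuming
# Python 3 division: with 10 absent queries and cfg.SLURMcpus = 4, the jobs
# spread out at roughly 2.5 per CPU, so at ~4 hours per query the request is
# int(4 * (10 / 4 + 1)) = 14, i.e. '14:0:0'. Each element of `absent`/`existing`
# appears to be a 5-element sequence of (query, prefix, deltafile, tempfile,
# filterfile) paths, as implied by the zip(*existing) unpacking above. The
# helper name is illustrative only.
def mummer_pool_walltime(n_queries, cpus, hours_per_query=4):
    return '{}:0:0'.format(int(hours_per_query * (n_queries / cpus + 1)))

assert mummer_pool_walltime(10, 4) == '14:0:0'
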