예제 #1
0
def runwindow(input, outgroup):
    """Build a phylogeny from the sequence windows flanking each SNP.

    Every data row of ``<input>.tab`` (first line is skipped as a header;
    column 1 is the scaffold name, column 2 the SNP position) defines a
    window of ``args.window`` bases on either side of the SNP.  For each
    entry of the module-level ``samples`` list the windows are cut from that
    sample's FASTA file and concatenated; the concatenated sequences are
    aligned with MAFFT, trimmed with trimAl and analysed with RAxML
    (``-f a``, GTRGAMMA, ``-# args.bootstrap``) inside ``window_tmp``.
    The best tree, combined with the bootstrap trees via ``get_support``, is
    written to ``<args.input>.windows.phylogeny.nwk`` and drawn to
    ``<args.input>.windows.phylogeny.pdf``.

    Reads the module-level ``args``, ``samples`` and ``FNULL``.

    Args:
        input: Path prefix of the SNP table; ``input + '.tab'`` is read.
        outgroup: Taxon name handed to RAxML with ``-o``; a falsy value runs
            RAxML without an outgroup.

    Raises:
        KeyError: if a scaffold named in the table is absent from a sample's
            FASTA file.
    """
    print("Running sliding-window method, using %i bp flanking each SNP." % args.window)
    tmpdir = 'window_tmp'
    # Always start from an empty scratch directory.
    if os.path.isdir(tmpdir):
        shutil.rmtree(tmpdir)
    os.makedirs(tmpdir)

    snpdb = {}
    count = 0
    with open(input+'.tab', 'rU') as data:
        next(data, None) #skip header (tolerates an empty file)
        for line in data:
            if not line.strip():
                # Blank rows carry no SNP and would otherwise fail on cols[1].
                continue
            count += 1
            cols = line.split('\t')
            scaffold = cols[0]
            pos = int(cols[1])
            # Clamp at 0: a negative slice start would count from the END of
            # the scaffold and yield a wrong (usually empty) window for SNPs
            # closer than args.window to the scaffold start.
            # NOTE(review): the "- 1" presumably converts a 1-based position
            # to a 0-based slice start -- confirm against the .tab producer.
            start = max(0, pos - args.window - 1)
            snpdb['snp'+str(count)] = [scaffold, start, pos + args.window]

    # The window order is identical for every sample, so sort only once.
    windows = natsorted(snpdb.items())

    sequencedb = {}
    for num, sample in enumerate(samples):
        if sample == 'Reference':
            # The reference FASTA is read from the directory of the entry that
            # follows 'Reference' in `samples`.
            # NOTE(review): assumes 'Reference' is never the last entry.
            fastafile = os.path.join(samples[num+1], 'reference', 'ref.fa')
        else:
            fastafile = os.path.join(sample, 'snps.consensus.subs.fa')
        with open(fastafile, 'rU') as fasta:
            sequence_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
        sequence = []
        for _name, (scaffold, start, end) in windows:
            rec = sequence_dict.get(scaffold)
            if rec is None:
                raise KeyError("scaffold %r listed in %s is not present in %s"
                               % (scaffold, input+'.tab', fastafile))
            sequence.append(str(rec.seq[start:end]))
        # First occurrence wins if a sample is listed more than once.
        if sample not in sequencedb:
            sequencedb[sample] = ''.join(sequence)

    with open(os.path.join(tmpdir, 'windows.fa'), 'w') as output:
        for name, seq in sequencedb.items():
            output.write('>%s\n%s\n' % (name, seq))
    #run alignment on output
    with open(os.path.join(tmpdir, 'windows.mafft.fa'), 'w') as output:
        subprocess.call(['mafft', '--thread', str(args.cpus), '--auto', os.path.join(tmpdir, 'windows.fa')], stdout = output, stderr = FNULL)
    #now trim alignment
    subprocess.call(['trimal', '-in', os.path.join(tmpdir, 'windows.mafft.fa'), '-out', os.path.join(tmpdir, 'windows.trimal.fa'), '-automated1'])

    # One command for both cases; '-o <outgroup>' is added only when requested.
    raxml_cmd = ['raxmlHPC-PTHREADS', '-T', str(args.cpus), '-f', 'a', '-m', 'GTRGAMMA', '-p', '12345', '-x', '12345', '-#', str(args.bootstrap), '-s', 'windows.trimal.fa']
    if outgroup:
        raxml_cmd += ['-o', outgroup]
    raxml_cmd += ['-n', 'windows.nwk']
    subprocess.call(raxml_cmd, cwd = tmpdir, stdout = FNULL, stderr = FNULL)

    #parse with biopython and draw
    trees = list(Phylo.parse(os.path.join(tmpdir, 'RAxML_bootstrap.windows.nwk'), 'newick'))
    best = Phylo.read(os.path.join(tmpdir,'RAxML_bestTree.windows.nwk'), 'newick')
    support_tree = get_support(best, trees)
    Phylo.write(support_tree, args.input+'.windows.phylogeny.nwk', 'newick')
    Phylo.draw(support_tree, do_show=False)
    pylab.axis('off')
    pylab.savefig(args.input+'.windows.phylogeny.pdf', format='pdf', bbox_inches='tight', dpi=1000)
예제 #2
0
def _allele_presence(calls, allele, alleles):
    """Recode *calls* as presence ('1') / absence ('0') of *allele*.

    Calls equal to *allele* become '1', calls equal to any other member of
    *alleles* become '0', and every other value (e.g. '0') is kept as is.
    """
    recoded = []
    for call in calls:
        if call == allele:
            recoded.append('1')
        elif call in alleles:
            recoded.append('0')
        else:
            recoded.append(call)
    return recoded


def runbinary(input, outgroup):
    """Build a phylogeny from a binary (presence/absence) recoding of a VCF.

    Reads ``<input>.vcf``, skipping lines that start with ``##``.  From every
    remaining line the fields from the 10th column onwards are taken; the
    ``#CHROM`` header line is not skipped, so its fields end up as the FASTA
    record names.  A line whose fields contain '2' or '3' is expanded into
    one presence/absence row per non-zero allele ('1', '2' and, if present,
    '3'); any other line is used unchanged.  The rows are transposed into one
    record per column and written to ``<input>.binary.fa``, which RAxML
    analyses (``-f a``, BINGAMMA, ``-# args.bootstrap``) inside
    ``binary_tmp``.  The best tree, combined with the bootstrap trees via
    ``get_support``, is written to ``<args.input>.binary.phylogeny.nwk`` and
    drawn to ``<args.input>.binary.phylogeny.pdf``.

    Reads the module-level ``args`` and ``FNULL``.

    Args:
        input: Path prefix; ``input + '.vcf'`` is read and
            ``input + '.binary.fa'`` is written.
        outgroup: Taxon name handed to RAxML with ``-o``; a falsy value runs
            RAxML without an outgroup.
    """
    print("Runnning binary (presence/absence) method.")
    tmpdir = 'binary_tmp'
    # Always start from an empty scratch directory.
    if os.path.isdir(tmpdir):
        shutil.rmtree(tmpdir)
    os.makedirs(tmpdir)
    datadb = []
    with open(input+'.binary.fa', 'w') as output:
        with open(input+'.vcf', 'rU') as vcf:
            for line in vcf:
                line = line.replace('\n', '')
                if line.startswith('##'):
                    continue
                data = line.split('\t')[9:]
                if not data:
                    # A line without sample columns (e.g. a blank line) would
                    # add an empty row and make zip(*datadb) below empty,
                    # silently discarding every sample.
                    continue
                if '3' in data:
                    alleles = ('1', '2', '3')
                elif '2' in data:
                    alleles = ('1', '2')
                else:
                    datadb.append(data)
                    continue
                # One row per allele.  The three-allele case previously
                # computed the allele-2 row but never stored it, unlike the
                # two-allele case; every allele now gets its row.
                for allele in alleles:
                    datadb.append(_allele_presence(data, allele, alleles))
        # Transpose: rows are sites, columns are samples -> one record each.
        for record in zip(*datadb):
            output.write(">%s\n%s\n" % (record[0], ''.join(record[1:])))

    # One command for both cases; '-o <outgroup>' is added only when requested.
    raxml_cmd = ['raxmlHPC-PTHREADS', '-T', str(args.cpus), '-f', 'a', '-m', 'BINGAMMA', '-p', '12345', '-x', '12345', '-#', str(args.bootstrap), '-s', os.path.abspath(input+'.binary.fa')]
    if outgroup:
        raxml_cmd += ['-o', outgroup]
    raxml_cmd += ['-n', 'binary.nwk']
    subprocess.call(raxml_cmd, cwd = tmpdir, stdout = FNULL, stderr = FNULL)

    #parse with biopython and draw
    trees = list(Phylo.parse(os.path.join(tmpdir, 'RAxML_bootstrap.binary.nwk'), 'newick'))
    best = Phylo.read(os.path.join(tmpdir,'RAxML_bestTree.binary.nwk'), 'newick')
    support_tree = get_support(best, trees)
    Phylo.write(support_tree, args.input+'.binary.phylogeny.nwk', 'newick')
    Phylo.draw(support_tree, do_show=False)
    pylab.axis('off')
    pylab.savefig(args.input+'.binary.phylogeny.pdf', format='pdf', bbox_inches='tight', dpi=1000)
예제 #3
0
def main():
    """Build a distance tree from a table and annotate it with support values.

    Parses the command line with the module-level ``parser``, loads
    ``args.table`` with pandas, uses the column named "sample id"
    (case-insensitive) as the row index and coerces every other column to
    numeric.  The main tree is read from ``args.tree`` when given, otherwise
    built from the table with UPGMA (``args.upgma``) or neighbour joining.
    ``args.times`` bootstrap trees are built the same way, combined with the
    main tree through ``get_support``, and the result is written in Newick
    format to ``args.output``.

    If no table is supplied the help text is printed and the process exits.
    """
    args = parser.parse_args()

    table = args.table
    if not table:
        print("No input table provided. This argument is required\n")
        parser.print_help()
        # NOTE(review): exits with status 0 although a required argument is
        # missing; kept as is because callers may rely on it.
        exit(0)

    df = pd.read_csv(table, sep=args.delim, quotechar='"', index_col=False)
    for c in list(df.columns):
        if c.lower() == "sample id":
            # set_index returns a NEW frame; without rebinding the call is a
            # no-op, leaving the id column in the data matrix and `names`
            # below as the default 0..n-1 index.
            df = df.set_index(c)
        else:
            df[c] = pd.to_numeric(df[c], errors="coerce", downcast="integer")

    # Columns that contain no numeric value at all are discarded.
    df.dropna(axis=1, how="all", inplace=True)
    # NOTE(review): cells that failed numeric coercion are NaN at this point
    # and are still cast to int -- confirm inputs never contain such cells.
    data = df.values.astype(int)
    names = df.index.tolist()

    constructor = DistanceTreeConstructor()
    if args.upgma:
        const = constructor.upgma
    else:
        const = constructor.nj

    if args.tree:
        main_tree = Phylo.read(args.tree, "newick")
    else:
        main_tree = const(get_distance_matrix(data, names))

    trees = [
        const(get_distance_matrix(sub, names))
        for sub in bootstrap(data, args.times)
    ]
    supp_tree = get_support(main_tree, trees)

    # remove labels of inner clades
    for clade in supp_tree.get_nonterminals():
        clade.name = ""

    Phylo.write(supp_tree, args.output, "newick")
예제 #4
0
def runsnps(input, outgroup):
    """Infer a phylogeny from the concatenated SNP alignment ``<input>.aln``.

    RAxML (``-f a``, GTRGAMMA, ``-# args.bootstrap``) is run on the alignment
    inside a freshly created ``snps_tmp`` directory.  The best tree, combined
    with the bootstrap trees via ``get_support``, is written to
    ``<args.input>.snps.phylogeny.nwk`` and drawn to
    ``<args.input>.snps.phylogeny.pdf``.

    Reads the module-level ``args`` and ``FNULL``.

    Args:
        input: Path prefix of the alignment; ``input + '.aln'`` is used.
        outgroup: Taxon name handed to RAxML with ``-o``; a falsy value runs
            RAxML without an outgroup.
    """
    print("Running concatenated SNP method.")
    alignment = input+'.aln'
    workdir = 'snps_tmp'
    # Recreate the scratch directory from scratch on every run.
    if os.path.isdir(workdir):
        shutil.rmtree(workdir)
    os.makedirs(workdir)

    # Assemble the RAxML call once; the outgroup option sits just before '-n'.
    raxml = [
        'raxmlHPC-PTHREADS', '-T', str(args.cpus),
        '-f', 'a', '-m', 'GTRGAMMA',
        '-p', '12345', '-x', '12345',
        '-#', str(args.bootstrap),
        '-s', os.path.abspath(alignment),
    ]
    if outgroup:
        raxml.extend(['-o', outgroup])
    raxml.extend(['-n', 'snps.nwk'])
    subprocess.call(raxml, cwd=workdir, stdout=FNULL, stderr=FNULL)

    # Read the RAxML results back with Biopython, then export and draw.
    bootstrap_path = os.path.join(workdir, 'RAxML_bootstrap.snps.nwk')
    best_path = os.path.join(workdir, 'RAxML_bestTree.snps.nwk')
    replicates = list(Phylo.parse(bootstrap_path, 'newick'))
    best_tree = Phylo.read(best_path, 'newick')
    annotated = get_support(best_tree, replicates)
    Phylo.write(annotated, args.input+'.snps.phylogeny.nwk', 'newick')
    Phylo.draw(annotated, do_show=False)
    pylab.axis('off')
    pylab.savefig(args.input+'.snps.phylogeny.pdf', format='pdf', bbox_inches='tight', dpi=1000)
예제 #5
0
# Trim the MAFFT alignment with trimAl and write it in PHYLIP format.
subprocess.call(['trimal', '-in', mafft_out, '-out', trimal_out, '-automated1', '-phylip'], stderr = FNULL, stdout = FNULL)

print("Running RAxML")
# Pick the RAxML substitution model from the requested sequence type.
if args.method == 'prot':
    method = 'PROTGAMMAAUTO'
elif args.method == 'nucl':
    method = 'GTRGAMMA'
else:
    # Previously `method` stayed unbound here and the RAxML call below failed
    # with a NameError.
    raise ValueError("unsupported method %r: expected 'prot' or 'nucl'" % args.method)

# Build the command once: '-T' is passed only when more than one CPU is
# requested and '-o' only when an outgroup is given.
raxml_cmd = ['raxmlHPC-PTHREADS']
if args.cpus != 1:
    raxml_cmd += ['-T', str(args.cpus)]
raxml_cmd += ['-f', 'a', '-m', method]
if args.outgroup:
    raxml_cmd += ['-o', args.outgroup]
raxml_cmd += ['-p', '12345', '-x', '12345', '-#', str(args.bootstrap), '-s', trimal_out, '-n', 'nwk']
subprocess.call(raxml_cmd, cwd = tmp)

print("Drawing tree inferred from RAxML")
#parse with biopython and draw
trees = list(Phylo.parse(os.path.join(tmp, 'RAxML_bootstrap.nwk'), 'newick'))
best = Phylo.read(os.path.join(tmp, 'RAxML_bestTree.nwk'), 'newick')
support_tree = get_support(best, trees)
# Strip only the final extension: splitting on the first '.' mangled paths
# such as './out/tree.pdf' (which became '.nwk').
Phylo.write(support_tree, os.path.splitext(args.out)[0]+'.nwk', 'newick')
Phylo.draw(support_tree, do_show=False)
pylab.axis('off')
pylab.savefig(args.out, format='pdf', bbox_inches='tight', dpi=1000)

# Keep the scratch directory only when debugging.
if not args.debug:
    shutil.rmtree(tmp)