Esempio n. 1
0
def main():
    """Run a manual smoke test against a fresh ``PyPhy`` instance.

    Loads ``test/shankarappa-p1.fa`` through ``convert_fasta``, hands the
    result to ``read_data`` with ``is_codon=True``, feeds a small hard-coded
    Newick string to ``read_tree``, prints whatever each of those two calls
    returns, and finally calls ``set_model`` with ``is_codon=True``.
    """
    # test script
    p = PyPhy()

    from seqUtils import convert_fasta
    # NOTE(review): 'rU' (universal newlines) is accepted by Python 2 and by
    # Python 3 up to 3.10; on 3.11+ it has to become plain 'r'.
    with open('test/shankarappa-p1.fa', 'rU') as f:
        fasta = convert_fasta(f)

    out = p.read_data(fasta, is_codon=True)
    # Parenthesised single-argument print: same output on Python 2, and
    # valid syntax on Python 3.
    print(out)

    t = '((A:1,B:2):0.5,C:0.2):0;'
    out = p.read_tree(t)
    print(out)

    p.set_model(is_codon=True)
Esempio n. 2
0
def main():
    """Run a manual smoke test against a fresh ``PyPhy`` instance.

    Loads ``test/shankarappa-p1.fa`` through ``convert_fasta``, hands the
    result to ``read_data`` with ``is_codon=True``, feeds a small hard-coded
    Newick string to ``read_tree``, prints whatever each of those two calls
    returns, and finally calls ``set_model`` with ``is_codon=True``.
    """
    # test script
    p = PyPhy()

    from seqUtils import convert_fasta

    # NOTE(review): "rU" (universal newlines) is accepted by Python 2 and by
    # Python 3 up to 3.10; on 3.11+ it has to become plain "r".
    with open("test/shankarappa-p1.fa", "rU") as f:
        fasta = convert_fasta(f)

    out = p.read_data(fasta, is_codon=True)
    # Parenthesised single-argument print: same output on Python 2, and
    # valid syntax on Python 3.
    print(out)

    t = "((A:1,B:2):0.5,C:0.2):0;"
    out = p.read_tree(t)
    print(out)

    p.set_model(is_codon=True)
Esempio n. 3
0
"""
Use project file to punch out genes from FDA amino acid refs
"""
import os
import HyPhy
import hyphyAlign
import json
from seqUtils import convert_fasta

hyphy = HyPhy._THyPhy(os.getcwd(), 1)  # instance of HyPhy
hyphyAlign.change_settings(hyphy)  # default settings

# Read the query sequences; the context manager closes the handle even if
# convert_fasta raises.
with open('fda_hcv_polyprotein.fa', 'rU') as handle:
    fasta = convert_fasta(handle)

# Read the project file that holds the per-region reference sequences.
# NOTE(review): this is a hard-coded absolute path on one developer's machine.
with open('/Users/art/git/MiseqPipeline/projects.json', 'rU') as handle:
    proj = json.load(handle)

# Map region name -> reference string for every region whose name contains
# 'H77' and does not end in 'seed'.
h77 = {}
for key, region in proj['regions'].items():
    if 'H77' in key and not key.endswith('seed'):
        # 'reference' is stored as pieces that are joined into one string.
        aa = ''.join(region['reference'])
        h77[str(key)] = str(aa)

# Left open on purpose: nothing in the visible part of the script writes to
# or closes it yet.
outfile = open('fda_hcv_coords.fa', 'w')

# Pairwise-align every query sequence against every collected reference.
for h, s in fasta:
    for gene, refseq in h77.items():
        aquery, aref, ascore = hyphyAlign.pair_align(hyphy, refseq, s)
Esempio n. 4
0
"""
Use project file to punch out genes from FDA amino acid refs
"""
import os
import HyPhy
import hyphyAlign
import json
from seqUtils import convert_fasta

hyphy = HyPhy._THyPhy(os.getcwd(), 1)  # instance of HyPhy
hyphyAlign.change_settings(hyphy)  # default settings

# Read the query sequences; the context manager closes the handle even if
# convert_fasta raises.
with open('fda_hcv_polyprotein.fa', 'rU') as handle:
    fasta = convert_fasta(handle)

# Read the project file that holds the per-region reference sequences.
# NOTE(review): this is a hard-coded absolute path on one developer's machine.
with open('/Users/art/git/MiseqPipeline/projects.json', 'rU') as handle:
    proj = json.load(handle)

# Map region name -> reference string for every region whose name contains
# 'H77' and does not end in 'seed'.
h77 = {}
for key, region in proj['regions'].items():
    if 'H77' in key and not key.endswith('seed'):
        # 'reference' is stored as pieces that are joined into one string.
        aa = ''.join(region['reference'])
        h77[str(key)] = str(aa)

# Left open on purpose: nothing in the visible part of the script writes to
# or closes it yet.
outfile = open('fda_hcv_coords.fa', 'w')

# Pairwise-align every query sequence against every collected reference.
for h, s in fasta:
    for gene, refseq in h77.items():
        aquery, aref, ascore = hyphyAlign.pair_align(hyphy, refseq, s)
Esempio n. 5
0
"""
Prune terminal branches on phylogeny given minimum distance cutoff.
"""
from Bio import Phylo
import sys
from seqUtils import convert_fasta

# Parse the four positional command-line arguments:
#   argv[1] FASTA alignment, argv[2] Newick tree,
#   argv[3] branch-length cutoff (float), argv[4] output path.
try:
    # Read the alignment inside a context manager so the input handle is
    # closed as soon as its lines have been loaded.
    with open(sys.argv[1], 'rU') as handle:
        fasta = convert_fasta(handle.readlines())
    t = Phylo.read(sys.argv[2], 'newick')
    cutoff = float(sys.argv[3])
    outfile = open(sys.argv[4], 'w')
except Exception:
    # Missing or malformed arguments and unreadable files all land here:
    # show the usage text, then re-raise so the original traceback is kept.
    print('Prune terminal branches on phylogeny given minimum distance cutoff.')
    print('Filter the original sequence alignment based on the pruned tree.')
    print('python prune_newick.py [fasta] [newick] [cutoff] [outfile]')
    raise


# Build alphabetically sorted lists of the FASTA headers and of the tree's
# tip labels, so the two sets of names can be compared.
seq_names = sorted([h for h, s in fasta])
tip_names = sorted([tip.name for tip in t.get_terminals()])
# actually this won't work because tip names got truncated :-P

while True:
    tips = t.get_terminals()
    pruned = False
    for tip in tips:
        if tip.branch_length < cutoff: