Beispiel #1
0
# for elmseq in seq_percents_bird:
#     if elmseq in uniq:
#         print elmseq + '\t' + str(sum(seq_percents_bird[elmseq])/float(total_bird)) + '\t' + str(sum(seq_percents_human[elmseq])/float(total_human))

# Load the per-host frequency tables. They are indexed below with keys
# built as '<second field>:<mk_sub(third field)>' of each elmseq.
human_host_file = 'working/Jun29/elmdict_H_sapiens.simple'
chicken_host_file = 'working/Jun29/elmdict_Gallus_gallus.simple'
human_host_freqs = get_host_freqs(human_host_file)
chicken_host_freqs = get_host_freqs(chicken_host_file)

# For every sequence in `uniq`, write its chicken-host frequency (0 when
# the key is absent) to 'working/hi' or 'working/low', one value per
# line, depending on the bird-flu fraction of that sequence.
with open('working/hi', 'w') as hi_out, open('working/low', 'w') as low_out:
    for elmseq in uniq:
        # Skip entries containing X, J or B
        # (presumably ambiguous residue codes -- confirm).
        if any(code in elmseq for code in 'XJB'):
            continue

        per = sp[elmseq]  # NOTE(review): looked up but never used below

        # elmseq is ':'-separated; field 1 and field 2 form the host key.
        fields = elmseq.split(':')
        simplified = utils.mk_sub(fields[2])
        key = fields[1] + ':' + simplified

        chicken_host = chicken_host_freqs.get(key, 0)
        # NOTE(review): human_host is computed but never written out.
        human_host = human_host_freqs.get(key, 0)

        bird_flu_frac = sum(seq_percents_bird[elmseq]) / float(total_bird)
        # NOTE(review): human_flu_frac is computed but never used.
        human_flu_frac = sum(seq_percents_human[elmseq]) / float(total_human)

        # The threshold is 80, so the per-sequence values are presumably
        # percentages rather than fractions -- TODO confirm.
        sink = hi_out if bird_flu_frac >= 80.0 else low_out
        sink.write(str(chicken_host) + '\n')

Beispiel #2
0
"""Convert raw sequences to simplified versions
   for use in scanning with simple patterns
   (those made with utils.mk_sub)"""
import sys, global_settings, os, utils

# Command line: <input FASTA path> <output FASTA path>
in_file = sys.argv[1]
out_file = sys.argv[2]

# Copy every record yielded by utils.fasta_iter to the output file,
# keeping the ID on a '>' header line and replacing the sequence with
# utils.mk_sub(sequence).
with open(out_file, 'w') as simplified_fasta:
    for seq_id, raw_seq in utils.fasta_iter(in_file):
        header_line = '>' + seq_id + '\n'
        simplified_fasta.write(header_line)
        sequence_line = utils.mk_sub(raw_seq) + '\n'
        simplified_fasta.write(sequence_line)
Beispiel #3
0
"""There is little overlap between
   host and flu sequences. To solve
   this problem, I'm substituting some
   residues with symbols for their 
   properties"""
import sys, global_settings, utils
from collections import defaultdict

# Input file (first command-line argument): one tab-separated record per
# line, unpacked as (elm, seq, count, frq).
elmdict_file = sys.argv[1]

# Occurrence counts per (elm, simplified sequence) pair and per elm.
# The pair is stored as a tuple key rather than an 'elm:seq' string, so
# reporting never has to split the key back apart; a ':' inside either
# part can therefore no longer break the unpacking.
# utils.init_zero presumably returns 0 -- confirm in utils.
elm2seq2count = defaultdict(utils.init_zero)
elm2count = defaultdict(utils.init_zero)

with open(elmdict_file) as f:
    for line in f:
        # frq (the frequency already present in the input) is ignored;
        # frequencies are recomputed below from the merged counts.
        (elm, seq, count, frq) = line.strip().split('\t')
        if 'X' in seq:
            continue
        occurrences = int(count)
        # Several raw sequences can map to the same simplified sequence,
        # so their counts are accumulated.
        elm2seq2count[(elm, utils.mk_sub(seq))] += occurrences
        elm2count[elm] += occurrences

# Emit: elm, simplified sequence, merged count, and the count as a
# fraction of all counted occurrences of that elm.
for (elm, seq), count in elm2seq2count.items():
    freq = float(count)/float(elm2count[elm])
    print('%s\t%s\t%d\t%.10f' %
          (elm, seq, count, freq))
    
Beispiel #4
0
"""Substitute residues for properties
   in flu ELMs file"""
import sys, global_settings, utils
from collections import defaultdict

# Echo each tab-separated record of the input file (first command-line
# argument) to stdout with its sequence column replaced by
# utils.mk_sub(sequence). Records whose sequence contains X, B, Z or J
# are dropped.
with open(sys.argv[1]) as elm_hits:
    for record in elm_hits:
        protein, st, stp, elm, seq, ELM = record.strip().split('\t')
        if any(code in seq for code in 'XBZJ'):
            continue
        columns = (protein, st, stp, elm, utils.mk_sub(seq), ELM)
        print('\t'.join(columns))