Ejemplo n.º 1
0
    for i in xrange(len(data)-(k-1)):
        d[s[i:i+k]] += 1
    return d

def permutation_test(s, n=100):
    """Score the ``a + b`` 2-mer on repeatedly shuffled copies of a sequence.

    Every round reshuffles the symbols, recounts the 2-mers of the shuffled
    sequence with ``count_kmers`` and computes
    ``log2(N * f[a + b] / (f1[a] * f1[b]))``.

    ``N``, ``a``, ``b`` and ``f1`` are not parameters: they are read from
    module scope and must be defined before this function is called.

    Args:
        s: The symbols to permute, as any iterable of one-character strings
            (the caller in this file passes a list of characters). It is
            copied first, so the caller's object keeps its original order.
        n: Number of permutations to score.

    Returns:
        A list holding one score per permutation, ``n`` entries in total.
    """
    # random.shuffle works in place. Shuffling the argument itself would
    # scramble the caller's data as a side effect (and fail outright on an
    # immutable str), so all the shuffling happens on a private copy.
    shuffled = list(s)
    res = []
    for _ in range(n):
        # Each round starts from the previous round's order, not from the
        # original sequence.
        random.shuffle(shuffled)
        f = count_kmers("".join(shuffled), 2)
        res.append(log2(N * f[a + b] / float(f1[a] * f1[b])))
    return res

from main import get_seq

# Join whatever get_seq() yields into one sequence string.
data = "".join(get_seq())

# Sequence length (kept as a float) plus the 2-mer and 1-mer counts.
N = float(len(data))
f2 = count_kmers(data, 2)
f1 = count_kmers(data, 1)

# The pair under test: "G" immediately followed by "C".
a, b = "G", "C"

# Observed score: log2 of N * count(a + b) over count(a) * count(b).
pair_count = f2[a + b]
singles_product = float(f1[a] * f1[b])
odds_ratio = log2(N * pair_count / singles_product)
print(odds_ratio)

# Score shuffled versions of the sequence, then report the fraction of them
# that scored strictly above the observed value.
list_data = list(data)
res = permutation_test(list_data)
higher = [r for r in res if r > odds_ratio]
p = len(higher) / float(len(res))
print("OR=%5.3f, p=%7.5f" % (odds_ratio, p))
Ejemplo n.º 2
0
    offset = 0
    skipped = 0
    for i in xrange(min(len(seq1), len(seq2))):
        if i + offset == len(seq2)-1:
            break
        if(seq1[i] != seq2[i+offset]):
            if(seq1[i] == seq2[i+offset+1] and seq1[i+1] == seq2[i+offset+2]):
                offset += 1
                skipped += 1
                continue
            diff+=1
    print "skipped ", skipped
    return diff+skipped

if __name__ == '__main__':
    # Four NC_001807 files, followed by the NC_012920.1 file that each of
    # them is compared against.
    fasta_files = [
        "NC_001807.1.fasta",
        "NC_001807.2.fasta",
        "NC_001807.3.fasta",
        "NC_001807.4.fasta",
        "NC_012920.1.fasta",
    ]
    seqs = [get_seq(fasta_file) for fasta_file in fasta_files]
    reference = seqs[4]

    # One simple_seq_diff result per NC_001807 sequence; the empty print
    # puts a blank line after whatever each call prints.
    lens = []
    for candidate in seqs[:-1]:
        score = simple_seq_diff(reference, candidate)
        lens.append(score)
        print("")

    # Bar chart with one bar per compared sequence; the x-axis label lists
    # the sequence names.
    import pylab
    positions = range(len(lens))
    pylab.bar(positions, lens)
    pylab.xlabel("NC_001807.1 NC_001807.2 NC_001807.3 NC_001807.4")
    pylab.ylabel("Num differences + num skips")
    pylab.show()