Example #1
0
def calculate_motifs(input_seqs, input_seqs2, width):
    
    print input_seqs, input_seqs2
    input_seqs = fasta.read_seqs(open(input_seqs).readlines())
    input_seqs2 = fasta.read_seqs(open(input_seqs2).readlines())

    foreground = get_quorums(input_seqs, width)
    background = get_quorums(input_seqs2, width)

    N = len(input_seqs) + len(input_seqs2)

    res_motifs = []
    for i in foreground:
        term1 = choose(background[i], foreground[i])
        term2 = choose((N - background[i]), len(input_seqs) - 1)
        term3 = choose(N, len(input_seqs))
        p = (float(term1) * float(term2)) / term3
        if 0 < p <= 0.0001:
            res_motifs.append(i + '\t' + str(foreground[i]) + '\t' + str(background[i]) + '\t' + str(p))
    
    res_motifs.sort()
    return res_motifs
def get_quorums(seqs, mlen):
    """
    add seq id_no to a set
    use explicit counter to create seq_no
    """
    quorum = defaultdict(set)
    id_no = 0
    for seq in seqs:
        id_no += 1
        for n in range(len(seq) - mlen):
            quorum[seq[n:n + mlen]].add(id_no)
    return quorum


input_seqs = fasta.read_seqs(open(sys.argv[1]).readlines())
input_seqs2 = fasta.read_seqs(open(sys.argv[2]).readlines())

foreground = get_quorums(input_seqs, 10)
background = get_quorums(input_seqs2, 10)

N = len(input_seqs) + len(input_seqs2)

for i in foreground:
    term1 = choose(len(background[i]), len(foreground[i]))
    term2 = choose((N - len(background[i])), len(input_seqs) - 1)
    term3 = choose(N, len(input_seqs))
    p = (float(term1) * float(term2)) / term3
    if 0 < p <= 0.0001:
        print i, len(foreground[i]), len(background[i]), p
#! /usr/bin/env python

import fasta
import sys

data = fasta.read_seqs(open(sys.argv[1], "r").readlines())
print [len(seq) for seq in data]
#! /usr/bin/env python

import fasta
import sys

data = fasta.read_seqs(open(sys.argv[1], 'r').readlines())
print [len(seq) for seq in data]
        return ntok // ktok
    else:
        return 0

def get_quorums(seqs, mlen):
    """
    add seq id_no to a set
    use explicit counter to create seq_no
    """
    quorum = defaultdict(int)
    for seq in seqs:
        for n in range(len(seq) - mlen):
            quorum[seq[n:n + mlen]] += 1
    return quorum

input_seqs = fasta.read_seqs(open(sys.argv[1]).readlines())
input_seqs2 = fasta.read_seqs(open(sys.argv[2]).readlines())

foreground = get_quorums(input_seqs, 10)
background = get_quorums(input_seqs2, 10)

N = len(input_seqs) + len(input_seqs2)

res_motifs = []
for i in foreground:
    term1 = choose(len(background[i]), len(foreground[i]))
    term2 = choose((N - len(background[i])), len(input_seqs)-1)
    term3 = choose(N, len(input_seqs))
    p = (float(term1) * float(term2)) / term3
    if 0 < p <= 0.0001:
        res_motifs.append(i + '\t' + str(foreground[i]) + '\t' + str(background[i]) + '\t' + str(p))