Beispiel #1
0
def main():
    """Print a consensus line and per-nucleotide counts for a FASTA file.

    The file path is read from ``sys.argv[1]``.  Each record returned by
    ``readFASTA`` is indexed with ``[1]`` to obtain its sequence, and the
    length of the first sequence fixes the number of columns counted.

    Output: one line built by concatenating ``getMaxProb(counts)`` for every
    column (``getMaxProb`` is defined elsewhere; presumably it returns the
    most frequent nucleotide -- confirm), followed by one ``"X: n n ..."``
    line for each of A, C, G and T.

    Raises:
        IndexError: if there is no first record, or a sequence is shorter
            than the first one.
        KeyError: if a sequence holds a symbol other than A, C, G or T.
    """
    data = readFASTA(sys.argv[1])
    sequences = [record[1] for record in data]
    dna_len = len(sequences[0])

    # One fresh dict per column.  Multiplying a one-element list would make
    # every slot refer to the same dict object, so columns are appended.
    profile = []
    for i in range(dna_len):
        count = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
        for seq in sequences:
            count[seq[i]] += 1
        profile.append(count)

    print(''.join(getMaxProb(column) for column in profile))

    for nuc in 'ACGT':
        # Each count is prefixed by a single space, giving "A: 1 2 3".
        print(nuc + ':' + ''.join(' ' + str(column[nuc]) for column in profile))
Beispiel #2
0
def main():
    """Print a consensus line and the nucleotide profile of a FASTA file.

    Reads the path in ``sys.argv[1]`` with ``readFASTA`` and takes element
    ``[1]`` of each record as its sequence.  The first sequence's length
    determines how many positions are tallied.

    Output: first the concatenation of ``getMaxProb(counts)`` over all
    positions (helper defined elsewhere; presumably picks the most common
    base -- confirm), then four lines ``A: ...``, ``C: ...``, ``G: ...``,
    ``T: ...`` listing the count of that base at every position.

    Raises:
        IndexError: if there is no first record, or a sequence is shorter
            than the first one.
        KeyError: if a sequence holds a symbol other than A, C, G or T.
    """
    data = readFASTA(sys.argv[1])
    strands = [entry[1] for entry in data]
    dna_len = len(strands[0])

    # Build the profile by appending a new dict per position; a
    # ``[{...}] * n`` pre-allocation would alias one dict n times.
    profile = []
    for pos in range(dna_len):
        tally = dict.fromkeys('ACGT', 0)
        for strand in strands:
            tally[strand[pos]] += 1
        profile.append(tally)

    consensus = ''.join(getMaxProb(tally) for tally in profile)
    print(consensus)

    for nuc in 'ACGT':
        cells = [' ' + str(tally[nuc]) for tally in profile]
        print(nuc + ':' + ''.join(cells))
Beispiel #3
0
def main():
    """Print the result of ``long_substr`` over the sequences of a FASTA file.

    The path comes from ``sys.argv[1]``; element ``[1]`` of every record
    returned by ``readFASTA`` is collected into a list and handed to
    ``long_substr`` (defined elsewhere; presumably it returns the longest
    substring shared by all sequences -- confirm).
    """
    records = readFASTA(sys.argv[1])
    sequences = [record[1] for record in records]
    print(long_substr(sequences))
Beispiel #4
0
def main():
    """Strip later records' sequences from the first one and print the result.

    Reads the FASTA file named by ``sys.argv[1]``.  Starting from the ``dna``
    attribute of the first record, every occurrence of each following
    record's ``dna`` is removed in file order.  The remaining string is passed
    to ``DNA_to_PROT`` (defined elsewhere; presumably translates DNA into a
    protein string -- confirm) and the return value is printed.
    """
    records = readFASTA(sys.argv[1])

    remaining = records[0].dna
    for index in range(1, len(records)):
        fragment = records[index].dna
        remaining = remaining.replace(fragment, '')

    print(DNA_to_PROT(remaining))
Beispiel #5
0
def main():
    """Print ``DNA_to_PROT`` of the first sequence with the others cut out.

    The FASTA path is ``sys.argv[1]``.  The first record's ``dna`` is the
    working string; the ``dna`` of records 1..n-1 is deleted from it, one
    record after another, using ``str.replace`` with an empty replacement.
    ``DNA_to_PROT`` is defined elsewhere (presumably DNA-to-protein
    translation -- confirm).
    """
    entries = readFASTA(sys.argv[1])

    spliced = entries[0].dna
    position = 1
    while position < len(entries):
        spliced = spliced.replace(entries[position].dna, '')
        position += 1

    protein = DNA_to_PROT(spliced)
    print(protein)
Beispiel #6
0
def main():
    """Print every reverse palindrome of length 4 to 12 in a FASTA sequence.

    The path comes from ``sys.argv[1]``; ``readFASTA`` is expected to return
    a single record whose ``dna`` attribute is the sequence.  A window is
    reported when it equals its own reverse complement.

    Output: one ``"<1-based start> <length>"`` line per hit, ordered by
    length and then by start position.
    """
    data = readFASTA(sys.argv[1])

    dna = data.dna
    n = len(dna)
    # NOTE(review): assumes revcomp_DNA returns the reverse complement of the
    # whole sequence, i.e. revc[k] is the complement of dna[n - 1 - k].
    revc = revcomp_DNA(dna)
    locations = []

    for length in range(4, 13):
        for start in range(n - length + 1):
            end = start + length
            # The reverse complement of dna[start:end] is the mirrored slice
            # revc[n - end:n - start].  The same-index slice revc[start:end]
            # only coincides with it for a window centred in the sequence.
            if dna[start:end] == revc[n - end:n - start]:
                locations.append(str(start + 1) + ' ' + str(length))

    print('\n'.join(locations))
Beispiel #7
0
def main():
    """Print the transition/transversion ratio of two FASTA sequences.

    The file named by ``sys.argv[1]`` must yield exactly two records; their
    ``dna`` attributes are compared position by position over the length of
    the second one.  Each mismatch is classified with ``is_transition``
    (defined elsewhere) and the float ratio transitions / transversions is
    printed.

    Raises:
        ZeroDivisionError: if no mismatch is classified as a transversion.
    """
    records = readFASTA(sys.argv[1])
    s, q = [record.dna for record in records]

    transitions = 0
    transversions = 0
    for pos in range(len(q)):
        left, right = s[pos], q[pos]
        if left == right:
            continue
        if is_transition(left, right):
            transitions += 1
        else:
            transversions += 1

    print(1. * transitions / transversions)
Beispiel #8
0
def main():
    """Print 1-based positions in ``s`` spelling out ``q`` as a subsequence.

    The FASTA file named by ``sys.argv[1]`` must yield exactly two records;
    ``s`` and ``q`` are their ``dna`` attributes.  For each symbol of ``q``
    the first occurrence in ``s`` after the previous match is taken, and the
    positions are printed on one line separated by spaces.

    Raises:
        ValueError: if a symbol of ``q`` has no occurrence in the remaining
            part of ``s``.
    """
    data = readFASTA(sys.argv[1])
    s, q = [d.dna for d in data]

    i = 0
    res = []

    for c in q:
        idx = s.find(c, i)
        # find() signals "not found" with -1.  Recording that would print
        # position 0 and restart the search from the beginning of s.
        if idx < 0:
            raise ValueError(
                'symbol %r not found in first sequence from offset %d' % (c, i))

        res.append(idx + 1)
        i = idx + 1

    print(' '.join(map(str, res)))
Beispiel #9
0
def main():
    """Locate the second FASTA sequence as a subsequence of the first.

    Reads exactly two records from the file given in ``sys.argv[1]`` and uses
    their ``dna`` attributes as ``s`` (text) and ``q`` (pattern).  The symbols
    of ``q`` are matched greedily, left to right, each search starting just
    past the previous hit.  The 1-based hit positions are printed
    space-separated on a single line.

    Raises:
        ValueError: if some symbol of ``q`` cannot be found in the part of
            ``s`` that is still unsearched.
    """
    data = readFASTA(sys.argv[1])
    s, q = [d.dna for d in data]

    cursor = 0          # offset in s where the next search begins
    positions = []

    for symbol in q:
        hit = s.find(symbol, cursor)
        if hit == -1:
            # Without this check a miss would be reported as position 0 and
            # the cursor would jump back to the start of s.
            raise ValueError(
                'symbol %r not found in first sequence from offset %d'
                % (symbol, cursor))
        cursor = hit + 1
        positions.append(cursor)

    print(' '.join(str(p) for p in positions))
Beispiel #10
0
def main():
    """Print transitions divided by transversions for two FASTA sequences.

    Exactly two records are read from the file in ``sys.argv[1]``; their
    ``dna`` attributes are walked in parallel for ``len(q)`` positions.  Every
    differing pair is passed to ``is_transition`` (defined elsewhere); truthy
    results count as transitions, the rest as transversions.

    Raises:
        ZeroDivisionError: if the transversion count is zero.
    """
    data = readFASTA(sys.argv[1])

    s, q = [d.dna for d in data]

    # One entry per mismatching position, holding the is_transition verdict.
    verdicts = [
        is_transition(s[i], q[i])
        for i in range(len(q))
        if s[i] != q[i]
    ]
    transitions = sum(1 for verdict in verdicts if verdict)
    transversions = len(verdicts) - transitions

    print(1. * transitions / transversions)
Beispiel #11
0
def main():
    """List the reverse palindromes of length 4..12 found in a FASTA sequence.

    ``readFASTA(sys.argv[1])`` is expected to return one record exposing the
    sequence as ``.dna``.  A substring counts as a hit when it is identical
    to its own reverse complement.

    Output: lines of the form ``"<1-based start> <length>"``, shortest
    lengths first and, within a length, in increasing start position.
    """
    data = readFASTA(sys.argv[1])

    dna = data.dna
    n = len(dna)
    # NOTE(review): relies on revcomp_DNA(dna)[k] being the complement of
    # dna[n - 1 - k] (reverse complement of the full sequence) -- confirm.
    revc = revcomp_DNA(dna)

    # The reverse complement of dna[i:i + l] is revc[n - i - l:n - i].
    # Slicing revc at [i:i + l] would test a different stretch of the
    # sequence and yield both false hits and misses.
    locations = [
        str(i + 1) + ' ' + str(l)
        for l in range(4, 13)
        for i in range(n - l + 1)
        if dna[i:i + l] == revc[n - i - l:n - i]
    ]

    print('\n'.join(locations))
Beispiel #12
0
def main():
    """Print the pairwise ``p_distance`` matrix of the records in a FASTA file.

    The path is ``sys.argv[1]``; element ``[1]`` of each record is used as
    its sequence.  A square matrix created with ``zeros`` is filled with
    ``p_distance`` (defined elsewhere) for every pair of distinct records and
    printed one row per line, with values separated by spaces.
    """
    data = readFASTA(sys.argv[1])
    size = len(data)

    matrix = zeros((size, size))

    # The matrix is mirrored across the main diagonal, so each distance is
    # computed once for the upper triangle and copied to the lower one.
    for row in range(size):
        for col in range(row + 1, size):
            matrix[row][col] = p_distance(data[row][1], data[col][1])
            matrix[col][row] = matrix[row][col]

    for row in range(size):
        print(' '.join(map(str, matrix[row])))
Beispiel #13
0
def main():
    """Write the prefix-function (failure array) of a sequence to a file.

    ``readFASTA(sys.argv[1])`` is expected to return one record exposing the
    sequence as ``.dna``.  For every index ``i`` the array holds the length
    of the longest proper prefix of ``dna[:i + 1]`` that is also a suffix of
    it.  The values are written space-separated to ``output/kmp.txt``.  An
    empty sequence produces an empty file.
    """
    data = readFASTA(sys.argv[1])
    dna = data.dna

    # Index 0 is always 0 and already set by the initialisation.  It is not
    # assigned explicitly, so an empty sequence does not raise IndexError.
    fail = [0] * len(dna)

    for i in range(1, len(dna)):
        j = fail[i - 1]

        # Fall back through shorter borders until one can be extended.
        while j > 0 and dna[i] != dna[j]:
            j = fail[j - 1]

        if dna[i] == dna[j]:
            j += 1

        fail[i] = j

    with open('output/kmp.txt', 'w') as output:
        output.write(' '.join(map(str, fail)))
Beispiel #14
0
                        help="Input FASTA")
    parser.add_argument('-o',
                        '--output',
                        required=False,
                        type=argparse.FileType('w'),
                        default=stdout,
                        help="Output")
    parser.add_argument('-a',
                        '--alpha',
                        required=False,
                        type=float,
                        default=float('inf'),
                        help="Gamma distribution alpha parameter")
    args = parser.parse_args()
    return args.input, args.output, args.alpha


# main code execution
# Reads the sequences, checks that they all have the same length, then writes
# one jc69() value per unordered pair of sequences, one per line.
infile, outfile, alpha = parseArgs()
seqs = readFASTA(infile)
infile.close()
keys = list(seqs.keys())

# Raise rather than assert: assertions are stripped when Python runs with -O,
# which would let sequences of unequal length through.
L = None
for k in keys:
    if L is None:
        L = len(seqs[k])
    elif len(seqs[k]) != L:
        raise ValueError("All sequences must be of equal length")

# Pairs are emitted in key order: (0,1), (0,2), ..., (1,2), ...
for i, first in enumerate(keys):
    for second in keys[i + 1:]:
        outfile.write('%f\n' % jc69(seqs[first], seqs[second], alpha))
Beispiel #15
0
'''
Convert names of FASTA file to random safe names.
Sequences are output to STDOUT.
Dictionary of name mappings is output to STDERR.
'''
from common import ran_str, readFASTA
K = 20  # safenames will be length 20
ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

from sys import stdin, stderr
import argparse
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-i',
                    '--input',
                    required=False,
                    type=argparse.FileType('r'),
                    default=stdin,
                    help="Input FASTA")
args = parser.parse_args()
seqs = readFASTA(args.input)

# Maps each generated safe name back to the original FASTA name.  It is not
# called ``map`` so the builtin of that name stays usable in this module.
safe_to_original = {}
for key in seqs:
    # Draw again on the (unlikely) collision with an already issued name.
    safe = ran_str(ALPHABET, K)
    while safe in safe_to_original:
        safe = ran_str(ALPHABET, K)
    safe_to_original[safe] = key
    # The trailing '\n' plus print's own newline leaves a blank line after
    # every record.
    print('>%s\n%s\n' % (safe, seqs[key]))
print(str(safe_to_original), file=stderr)