Esempio n. 1
0
from readros import read_rosalind as rr

# Take the first value returned by the reader as the word to analyse
# (assumes rr() returns a dict-like mapping -- TODO confirm).
records = rr('test.txt')
word = list(records.values())[0]

# Zero-filled table with one slot per character of `word` plus one extra.
table = [0 for _ in range(len(word) + 1)]


def kmp_table(W, T):
    """Fill and return the KMP border (failure) table of ``W``.

    After the call ``T[0]`` is ``-1`` and, for every ``i`` in
    ``1..len(W)``, ``T[i]`` is the length of the longest proper prefix of
    ``W[:i]`` that is also a suffix of ``W[:i]``.

    Args:
        W: The indexable sequence (typically a string) to analyse.
        T: A mutable list with at least ``len(W) + 1`` slots. It is
            modified in place.

    Returns:
        The same list object ``T``, now holding the table.

    Raises:
        IndexError: If ``T`` has fewer than ``len(W) + 1`` slots.
    """
    T[0] = -1

    # Nothing to compare for an empty word; only the sentinel is defined.
    if len(W) == 0:
        return T

    pos = 1  # index of the table entry being computed
    cnd = 0  # length of the border of W[:pos] found so far

    while pos < len(W):
        # The border of W[:pos] is known at this point, whether or not the
        # next character extends it.
        T[pos] = cnd
        # Fall back through shorter borders until W[pos] extends one of
        # them, or until the -1 sentinel says no border can be extended.
        while cnd >= 0 and W[pos] != W[cnd]:
            cnd = T[cnd]
        pos = pos + 1
        cnd = cnd + 1

    # Border length of the whole word.
    T[pos] = cnd
    return T


# Absolute value of every table entry, then drop the leading entry
# (kmp_table stores -1 in slot 0).
a = list(map(abs, kmp_table(word, table)))
a.pop(0)
from readros import read_rosalind as rr

# Load the records (assumes rr() returns a mapping of label -> sequence
# string -- TODO confirm against readros).
textd = rr("rosalind_grph.txt")

# First three and last three characters of every sequence, keyed by label.
dictpref = {k: v[:3] for k, v in textd.items()}
dictsuf = {k: v[-3:] for k, v in textd.items()}

# Write one "tail head" line for every ordered pair of distinct labels where
# the 3-character suffix of `tail` equals the 3-character prefix of `head`.
# The context manager closes the file even if a write fails.
with open("Output.txt", "w") as text_file:
    for k, v in dictsuf.items():
        for a, b in dictpref.items():
            if b == v and k != a:
                text_file.write(k + " " + a + "\n")
Esempio n. 3
0
from readros import read_rosalind as rr
import numpy as np

# All values returned by the reader, in order
# (assumes rr() returns a dict-like mapping -- TODO confirm).
records = rr("test.txt")
seq = list(records.values())

# Square zero matrix with one row and one column per sequence.
dst = np.zeros(shape=(len(seq), len(seq)))


def difference(a, b):
    """Return the fraction of positions of ``a`` where ``a`` and ``b`` differ.

    Only the first ``len(a)`` positions of ``b`` are inspected.

    Raises:
        IndexError: If ``b`` is shorter than ``a``.
        ZeroDivisionError: If ``a`` is empty.
    """
    mismatches = sum(1 for idx, symbol in enumerate(a) if symbol != b[idx])
    return mismatches / len(a)


# Fill every cell with the pairwise difference of the two sequences.
for i, left in enumerate(seq):
    for j, right in enumerate(seq):
        dst[i, j] = difference(left, right)

# Print one matrix row per line; every value has five decimals and is
# followed by a single space, including the last one on the line.
for row in dst:
    formatted = ['{0:.5f}'.format(a) for a in row]
    print(*formatted, end=" \n")
Esempio n. 4
0
from readros import read_rosalind as rr

# Read the input and show all of its values as a list
# (assumes rr() returns a dict-like mapping -- TODO confirm).
sequence = rr("test.txt")
SEQUENCES = [*sequence.values()]

print(SEQUENCES)
from readros import read_rosalind as rr

# All values returned by the reader; only the first two are compared.
sequences = list(rr("test.txt").values())

# Ordered base pairs counted as transitions (A<->G and C<->T).
TRANSITION_PAIRS = {("A", "G"), ("G", "A"), ("C", "T"), ("T", "C")}

transitions = 0
transversions = 0
for i, base in enumerate(sequences[0]):
    other = sequences[1][i]
    if (base, other) in TRANSITION_PAIRS:
        transitions += 1
    elif base != other:
        # Any other mismatch is counted as a transversion.
        transversions += 1

# Ratio printed with 11 decimals; raises ZeroDivisionError when no
# transversion was found.
print('{0:.11f}'.format(transitions / transversions))
Esempio n. 6
0
from readros import read_rosalind as rr
from itertools import product
import re
import sre_constants


from collections import Counter

# First value returned by the reader, coerced to a string
# (assumes rr() returns a dict-like mapping -- TODO confirm).
seq = str(list(rr('test.txt').values())[0])
bases = ['A', 'T', 'C', 'G']

# Every 4-mer over `bases`, in lexicographic order.
new_data = sorted(''.join(kmers) for kmers in product(bases, repeat=4))

# Every full-length window of four characters, in sequence order.
seq_divided = [seq[i:i + 4] for i in range(len(seq) - 3)]

# Count all windows in a single pass instead of rescanning the window list
# once per 4-mer; a Counter yields 0 for 4-mers that never occur.
window_counts = Counter(seq_divided)
kmer_list = [window_counts[kmer] for kmer in new_data]

print(*kmer_list)
Esempio n. 7
0
    a = []
    b = []
    #Find all start codons and cut all previous bases
    for i in range(0, len(DNA)):
        if DNA[i:i+3] == "AUG":
            a.append(DNA[i:])
    #Find all stop codons and cut the string before them, resulting in a string in "AUG....." format
    for i in a:
        for j in range(0, len(i), 3):
            if i[j:j+3] == "UAA" or i[j:j+3] == "UGA" or i[j:j+3] == "UAG" and i[:j] not in b:
                b.append(i[:j])
                break
    return b


# First value returned by the reader, with every "T" replaced by "U".
dna = list(rr("rosalind_splc.txt").values())[0].replace("T", "U")

# Find all proteins in the sequence (without reverse-complementing it):
# each string from findstartstop() is passed through translate() and the
# result is joined into one string.
prereverse = ["".join(translate(frame, 3)) for frame in findstartstop(dna)]

# Same procedure on the reverse complement of the sequence.
rdna = reverse_complement(dna)
postreverse = ["".join(translate(frame, 3)) for frame in findstartstop(rdna)]

# `whole` is another name for the prereverse list, not a copy.
whole = prereverse
for i in postreverse:
Esempio n. 8
0
from readros import read_rosalind as rr
import numpy as np

# Read the input and keep its values in order
# (assumes rr() returns a dict-like mapping of equal-length strings --
# TODO confirm).
text = rr("test.txt")
subs = list(text.values())

# Split every string into characters and transpose, so that each entry of
# `subst` is a plain list holding one column of the input.
subst = np.array([list(i) for i in subs]).transpose().tolist()

cstr = []

# Per-column occurrence counts of each base.
alist = [column.count("A") for column in subst]
clist = [column.count("C") for column in subst]
tlist = [column.count("T") for column in subst]
glist = [column.count("G") for column in subst]

for i in range(0, len(alist)):
    cslist = [alist[i], clist[i], tlist[i], glist[i]]