def kmp_table(W, T):
    """Fill ``T`` with the KMP failure (border) table of ``W`` and return it.

    After the call ``T[0]`` holds the sentinel ``-1`` and, for every
    ``1 <= k <= len(W)``, ``T[k]`` is the length of the longest proper
    prefix of ``W[:k]`` that is also a suffix of ``W[:k]``.

    Args:
        W: The string (or other indexable sequence) to analyse.
        T: A pre-allocated mutable sequence with at least ``len(W) + 1``
            slots. It is overwritten in place.

    Returns:
        The same ``T`` object that was passed in.
    """
    T[0] = -1
    if not W:
        # Only the sentinel exists for an empty word; writing T[1] would
        # run past a table of size len(W) + 1.
        return T

    cnd = 0  # border length of the prefix W[:pos]
    for pos in range(1, len(W)):
        # The border length of W[:pos] is always `cnd`, whether or not the
        # next character extends it.  (Storing T[cnd] + 1 here on a match
        # produced wrong values, e.g. for "ABCABCD".)
        T[pos] = cnd
        # Fall back through shorter borders until W[pos] extends one of
        # them, or until the -1 sentinel is reached.
        while cnd >= 0 and W[pos] != W[cnd]:
            cnd = T[cnd]
        cnd += 1
    T[len(W)] = cnd
    return T


if __name__ == "__main__":
    from readros import read_rosalind as rr

    # Only the first record of the input file is used.
    word = list(rr('test.txt').values())[0]
    table = [0] * (len(word) + 1)
    # Drop the leading -1 sentinel; the remaining entries are the failure
    # array for prefixes of length 1..len(word).
    a = kmp_table(word, table)[1:]
def overlap_edges(records, k=3):
    """Return the directed edges of the length-``k`` overlap graph.

    An edge ``(tail, head)`` is reported when the last ``k`` characters of
    ``records[tail]`` equal the first ``k`` characters of ``records[head]``
    and the two record names differ.

    Args:
        records: Mapping of record name to sequence string.
        k: Overlap length; expected to be a positive integer (default 3).

    Returns:
        A list of ``(tail, head)`` name pairs, ordered by the mapping's
        iteration order of ``tail`` and then of ``head``.
    """
    prefixes = {name: dna[:k] for name, dna in records.items()}
    suffixes = {name: dna[-k:] for name, dna in records.items()}

    edges = []
    for tail, suffix in suffixes.items():
        for head, prefix in prefixes.items():
            # Self-loops are excluded by comparing names, not sequences.
            if prefix == suffix and tail != head:
                edges.append((tail, head))
    return edges


if __name__ == "__main__":
    from readros import read_rosalind as rr

    textd = rr("rosalind_grph.txt")
    # The context manager closes the file even if a write fails.
    with open("Output.txt", "w") as text_file:
        for tail, head in overlap_edges(textd):
            text_file.write(tail + " " + head + "\n")
import numpy as np


def difference(a, b):
    """Return the fraction of positions at which ``a`` and ``b`` differ.

    Args:
        a: First sequence.
        b: Second sequence; must have the same length as ``a``.

    Returns:
        Number of mismatching positions divided by the common length, as a
        float. Two empty sequences give ``0.0``.

    Raises:
        ValueError: If the sequences have different lengths. Previously a
            longer ``b`` was silently truncated and a shorter ``b`` raised
            ``IndexError``.
    """
    if len(a) != len(b):
        raise ValueError("sequences must have the same length")
    if not a:
        # Avoid dividing by zero: nothing to compare means no mismatches.
        return 0.0
    mismatches = sum(x != y for x, y in zip(a, b))
    return mismatches / len(a)


if __name__ == "__main__":
    from readros import read_rosalind as rr

    seq = list(rr("test.txt").values())
    dst = np.zeros((len(seq), len(seq)))

    # Fill the full square matrix of pairwise distances.
    for i, first in enumerate(seq):
        for j, second in enumerate(seq):
            dst[i][j] = difference(first, second)

    # One matrix row per output line, five decimals, space separated.
    for row in dst:
        for a in row:
            print('{0:.5f}'.format(a), end=" ")
        print()
from readros import read_rosalind as rr

# Load the records from the input file, keep only the values returned for
# each record, and print them as a list.
sequence = rr("test.txt")
SEQUENCES = [*sequence.values()]
print(SEQUENCES)
# Ordered base pairs that count as transitions (A<->G and C<->T).
_TRANSITIONS = frozenset({("A", "G"), ("G", "A"), ("C", "T"), ("T", "C")})


def count_substitutions(s1, s2):
    """Count transitions and transversions between two aligned sequences.

    A differing position is a transition when the pair of symbols is one of
    A<->G or C<->T (upper case only); every other differing position is
    counted as a transversion.

    Args:
        s1: First sequence.
        s2: Second sequence; must have the same length as ``s1``.

    Returns:
        A ``(transitions, transversions)`` tuple of ints.

    Raises:
        ValueError: If the sequences have different lengths.
    """
    if len(s1) != len(s2):
        raise ValueError("sequences must have the same length")

    transitions = 0
    transversions = 0
    for x, y in zip(s1, s2):
        if x == y:
            continue
        if (x, y) in _TRANSITIONS:
            transitions += 1
        else:
            transversions += 1
    return transitions, transversions


def transition_transversion_ratio(s1, s2):
    """Return the number of transitions divided by the number of transversions.

    Raises:
        ValueError: If the sequences have different lengths.
        ZeroDivisionError: If the sequences contain no transversions, so the
            ratio is undefined.
    """
    transitions, transversions = count_substitutions(s1, s2)
    if transversions == 0:
        raise ZeroDivisionError(
            "no transversions between the sequences; ratio is undefined"
        )
    return transitions / transversions


if __name__ == "__main__":
    from readros import read_rosalind as rr

    # Only the first two records of the input file are compared.
    sequences = list(rr("test.txt").values())
    print('{0:.11f}'.format(
        transition_transversion_ratio(sequences[0], sequences[1])))
import re
import sre_constants
from collections import Counter
from itertools import product


def kmer_composition(seq, k=4, alphabet="ACGT"):
    """Return the k-mer composition of ``seq``.

    Args:
        seq: The sequence string to scan.
        k: Length of the k-mers to count (default 4).
        alphabet: Symbols the k-mers are built from (default ``"ACGT"``).

    Returns:
        A list with one count per possible k-mer over ``alphabet``, in
        lexicographic k-mer order. Overlapping occurrences are counted.
    """
    # Count every full-length window once instead of rescanning the window
    # list for each of the len(alphabet) ** k possible k-mers.
    windows = Counter(seq[i:i + k] for i in range(len(seq) - k + 1))
    # product() over a sorted alphabet already yields lexicographic order.
    return [
        windows[''.join(kmer)]
        for kmer in product(sorted(alphabet), repeat=k)
    ]


if __name__ == "__main__":
    from readros import read_rosalind as rr

    # Only the first record of the input file is analysed.
    seq = str(list(rr('test.txt').values())[0])
    kmer_list = kmer_composition(seq)
    print(*kmer_list)
a = [] b = [] #Find all start codons and cut all previous bases for i in range(0, len(DNA)): if DNA[i:i+3] == "AUG": a.append(DNA[i:]) #Find all stop codons and cut the string before them, resulting in a string in "AUG....." format for i in a: for j in range(0, len(i), 3): if i[j:j+3] == "UAA" or i[j:j+3] == "UGA" or i[j:j+3] == "UAG" and i[:j] not in b: b.append(i[:j]) break return b dna = list(rr("rosalind_splc.txt").values()) dna = dna[0] dna = dna.replace("T", "U") #Find all proteins in sequence (without reverse-complementing it) prereverse = findstartstop(dna) for i in range(0, len(prereverse)): prereverse[i] = translate(prereverse[i],3) prereverse[i] = "".join(prereverse[i]) rdna = reverse_complement(dna) postreverse = findstartstop(rdna) for i in range(0, len(postreverse)): postreverse[i] = translate(postreverse[i],3) postreverse[i] = "".join(postreverse[i]) whole = prereverse for i in postreverse:
from readros import read_rosalind as rr
import numpy as np

# Read the records and keep just their sequence strings.
text = rr("test.txt")
subs = list(text.values())

# Split each sequence into characters and transpose, so that every entry of
# ``subst`` holds the symbols found at one position across all sequences.
subst = np.array([list(record) for record in subs]).transpose().tolist()

cstr = []

# Per-position occurrence counts for each base.
alist = [column.count("A") for column in subst]
clist = [column.count("C") for column in subst]
tlist = [column.count("T") for column in subst]
glist = [column.count("G") for column in subst]

# Gather the four counts of each position, in A, C, T, G order.
for i in range(len(alist)):
    cslist = [alist[i], clist[i], tlist[i], glist[i]]