Return: The probability that two randomly selected mating organisms will produce an individual possessing a dominant allele (and thus displaying the dominant phenotype). Assume that any two organisms can mate. """ import common from math import factorial sample_data = "2 2 2" sample_output = "0.78333" def prob(inp): # k homozygous dominant (YY) # m heterozygous (Yy) # n homozygous recessive (yy) k, m, n = [float(x) for x in inp.split(" ")] N = k + m + n Nm = N - 1.0 p = (k / N) # YY chosen first p += (m / N) * ( (k / Nm) + (0.75 * (m - 1) / Nm) + (0.5 * (n / Nm))) # Yy first p += (n / N) * ((k / Nm) + (0.5 * (m / Nm))) #yy first return "%.5f" % p common.test(prob, sample_data, sample_output) common.runit(prob)
""" http://rosalind.info/problems/subs/ Given: Two DNA strings s and t (each of length at most 1 kbp). Return: All locations of t as a substring of s. """ import common import re sample_data = """GATATATGCATATACTT ATAT""" sample_output = "2 4 10" def motif(inp): s = inp.splitlines()[0] t = inp.splitlines()[1] p = re.compile(r'(?=(%s))' % t) results = [] for m in p.finditer(s): results.append(str(m.start() + 1)) return ' '.join(results) common.test(motif, sample_data, sample_output) common.runit(motif)
""" http://rosalind.info/problems/revc/ Given: A DNA string s of length at most 1000 bp. Return: The reverse complement sc of s. """ import common sample_data = "AAAACCCGGT" sample_output = "ACCGGGTTTT" comp = dict(A="T", T="A", C="G", G="C") def complement(dna): return "".join([x.replace(x, comp[x]) for x in dna[::-1]]) common.test(complement, sample_data, sample_output) common.runit(complement)
for line in inp.splitlines(): print line if line.startswith(">"): if current_dna: current_dna.get_gc() dnas.append(current_dna) current_dna = Dna(ID=line.replace(">", "").strip()) else: # dna stretches over multiple lines current_dna.dna_str += line.strip() dnas.append(current_dna) # from pprint import pprint # pprint(dnas) max_dna = dnas[0] for d in dnas: if d.gc > max_dna.gc: max_dna = d print "" print "DNA with largest GC content:" print max_dna.ID print max_dna.gc return "%s\n%s" % (max_dna.ID, str(max_dna.gc)) common.test(gc, sample_data, sample_output) common.runit(gc)
"""brute-force way""" counter = dict(A=0, C=0, G=0, T=0) for w in word: if w in alphabet: # error checking counter[w] += 1 print counter["A"], counter["C"], counter["G"], counter["T"] return "%s %s %s %s" % (counter["A"], counter["C"], counter["G"], counter["T"]) def counter_good(word): """nice way""" counter = dict((x, word.count(x)) for x in alphabet) print counter["A"], counter["C"], counter["G"], counter["T"] return "%s %s %s %s" % (counter["A"], counter["C"], counter["G"], counter["T"]) def counter_fancy(word): """super nice way""" from collections import Counter counter = Counter(word) print counter["A"], counter["C"], counter["G"], counter["T"] return "%s %s %s %s" % (counter["A"], counter["C"], counter["G"], counter["T"]) common.test(counter_old, sample_data, sample_output) common.runit(counter_fancy)
""" http://rosalind.info/problems/hamm/ Given: Two DNA strings s and t of equal length (not exceeding 1 kbp). Return: The Hamming distance dH(s,t). """ import common from itertools import izip sample_data = """GAGCCTACTAACGGGAT CATCGTAATGACGGCCT""" sample_output = "7" def hamm(inp): s = inp.splitlines()[0] t = inp.splitlines()[1] return str(sum([i != j for i, j in izip(s, t)])) common.test(hamm, sample_data, sample_output) common.runit(hamm)
"AGC": "S", "GGC": "G", "UGA": "Stop", "CGA": "R", "AGA": "R", "GGA": "G", "UGG": "W", "CGG": "R", "AGG": "R", "GGG": "G" } def code(rna): # check it starts correctly if rna[0:3] != "AUG": return None # split into 3-letter words words = [rna[i:i + 3] for i in range(0, len(rna), 3)] # change to proteins protein = [codon_table[w] for w in words] protein = protein[:protein.index("Stop")] return ''.join(protein) common.test(code, sample_data, sample_output) common.runit(code)
AA-AA AA-Aa AA-aa Aa-Aa Aa-aa aa-aa Return: The expected number of offspring displaying the dominant phenotype in the next generation, under the assumption that every couple has exactly two offspring. """ import common sample_data = "1 0 0 1 0 1" sample_output = "3.5" # probability of getting dominant phenotype for each of the 6 pairs as above probs = [1., 1., 1., 0.75, 0.5, 0.] def calc(inp): return str( sum([2 * probs[i] * int(j) for i, j in enumerate(inp.split(" "))])) common.test(calc, sample_data, sample_output) common.runit(calc)
""" http://rosalind.info/problems/rna/ Given: A DNA string t having length at most 1000 nt. Return: The transcribed RNA string of t. (T => U) """ import common alphabet = ['A', 'C', 'G', 'U'] sample_data = "GATGGAACTTGACTACGTAAATT" sample_output = "GAUGGAACUUGACUACGUAAAUU" def scribe(dna): out = dna.replace("T", "U") return out common.test(scribe, sample_data, sample_output) common.runit(scribe)
Return: The total number of rabbit pairs that will be present after n months if we begin with 1 pair and in each generation, every pair of reproduction-age rabbits produces a litter of k rabbit pairs (instead of only 1 pair). """ import common sample_data = "5 3" sample_output = "19" # Formula: F_n = F_{n-1} + kF_{n-2} def fib(inp): n = int(inp.split()[0]) k = int(inp.split()[1]) return str(term(n, k)) def term(i, k): if i == 1 or i == 2: return 1 else: return term(i - 1, k) + (k * term(i - 2, k)) common.test(fib, sample_data, sample_output) common.runit(fib)