예제 #1
0
Return: The probability that two randomly selected mating organisms will produce 
an individual possessing a dominant allele (and thus displaying the dominant 
phenotype). Assume that any two organisms can mate.
"""

import common
from math import factorial

sample_data = "2 2 2"
sample_output = "0.78333"


def prob(inp):
    """Return the chance that a random mating pair shows the dominant trait.

    ``inp`` is a single-space-separated string "k m n":
      k -- number of homozygous dominant organisms (YY)
      m -- number of heterozygous organisms (Yy)
      n -- number of homozygous recessive organisms (yy)

    The probability is returned as a string with five decimal places.
    """
    dominant, hetero, recessive = [float(tok) for tok in inp.split(" ")]
    total = dominant + hetero + recessive
    # Population left to draw the second parent from.
    remaining = total - 1.0

    # First parent YY: the offspring always carries a dominant allele.
    yy_dom_first = (dominant / total)

    # First parent Yy: the outcome depends on the second parent
    # (YY -> 1, Yy -> 3/4, yy -> 1/2).
    hetero_first = (hetero / total) * (
        (dominant / remaining)
        + (0.75 * (hetero - 1) / remaining)
        + (0.5 * (recessive / remaining)))

    # First parent yy: only a YY or Yy partner can contribute a dominant
    # allele (YY -> 1, Yy -> 1/2).
    recessive_first = (recessive / total) * (
        (dominant / remaining) + (0.5 * (hetero / remaining)))

    return "%.5f" % (yy_dom_first + hetero_first + recessive_first)


# NOTE(review): `common` is a project-local helper module that is not shown
# here. Presumably test() checks prob(sample_data) against sample_output and
# runit() runs prob on the real dataset -- confirm against common.py.
common.test(prob, sample_data, sample_output)

common.runit(prob)
예제 #2
0
파일: motif.py 프로젝트: raggleton/rosalind
"""
http://rosalind.info/problems/subs/

Given: Two DNA strings s and t (each of length at most 1 kbp).

Return: All locations of t as a substring of s.
"""

import common
import re

sample_data = """GATATATGCATATACTT
ATAT"""

sample_output = "2 4 10"


def motif(inp):
    """Return every 1-based position at which ``t`` occurs inside ``s``.

    ``inp`` holds two lines: the string ``s`` to search on the first line and
    the motif ``t`` on the second. Overlapping occurrences are all reported.
    The positions are returned as one space-separated string (empty when the
    motif does not occur).
    """
    lines = inp.splitlines()
    s, t = lines[0], lines[1]
    # A zero-width lookahead consumes nothing, so finditer also reports
    # overlapping occurrences. The motif is escaped so that it is always
    # matched literally, even if it contains regex metacharacters.
    pattern = re.compile(r'(?=%s)' % re.escape(t))
    return ' '.join(str(m.start() + 1) for m in pattern.finditer(s))


# NOTE(review): `common` is a project-local helper module that is not shown
# here. Presumably test() checks motif(sample_data) against sample_output and
# runit() runs motif on the real dataset -- confirm against common.py.
common.test(motif, sample_data, sample_output)

common.runit(motif)
예제 #3
0
"""
http://rosalind.info/problems/revc/

Given: A DNA string s of length at most 1000 bp.

Return: The reverse complement sc of s.
"""

import common

sample_data = "AAAACCCGGT"
sample_output = "ACCGGGTTTT"

# Watson-Crick pairing: each base maps to its complementary base.
comp = {"A": "T", "T": "A", "C": "G", "G": "C"}


def complement(dna):
    """Return the reverse complement of the DNA string ``dna``.

    The string is read back-to-front and every base is swapped for its
    partner from ``comp``. A symbol missing from ``comp`` raises KeyError.
    """
    flipped = []
    for base in reversed(dna):
        flipped.append(comp[base])
    return "".join(flipped)


# NOTE(review): `common` is a project-local helper module that is not shown
# here. Presumably test() checks complement(sample_data) against
# sample_output and runit() runs complement on the real dataset -- confirm
# against common.py.
common.test(complement, sample_data, sample_output)

common.runit(complement)
예제 #4
0
    for line in inp.splitlines():
        print line
        if line.startswith(">"):
            if current_dna:
                current_dna.get_gc()
                dnas.append(current_dna)
            current_dna = Dna(ID=line.replace(">", "").strip())
        else:
            # dna stretches over multiple lines
            current_dna.dna_str += line.strip()
    dnas.append(current_dna)

    # from pprint import pprint
    # pprint(dnas)

    max_dna = dnas[0]
    for d in dnas:
        if d.gc > max_dna.gc:
            max_dna = d
    print ""
    print "DNA with largest GC content:"
    print max_dna.ID
    print max_dna.gc

    return "%s\n%s" % (max_dna.ID, str(max_dna.gc))


# NOTE(review): `common` is a project-local helper module that is not shown
# here. Presumably test() checks gc(sample_data) against sample_output and
# runit() runs gc on the real dataset -- confirm against common.py.
common.test(gc, sample_data, sample_output)

common.runit(gc)
예제 #5
0
    """brute-force way"""
    counter = dict(A=0, C=0, G=0, T=0)
    for w in word:
        if w in alphabet:  # error checking
            counter[w] += 1
    print counter["A"], counter["C"], counter["G"], counter["T"]
    return "%s %s %s %s" % (counter["A"], counter["C"], counter["G"],
                            counter["T"])


def counter_good(word):
    """nice way

    Count how often each symbol of the module-level ``alphabet`` occurs in
    ``word`` using ``str.count``. The counts for A, C, G and T are printed
    and returned as the space-separated string "A C G T".
    """
    counter = {x: word.count(x) for x in alphabet}
    result = "%s %s %s %s" % (counter["A"], counter["C"], counter["G"],
                              counter["T"])
    # Printing the already-formatted string emits the same text as the old
    # Python 2 print statement, and this call form is valid on Python 3 too.
    print(result)
    return result


def counter_fancy(word):
    """super nice way

    Tally the symbols of ``word`` with ``collections.Counter``. The counts
    for A, C, G and T are printed and returned as the space-separated string
    "A C G T"; a symbol that never occurs counts as 0.
    """
    from collections import Counter
    counter = Counter(word)
    result = "%s %s %s %s" % (counter["A"], counter["C"], counter["G"],
                              counter["T"])
    # Printing the already-formatted string emits the same text as the old
    # Python 2 print statement, and this call form is valid on Python 3 too.
    print(result)
    return result


# NOTE(review): `common` is a project-local helper module that is not shown
# here. Presumably test() checks the given function against the sample and
# runit() runs one on the real dataset -- confirm against common.py.
# Note that the sample check exercises counter_old while the real run uses
# counter_fancy, so counter_fancy itself is not covered by the check.
common.test(counter_old, sample_data, sample_output)

common.runit(counter_fancy)
예제 #6
0
"""
http://rosalind.info/problems/hamm/

Given: Two DNA strings s and t of equal length (not exceeding 1 kbp).

Return: The Hamming distance dH(s,t).

"""

import common
from itertools import izip

sample_data = """GAGCCTACTAACGGGAT
CATCGTAATGACGGCCT"""

sample_output = "7"


def hamm(inp):
    """Return the Hamming distance between two strings as a decimal string.

    ``inp`` holds the two strings on its first two lines. Positions are
    compared pairwise; if the strings differ in length, the surplus tail of
    the longer one is ignored.
    """
    lines = inp.splitlines()
    s, t = lines[0], lines[1]
    # Builtin zip works on both Python 2 and 3 (itertools.izip is Python 2
    # only). Each mismatch is True, which sums as 1.
    return str(sum(a != b for a, b in zip(s, t)))


# NOTE(review): `common` is a project-local helper module that is not shown
# here. Presumably test() checks hamm(sample_data) against sample_output and
# runit() runs hamm on the real dataset -- confirm against common.py.
common.test(hamm, sample_data, sample_output)

common.runit(hamm)
예제 #7
0
    "AGC": "S",
    "GGC": "G",
    "UGA": "Stop",
    "CGA": "R",
    "AGA": "R",
    "GGA": "G",
    "UGG": "W",
    "CGG": "R",
    "AGG": "R",
    "GGG": "G"
}


def code(rna):
    """Translate the RNA string ``rna`` into its protein string.

    Codons are looked up in the module-level ``codon_table``, which maps each
    3-letter codon to an amino-acid letter or to the marker "Stop".

    Returns the amino acids up to (not including) the first stop codon, or
    None when ``rna`` does not begin with the start codon "AUG".

    Raises:
        KeyError: a codon before the stop codon is not in ``codon_table``.
        ValueError: no stop codon is found.
    """
    # check it starts correctly
    if rna[0:3] != "AUG":
        return None

    protein = []
    # Walk complete 3-letter codons only (len(rna) - 2 is one past the last
    # index that can start a full codon) and stop at the first stop codon, so
    # nothing after it -- such as a trailing partial codon -- is looked up.
    for i in range(0, len(rna) - 2, 3):
        amino = codon_table[rna[i:i + 3]]
        if amino == "Stop":
            return ''.join(protein)
        protein.append(amino)

    # Same exception type the previous list.index("Stop") lookup raised.
    raise ValueError("no stop codon found in RNA string")


# NOTE(review): `common` is a project-local helper module that is not shown
# here. Presumably test() checks code(sample_data) against sample_output and
# runit() runs code on the real dataset -- confirm against common.py.
common.test(code, sample_data, sample_output)

common.runit(code)
예제 #8
0
AA-AA
AA-Aa
AA-aa
Aa-Aa
Aa-aa
aa-aa

Return: The expected number of offspring displaying the dominant phenotype in
the next generation, under the assumption that every couple has exactly two
offspring.
"""

import common

sample_data = "1 0 0 1 0 1"
sample_output = "3.5"

# Chance of a dominant-phenotype child for each of the six couple types, in
# the order AA-AA, AA-Aa, AA-aa, Aa-Aa, Aa-aa, aa-aa.
probs = [1.0, 1.0, 1.0, 0.75, 0.5, 0.0]


def calc(inp):
    """Return the expected number of dominant-phenotype offspring.

    ``inp`` is a single-space-separated string of integers, the number of
    couples of each genotype pairing in the order used by ``probs``. Every
    couple contributes two children. The total is returned via ``str``.
    """
    expected = 0
    for idx, couples in enumerate(inp.split(" ")):
        expected += 2 * probs[idx] * int(couples)
    return str(expected)


# NOTE(review): `common` is a project-local helper module that is not shown
# here. Presumably test() checks calc(sample_data) against sample_output and
# runit() runs calc on the real dataset -- confirm against common.py.
common.test(calc, sample_data, sample_output)

common.runit(calc)
예제 #9
0
"""
http://rosalind.info/problems/rna/

Given: A DNA string t having length at most 1000 nt.

Return: The transcribed RNA string of t. (T => U)
"""
import common

alphabet = ['A', 'C', 'G', 'U']
sample_data = "GATGGAACTTGACTACGTAAATT"
sample_output = "GAUGGAACUUGACUACGUAAAUU"

def scribe(dna):
    """Return ``dna`` transcribed to RNA: every "T" becomes "U"."""
    return dna.replace("T", "U")


# NOTE(review): `common` is a project-local helper module that is not shown
# here. Presumably test() checks scribe(sample_data) against sample_output
# and runit() runs scribe on the real dataset -- confirm against common.py.
common.test(scribe, sample_data, sample_output)

common.runit(scribe)
예제 #10
0
파일: fib.py 프로젝트: raggleton/rosalind
Return: The total number of rabbit pairs that will be present after n months if 
we begin with 1 pair and in each generation, every pair of reproduction-age 
rabbits produces a litter of k rabbit pairs (instead of only 1 pair).

"""

import common

sample_data = "5 3"
sample_output = "19"

# Formula: F_n = F_{n-1} + kF_{n-2}


def fib(inp):
    """Return the rabbit-pair count after n months as a decimal string.

    ``inp`` is a whitespace-separated string "n k": the number of months and
    the litter size (pairs produced per reproducing pair). The actual
    sequence value is computed by ``term``.
    """
    fields = inp.split()
    return str(term(int(fields[0]), int(fields[1])))


def term(i, k):
    """Return F_i of the sequence F_n = F_{n-1} + k * F_{n-2}, F_1 = F_2 = 1.

    Args:
        i: 1-based index of the term (the month).
        k: multiplier applied to the term two steps back (the litter size).

    Raises:
        ValueError: ``i`` is smaller than 1, for which no term is defined.
    """
    if i < 1:
        raise ValueError("term index must be >= 1, got %r" % (i,))

    # Carry only the last two terms forward. This is linear in i, whereas
    # the naive double recursion recomputes subterms exponentially often.
    older, newer = 1, 1
    for _ in range(i - 2):
        older, newer = newer, newer + (k * older)
    return newer


# NOTE(review): `common` is a project-local helper module that is not shown
# here. Presumably test() checks fib(sample_data) against sample_output and
# runit() runs fib on the real dataset -- confirm against common.py.
common.test(fib, sample_data, sample_output)

common.runit(fib)