Beispiel #1
0
def Drift():
    """Compute a Wright-Fisher drift probability and export it.

    Reads four whitespace-separated integers ``N m g k`` from the
    ``rosalind_wfmd.txt`` input via ``f.LoadFile``:

    * ``N`` -- number of diploid individuals (so ``2N`` allele copies),
    * ``m`` -- initial number of copies of the dominant allele,
    * ``g`` -- number of generations to simulate,
    * ``k`` -- minimum number of recessive copies of interest.

    The probability that generation ``g`` holds at least ``k`` recessive
    copies (i.e. at most ``2N - k`` dominant copies) is written, as a
    string, to ``rosalind_wfmd_output.txt`` via ``f.ExportToFile``.

    Returns:
        None.
    """
    fields = f.LoadFile('\\rosalind_wfmd.txt').split()
    N = int(fields[0]) * 2  # total allele copies in the population
    m = int(fields[1])  # initial number of dominant copies
    g = int(fields[2])  # generations to advance
    k = int(fields[3])  # "at least k" recessive copies

    # curr_gen[c] = probability that the population holds exactly c
    # dominant copies.  Generation 0 is known with certainty.
    curr_gen = [0] * (N + 1)
    curr_gen[m] = 1

    # The Markov transition matrix does not depend on the generation, so
    # build it once.  transition[i][j] is the binomial probability of
    # drawing i dominant copies when the parent generation holds j.
    # It is only built when at least one step is taken, so g == 0 keeps
    # doing no arithmetic at all (as before).
    transition = []
    if g > 0:
        transition = [[
            nCr(N, i) * (j / N)**i * (1 - (j / N))**(N - i)
            for j in range(N + 1)
        ] for i in range(N + 1)]

    for _ in range(g):
        next_gen = [0] * (N + 1)
        for i, row in enumerate(transition):
            # Explicit left-to-right accumulation keeps the floating
            # point result identical to the straightforward double loop.
            total = 0
            for weight, mass in zip(row, curr_gen):
                total += weight * mass
            next_gen[i] = total
        curr_gen = next_gen

    # At least k recessive copies <=> dominant count in 0 .. N - k.
    # An explicit, clamped upper bound is used instead of ``[:-k]``
    # because ``[:-0]`` is an empty slice and would report 0 for k == 0.
    upper = max(N - k + 1, 0)
    prob = str(sum(curr_gen[:upper]))
    f.ExportToFile('rosalind_wfmd_output.txt', prob)
Beispiel #2
0
def MendelFirstLaw():
    """Probability that a random mating pair yields a dominant phenotype.

    Reads three integers from the ``rosalind_iprb.txt`` input: the number
    of homozygous dominant, heterozygous and homozygous recessive
    individuals.  Every unordered pair of individuals is weighted by the
    chance that its offspring carries a dominant allele, and the resulting
    probability is written to ``rosalind_iprb_output.txt``.
    """
    counts = f.LoadFile('\\rosalind_iprb.txt').split()
    dominant = int(counts[0])  # homozygous dominant individuals
    hetero = int(counts[1])  # heterozygous individuals
    recessive = int(counts[2])  # homozygous recessive individuals

    all_pairs = nCr(dominant + hetero + recessive, 2)

    # Pairs containing a homozygous dominant parent always give a
    # dominant offspring: dominant with dominant, heterozygous, recessive.
    certain_pairs = (nCr(dominant, 2) + (dominant * hetero) +
                     (dominant * recessive))
    # Two heterozygous parents give a dominant offspring 75% of the time.
    hetero_pairs = nCr(hetero, 2)
    # Heterozygous with recessive gives a dominant offspring 50% of the time.
    mixed_pairs = hetero * recessive
    # Recessive with recessive never gives a dominant offspring, so those
    # pairs contribute nothing to the numerator.

    weighted = certain_pairs + 0.75 * hetero_pairs + 0.5 * mixed_pairs
    answer = weighted / all_pairs
    f.ExportToFile('rosalind_iprb_output.txt', str(answer))
Beispiel #3
0
def IndependentAlleles():
    """Upper-tail binomial probability for independent assortment.

    Reads two integers ``k`` and ``N`` from the ``rosalind_lia.txt`` input.
    With ``2**k`` trials, each succeeding with probability 0.25, the
    probability of at least ``N`` successes is accumulated and written to
    ``rosalind_lia_output.txt``.
    """
    tokens = f.LoadFile('\\rosalind_lia.txt').split()
    generations = int(tokens[0])
    at_least = int(tokens[1])

    population = 2**generations
    tail = 0
    # Binomial term for each possible success count (Mendel's 2nd Law).
    for hits in range(at_least, population + 1):
        misses = population - hits
        tail += nCr(population, hits) * (0.25**hits) * (0.75**misses)

    f.ExportToFile('rosalind_lia_output.txt', str(tail))
Beispiel #4
0
def Splicing():
    """Sum the binomial coefficients C(n, k) for m <= k <= n, modulo 10**6.

    ``n`` and ``m`` are the two whitespace-separated integers in the
    ``rosalind_aspc.txt`` input; the reduced sum is written to
    ``rosalind_aspc_output.txt``.
    """
    raw_n, raw_m = f.LoadFile('\\rosalind_aspc.txt').split()
    n = int(raw_n)
    m = int(raw_m)

    running = 0
    size = m
    while size <= n:
        running = running + nCr(n, size)
        size += 1

    f.ExportToFile('rosalind_aspc_output.txt', str(running % 1000000))
Beispiel #5
0
def PP():
    """Count partial permutations of size k drawn from n items, mod 10**6.

    Reads ``n`` and ``k`` from the ``rosalind_pper.txt`` input and writes
    the count to ``rosalind_pper_output.txt``.
    """
    tokens = f.LoadFile('\\rosalind_pper.txt').split()
    n = int(tokens[0])
    k = int(tokens[1])

    # Choose which k items take part ...
    selections = nCr(n, k)
    # ... then count the orderings of each chosen set.
    orderings = factorial(k)

    answer = (orderings * selections) % 1000000
    f.ExportToFile('rosalind_pper_output.txt', str(answer))
Beispiel #6
0
def DriftToNone(N, g, m):
    """Probability that an allele has vanished after g generations.

    Models drift as a Markov chain over the number of copies of an allele
    among ``2 * N`` allele slots, using binomial transition probabilities.

    Args:
        N: population size; the chain runs over ``2 * N`` allele copies.
        g: number of generations to advance.
        m: initial number of copies of the tracked allele
            (presumably the recessive allele -- confirm against caller).

    Returns:
        The probability mass on "zero copies" after ``g`` generations
        (the integer 0 or 1 when ``g`` is 0 and no arithmetic is done).
    """
    n = 2 * N

    # curr_gen[c] = probability of exactly c copies; start is certain.
    curr_gen = [0] * (n + 1)
    curr_gen[m] = 1

    # The transition matrix is the same for every generation, so build it
    # once.  transition[x][y] = P(x copies next | y copies now).  It is
    # only built when a step is actually taken, so g == 0 never divides
    # by n.
    transition = []
    if g > 0:
        transition = [[
            nCr(n, x) * (y / n)**x * (1 - (y / n))**(n - x)
            for y in range(n + 1)
        ] for x in range(n + 1)]

    for _ in range(g):
        next_gen = [0] * (n + 1)
        for x, row in enumerate(transition):
            # Plain left-to-right accumulation keeps results bit-identical
            # to the naive double loop.
            total = 0
            for weight, mass in zip(row, curr_gen):
                total += weight * mass
            next_gen[x] = total
        curr_gen = next_gen

    return curr_gen[0]
Beispiel #7
0
def Segregation():
    """Write common logs of binomial upper-tail probabilities for 2n trials.

    Reads a positive integer ``n`` from the ``rosalind_indc.txt`` input.
    For each ``k`` in ``1 .. 2n`` the probability of at least ``k``
    successes in ``2n`` fair (p = 0.5) trials is computed -- i.e. the
    chance that two diploid siblings share at least ``k`` of their ``2n``
    chromosomes -- and its base-10 logarithm is recorded.  The values are
    written space-separated to ``rosalind_indc_output.txt``.
    """
    n = int(f.LoadFile('\\rosalind_indc.txt'))
    chromosomes = 2 * n

    log_tails = []
    for k in range(1, chromosomes + 1):
        # Sum P(exactly `shared` matches) for shared = k .. 2n.
        tail = 0
        for shared in range(k, chromosomes + 1):
            differing = chromosomes - shared
            tail += nCr(chromosomes, shared) * (0.5)**shared * (0.5)**differing
        log_tails.append(str(log10(tail)))

    f.ExportToFile('rosalind_indc_output.txt', ' '.join(log_tails))
Beispiel #8
0
def RestrictionPrediction():
    """Expected occurrences of a motif in random DNA, per GC-content.

    Reads three lines from the ``rosalind_eval.txt`` input: the length
    ``n`` of the random string, the motif ``s``, and a whitespace-separated
    list of GC-content values.  For each GC-content the expected number of
    times ``s`` occurs as a substring of a random string of length ``n``
    is computed, and the results are written space-separated to
    ``rosalind_eval_output.txt``.
    """
    lines = f.LoadFile('\\rosalind_eval.txt').splitlines()
    n = int(lines[0])
    s = lines[1]
    gc_contents = [float(token) for token in lines[2].split()]

    # Number of start positions at which s could occur.
    placements = nCr(n - len(s) + 1, 1)

    expected = []
    for gc in gc_contents:
        # Probability that one fixed window spells out s exactly.
        match_prob = 1
        for nuc in s:
            factor = (gc / 2) if nuc in 'CG' else ((1 - gc) / 2)
            match_prob = match_prob * factor
        expected.append(str(placements * match_prob))

    f.ExportToFile('rosalind_eval_output.txt', ' '.join(expected))