예제 #1
0
import sys
import dna.utils as utils


def suffix_array(text):
    """Return the suffix array of ``text``.

    The suffix array lists the starting positions of all suffixes of
    ``text`` in lexicographic order of the suffixes themselves.

    Args:
        text: The string to index.

    Returns:
        A list of start positions, each rendered as a decimal string (so the
        result can be passed straight to ``', '.join``). An empty ``text``
        yields an empty list.
    """
    # Sort the start positions by the suffix each one begins.  All suffixes
    # have different lengths, so no two compare equal and no tie-break is
    # needed.  The slices are taken from the ``text`` argument, never from a
    # module-level variable.
    ordered_starts = sorted(range(len(text)), key=lambda start: text[start:])
    return [str(start) for start in ordered_starts]


if __name__ == '__main__':
    # Built-in sample input; replaced by the first line of the file named on
    # the command line when one is given.
    s = 'AACGATAGCGGTAGA$'
    if len(sys.argv) >= 2:
        s = utils.read_file_lines(sys.argv[1])[0].strip()

    # Build the suffix array once and reuse the rendered text for both the
    # console and the answer file.
    rendered = ', '.join(suffix_array(s))
    print(rendered)

    with open('output/answer_21.txt', 'w') as out:
        print(rendered, file=out)
        print('Written to {}'.format(out.name))

    # Compare the freshly written answer against the stored reference output.
    expected = utils.read_file('output/expected_suff_arr.txt')
    actual = utils.read_file('output/answer_21.txt')

    print(actual == expected)
import sys
import dna.utils as utils
import suffix_array as sa


def inverse_burrows(text):
    """Invert a Burrows-Wheeler transform by rebuilding the rotation table.

    The table starts as the sorted characters of ``text`` (the first column
    of the sorted rotation matrix).  On every pass the transform is prepended
    column-wise and the rows are re-sorted, so after ``len(text) - 1`` passes
    each row is a complete rotation of the original string.

    Args:
        text: The transformed string.  Assumes it contains an end marker
            (such as ``'$'``) that sorts before every other character, so the
            first sorted rotation begins with that marker.

    Returns:
        The first sorted rotation with its leading character moved to the
        end, i.e. the original string terminated by the end marker.  An empty
        ``text`` yields an empty string.
    """
    if not text:
        # No rotations exist, so there is no first row to read.
        return ''

    rows = sorted(text)
    for _ in range(len(text) - 1):
        # Prepend the transform column to the current rows, then re-sort.
        # Pairing is driven by ``text`` itself, not by any global variable.
        rows = sorted(char + row for char, row in zip(text, rows))

    first_rotation = rows[0]
    # Rotate left by one so the leading end marker ends up last.
    return first_rotation[1:] + first_rotation[0]


if __name__ == '__main__':
    # Built-in sample transform; replaced by the contents of the file named on
    # the command line when one is given.
    s = 'TGTTGGAGGTTAAGTTGTCCGTGGTCCGGACCCATAAGTAGTACTGGCTCGCATCACCTTCCGTAAGGTTTATGGCAATATGGAAGTATACCCTAACTCTTTTAGTCAATCTTGGACAGATTAGCTCCTCGTTGATGCACCCCTTGAGCGCTCAGCTCCGCACGCGGGACCCGCTCTGCCTGTGTTCCTCCGACAAAGAAAAGGCACTGGAGCGATACAATGCGCTGTTTCCACGTCGGACTCCATGCGCAGCAGCATCAAGAATCTAGCGACGAGAACGGTGACCCAATCGTAGCGAACTTTGATTCGTTGAGGAACCCTCCATCGTCTGGTATTTGATGATTGTTGGCTGTCTTCGTCCACCAGGGACGTATTCGTACTGATGATGAGAGGTTATACAATCCAGTGAGTTGTCTGACCGGTTGATAGGATTTAGGTTAACTCTCAAAAACAATGCTGACGAAACTGTGCATTATAAGCAAGGTTCGGTAGGAG$ACAAGAGTGAGGGGGCAGTATTTATCCGTTCGCCCGTCCTCACGCTAGCCGGAGCAACCCCGTGTAGGATAAAGTAGTCGGGCTCTGAACGGCGGTGACTGATAACTTCGGCGTCCGAAACCTGGGTGCTATTTAGAGGGAGGATTTTACCACAGTACCTTTTTTACCGGCGTGGCTGCCCTCCGCTGTCTTCAATTCTGTACATAAAGACAGACGAGGCCATTCCTGCGATTATTAGTCCACCGCTTCCGGGCGACTGCTCATGTGCTCAGACGCAGAAAACACAAAGTAAGAGAAGGGTCTGGCTTCCATTTAGTTAACGGTCTCGGCTACTCCCTAACAGCAGCTGTATACCAATGCACCGCCCTGGAAACATATGAAGATGTATTATGGGCGCGAAGTACTCTCGCGTTCTCCACTCCGAGTAGAA'

    if len(sys.argv) >= 2:
        s = utils.read_file(sys.argv[1]).strip()

    # The reconstruction re-sorts the whole table once per character, so run
    # it a single time and reuse the result for both prints.
    decoded = inverse_burrows(s)
    print(decoded)

    expected = 'GATAATTATCGCAACGTATTAGCAGGTTCGCGCGTCTAACGAGCGTGTGGGAAATACATCCTCCCACATAATCTGTCCGGGTGTACCCAAGCTCTCCATCGGGGCCCCATACGTTCATGTACTTGACCCGGAGACCAAGGAAAATAACCGGGAGAGAATTCTGTCACCGCCATTTTGGGAATGGTCTCACTGAGGGGAGCTGGTAATGGATCGAATGTTGGCTATCGCATTAACTTGCAGCAATTCTCTGCAGAACGTTACTGCATACCAGGCGACGTCTCAAGATCCCAGCACTGCCTACGTGTCCTGTCGTATCATAACTACACTACTGTCGCGCATAGAACTATCGCGTTAGATAAGTACAGTATGTTAACCAGTTTGTCGTGTGATGGGATTTCTTTCACAATGGAACCTTCCATTTGGGGGGGTGGACGGCTGAGATCCTTTAAATGCTCCTAGGCGTAGCGTGGCCACTACGCTATAGGTGCACAGGGGAGGCTCTGAATGAAGACTTGCGCCAGACGGATAATGCTCAACGGGCATCGCCCGGAGTAGAAATGAAAGGGCGGCGCCCGTCCATCAACTTTATCGAAAGAGGTAACACAGTCCACAGATCGTCCCGCGAACATCGGGCACAGGATGGGTGACATTCTCCGCGTTTCTGGCGGATTTTAGCTGCATGGTCAACTTTAGATGTTCCACTACTGGGTTCAGGATGTTCTATCCTTCTTCTACCAATTACTGTCGGGGATTACTAGTATTTAGAACTCGATAGATACAGGTCTGTTGCATCTGCTGCCCGCGTTCTGGAGTTTGAGCGCCAGGAATCGTGACATAGCGGGATGCAGCTGTACGAAGACCCTAGGCTCAGTCTAAACATCGCCTCGCATCCCGCGGATTAAACAGCCTCTACTTGTACCACTTT$'

    print(decoded == expected)
예제 #3
0
import sys
import dna.utils as utils


def calculate_mendels_probability(k, m, n):
    """Return the chance that a random mating pair yields a dominant phenotype.

    Two distinct individuals are drawn without replacement from a population
    of ``k + m + n`` organisms, and the probability that their offspring
    shows the dominant phenotype is returned.

    Args:
        k: Count of the first group; it only contributes to the population
            size (the caller names it "dominant").
        m: Count of heterozygous individuals.
        n: Count of recessive individuals.

    Returns:
        One minus the probability of a fully recessive offspring.

    Raises:
        ZeroDivisionError: If the population holds fewer than two individuals.
    """
    population = k + m + n
    remaining = population - 1

    def draw_pair(first_count, second_count):
        # Probability of drawing from the first group, then from the second
        # group out of the individuals that are left.
        return (first_count / population) * (second_count / remaining)

    # Pairings that can produce a recessive child, by parental genotype.
    both_recessive = draw_pair(n, n - 1)
    both_hetero = draw_pair(m, m - 1)
    hetero_with_recessive = draw_pair(m, n) + draw_pair(n, m)

    # Recessive offspring odds per pairing: always, one in four, one in two.
    recessive_child = both_recessive + both_hetero / 4 + hetero_with_recessive / 2

    return 1 - recessive_child


if __name__ == '__main__':
    # Sample population counts; replaced by three space-separated integers
    # read from the file named on the command line when one is given.
    counts = (2, 2, 2)
    if len(sys.argv) >= 2:
        raw_counts = utils.read_file(sys.argv[1])
        counts = tuple(int(token) for token in raw_counts.split(' '))

    first_count, hetero_count, last_count = counts
    print(calculate_mendels_probability(first_count, hetero_count, last_count))
예제 #4
0
import sys
import dna.utils as utils
from Bio import pairwise2
from Bio.SubsMat import MatrixInfo as matlist


def local_alignment(s, t):
    """Locally align the protein strings ``s`` and ``t`` with PAM250 scoring.

    Delegates to ``pairwise2.align.localds`` using ``matlist.pam250`` as the
    substitution matrix and ``-5`` for both trailing penalty arguments.

    Returns:
        Whatever ``localds`` returns; the calling script takes element ``[0]``
        and indexes it as an alignment tuple.
    """
    # NOTE(review): the two -5 values are presumably the gap-open and
    # gap-extend penalties of localds - confirm against the Biopython docs.
    first_penalty = -5
    second_penalty = -5
    scoring_matrix = matlist.pam250
    aligner = pairwise2.align.localds
    return aligner(s, t, scoring_matrix, first_penalty, second_penalty)


if __name__ == '__main__':
    # Use the built-in FASTA sample unless an input file is given.
    if len(sys.argv) >= 2:
        file_content = utils.read_file(sys.argv[1])
    else:
        file_content = '>Rosalind_67\nMEANLYPRTEINSTRING\n>Rosalind_17\nPLEASANTLYEINSTEIN'

    s, t = utils.parse_fasta(file_content)

    # Only the first alignment reported is used.
    best = local_alignment(s, t)[0]
    score = int(best[2])
    begin, end = best[3], best[4]

    # Cut both aligned strings to the [begin, end) window and drop the gap
    # characters so only the matched substrings are printed.
    substring_s = best[0][begin:end].replace('-', '')
    substring_t = best[1][begin:end].replace('-', '')

    report_lines = [str(score), substring_s, substring_t]
    print('\n'.join(report_lines))
예제 #5
0
import dna.utils as utils
import reverse_complement as rvc


def edge(elmmt, k):
    """Format the k-mer ``elmmt`` as a ``(prefix,suffix)`` edge string.

    Args:
        elmmt: The k-mer to split.
        k: The k-mer length; the prefix is ``elmmt[:k - 1]`` and the suffix
            is ``elmmt[1:k]``.

    Returns:
        The text ``'(' + prefix + ',' + suffix + ')'``, with no spaces.
    """
    prefix = elmmt[:k - 1]
    suffix = elmmt[1:k]
    return '({},{})'.format(prefix, suffix)


def de_bruijn(sequences):
    """Build the edge list of the de Bruijn graph for a set of k-mers.

    Every input k-mer and its counterpart from
    ``rvc.complementary_nucleotide`` is collected into a set, so duplicates
    collapse.  Each distinct k-mer is then rendered as an edge by ``edge``.

    Args:
        sequences: A sequence of equal-length strings.  ``k`` is taken from
            the length of the first one.

    Returns:
        A list of edge strings, ordered by the k-mer they come from so the
        output is the same on every run.  An empty ``sequences`` yields an
        empty list.
    """
    if not sequences:
        # Nothing to read k from, and no edges to emit.
        return []

    k = len(sequences[0])

    kmers = set(sequences)
    # NOTE(review): presumably complementary_nucleotide returns the reverse
    # complement of the k-mer - confirm in the reverse_complement module.
    kmers.update(rvc.complementary_nucleotide(kmer) for kmer in sequences)

    # Set iteration order for strings can change between interpreter runs, so
    # sort before formatting to keep the answer file stable.
    return [edge(kmer, k) for kmer in sorted(kmers)]


if __name__ == '__main__':
    # This script's import list does not include ``sys``, yet ``sys.argv`` is
    # read below; import it here so the check cannot fail with a NameError.
    import sys

    # Built-in sample k-mers; replaced by the lines of the file named on the
    # command line when one is given.
    sequences = ['TGAT', 'CATG', 'TCAT', 'ATGC', 'CATC', 'CATC']
    if len(sys.argv) >= 2:
        data = utils.read_file(sys.argv[1])
        sequences = data.splitlines()

    graph = de_bruijn(sequences)

    # One edge per line in the answer file.
    with open('output/answer_13.txt', 'w') as answer:
        print('\n'.join(graph), file=answer)