import sys

import dna.utils as utils


def suffix_array(text):
    """Return the suffix array of ``text`` as a list of decimal strings.

    The suffix array lists the 0-based start positions of every suffix of
    ``text`` in lexicographic order of those suffixes.

    Args:
        text: The string to index.

    Returns:
        A list of ``len(text)`` strings, each the decimal form of a start
        position, ready to be passed to ``str.join``. An empty ``text``
        yields an empty list.
    """
    # Sort the start positions by the suffix that begins there. The suffixes
    # are read from the ``text`` argument, not from a module-level variable,
    # so the function also works when imported from another module.
    order = sorted(range(len(text)), key=lambda start: text[start:])
    return [str(start) for start in order]


if __name__ == '__main__':
    s = 'AACGATAGCGGTAGA$'
    if len(sys.argv) >= 2:
        s = utils.read_file_lines(sys.argv[1])[0].strip()

    # Build the array once and reuse it for the console and the answer file.
    answer = ', '.join(suffix_array(s))
    print(answer)
    with open('output/answer_21.txt', 'w') as out:
        print(answer, file=out)
    print('Written to {}'.format(out.name))

    # Compare only after the ``with`` block has closed (and flushed) the file.
    expected = utils.read_file('output/expected_suff_arr.txt')
    actual = utils.read_file('output/answer_21.txt')
    print(actual == expected)
import sys

import dna.utils as utils
import suffix_array as sa


def inverse_burrows(text):
    """Invert a Burrows-Wheeler transform by repeated prepend-and-sort.

    Starting from the sorted characters of ``text``, each round prepends the
    transform column to the current rows and re-sorts them. After
    ``len(text) - 1`` rounds every row has ``len(text)`` characters. The
    lexicographically smallest row is then rotated left by one so that its
    leading character (the ``'$'`` terminator in the inputs used by this
    script) ends up last.

    Args:
        text: The Burrows-Wheeler transformed string.

    Returns:
        The reconstructed string, or ``''`` when ``text`` is empty.
    """
    if not text:
        return ''

    rows = sorted(text)
    # The row count is taken from the ``text`` argument, not from a
    # module-level variable, so the function is usable when imported.
    for _ in range(len(text) - 1):
        rows = sorted(symbol + row for symbol, row in zip(text, rows))

    smallest = rows[0]
    return smallest[1:] + smallest[0]


if __name__ == '__main__':
    s = 'TGTTGGAGGTTAAGTTGTCCGTGGTCCGGACCCATAAGTAGTACTGGCTCGCATCACCTTCCGTAAGGTTTATGGCAATATGGAAGTATACCCTAACTCTTTTAGTCAATCTTGGACAGATTAGCTCCTCGTTGATGCACCCCTTGAGCGCTCAGCTCCGCACGCGGGACCCGCTCTGCCTGTGTTCCTCCGACAAAGAAAAGGCACTGGAGCGATACAATGCGCTGTTTCCACGTCGGACTCCATGCGCAGCAGCATCAAGAATCTAGCGACGAGAACGGTGACCCAATCGTAGCGAACTTTGATTCGTTGAGGAACCCTCCATCGTCTGGTATTTGATGATTGTTGGCTGTCTTCGTCCACCAGGGACGTATTCGTACTGATGATGAGAGGTTATACAATCCAGTGAGTTGTCTGACCGGTTGATAGGATTTAGGTTAACTCTCAAAAACAATGCTGACGAAACTGTGCATTATAAGCAAGGTTCGGTAGGAG$ACAAGAGTGAGGGGGCAGTATTTATCCGTTCGCCCGTCCTCACGCTAGCCGGAGCAACCCCGTGTAGGATAAAGTAGTCGGGCTCTGAACGGCGGTGACTGATAACTTCGGCGTCCGAAACCTGGGTGCTATTTAGAGGGAGGATTTTACCACAGTACCTTTTTTACCGGCGTGGCTGCCCTCCGCTGTCTTCAATTCTGTACATAAAGACAGACGAGGCCATTCCTGCGATTATTAGTCCACCGCTTCCGGGCGACTGCTCATGTGCTCAGACGCAGAAAACACAAAGTAAGAGAAGGGTCTGGCTTCCATTTAGTTAACGGTCTCGGCTACTCCCTAACAGCAGCTGTATACCAATGCACCGCCCTGGAAACATATGAAGATGTATTATGGGCGCGAAGTACTCTCGCGTTCTCCACTCCGAGTAGAA'
    if len(sys.argv) >= 2:
        s = utils.read_file(sys.argv[1]).strip()

    # The inversion is quadratic in the input length, so run it only once.
    result = inverse_burrows(s)
    print(result)

    # NOTE: ``expected`` is the answer for the built-in sample above; the
    # comparison is only meaningful when no input file is supplied.
    expected = (
        'GATAATTATCGCAACGTATTAGCAGGTTCGCGCGTCTAACGAGCGTGTGGGAAATACATCCTCCCACATAATCTGTCCGGGTGTACCCAAGCTCTCCATCGGGGCCCCATACGTTCATGTACTTGACCCGGAGACCAAGGAAAATAACCGGGAGAGAATTCTGTCACCGCCATTTTGGGAATGGTCTCACTGAGGGGAGCTGGTAATGGATCGAATGTTGGCTATCGCATTAACTTGCAGCAATTCTCTGCAGAACGTTACTGCATACCAGGCGACGTCTCAAGATCCCAGCACTGCCTACGTGTCCTGTCGTATCATAACTACACTACTGTCGCGCATAGAACTATCGCGTTAGATAAGTACAGTATGTTAACCAGTTTGTCGTGTGATGGGATTTCTTTCACAATGGAACCTTCCATTTGGGGGGGTGGACGGCTGAGATCCTTTAAATGCTCCTAGGCGTAGCGTGGCCACTACGCTATAGGTGCACAGGGGAGGCTCTGAATGAAGACTTGCGCCAGACGGATAATGCTCAACGGGCATCGCCCGGAGTAGAAATGAAAGGGCGGCGCCCGTCCATCAACTTTATCGAAAGAGGTAACACAGTCCACAGATCGTCCCGCGAACATCGGGCACAGGATGGGTGACATTCTCCGCGTTTCTGGCGGATTTTAGCTGCATGGTCAACTTTAGATGTTCCACTACTGGGTTCAGGATGTTCTATCCTTCTTCTACCAATTACTGTCGGGGATTACTAGTATTTAGAACTCGATAGATACAGGTCTGTTGCATCTGCTGCCCGCGTTCTGGAGTTTGAGCGCCAGGAATCGTGACATAGCGGGATGCAGCTGTACGAAGACCCTAGGCTCAGTCTAAACATCGCCTCGCATCCCGCGGATTAAACAGCCTCTACTTGTACCACTTT$'
    )
    print(result == expected)
import sys

import dna.utils as utils


def calculate_mendels_probability(k, m, n):
    """Return the chance that a random mating yields a dominant phenotype.

    Two distinct organisms are drawn without replacement from a population
    and mated. The result is one minus the probability that their offspring
    shows the recessive phenotype.

    Args:
        k: Number of homozygous dominant organisms.
        m: Number of heterozygous organisms.
        n: Number of homozygous recessive organisms.

    Returns:
        The probability as a float.

    Raises:
        ZeroDivisionError: If the population holds fewer than two organisms.
    """
    population = k + m + n
    others = population - 1

    def drawn_in_order(first_count, second_count):
        # Probability of drawing from the first group and then, out of the
        # organisms that are left, from the second group.
        return (first_count / population) * (second_count / others)

    # Both parents homozygous recessive.
    both_recessive = drawn_in_order(n, n - 1)
    # Both parents heterozygous.
    both_hetero = drawn_in_order(m, m - 1)
    # One heterozygous and one recessive parent, in either draw order.
    mixed = drawn_in_order(m, n) + drawn_in_order(n, m)

    # Weight each pairing by its chance of producing a recessive child:
    # always for two recessive parents, 1/4 for two heterozygous parents and
    # 1/2 for a heterozygous/recessive pair.
    recessive_child = both_recessive + both_hetero / 4 + mixed / 2
    return 1 - recessive_child


if __name__ == '__main__':
    dominant, heterozygous, homozygous = [2, 2, 2]
    if len(sys.argv) >= 2:
        raw_counts = utils.read_file(sys.argv[1])
        dominant, heterozygous, homozygous = map(int, raw_counts.split(' '))
    print(calculate_mendels_probability(dominant, heterozygous, homozygous))
import sys

import dna.utils as utils
from Bio import pairwise2
from Bio.SubsMat import MatrixInfo as matlist


def local_alignment(s, t):
    """Locally align protein strings ``s`` and ``t`` with the PAM250 matrix.

    Both gap arguments passed to ``pairwise2.align.localds`` are -5.

    Returns:
        Whatever ``pairwise2.align.localds`` returns; the script below treats
        it as a sequence of alignments and uses the first one.
    """
    return pairwise2.align.localds(s, t, matlist.pam250, -5, -5)


if __name__ == '__main__':
    file_content = '>Rosalind_67\nMEANLYPRTEINSTRING\n>Rosalind_17\nPLEASANTLYEINSTEIN'
    if len(sys.argv) >= 2:
        file_content = utils.read_file(sys.argv[1])
    s, t = utils.parse_fasta(file_content)

    # Only the first reported alignment is used. Its fields are read by
    # position: 0 and 1 are the two aligned strings, 2 is the score, and
    # 3 and 4 bound the aligned region.
    best = local_alignment(s, t)[0]
    begin, end = best[3], best[4]

    report = [str(int(best[2]))]
    for aligned in (best[0], best[1]):
        # Cut out the aligned region, then drop the gap characters.
        report.append(aligned[begin:end].replace('-', ''))
    print('\n'.join(report))
import sys

import dna.utils as utils
import reverse_complement as rvc


def edge(elmmt, k):
    """Format one graph edge for a k-mer as ``(prefix,suffix)``.

    Args:
        elmmt: The k-mer string.
        k: Length of the k-mer.

    Returns:
        A string made of the first ``k - 1`` characters and the characters
        at positions ``1`` to ``k - 1``, comma-separated inside parentheses.
    """
    return '({},{})'.format(elmmt[:k - 1], elmmt[1:k])


def de_bruijn(sequences):
    """Return the edges built from ``sequences`` and their complements.

    Every input string and the value of
    ``rvc.complementary_nucleotide`` for it (presumably its reverse
    complement -- confirm against that module) are collected into a set, so
    duplicates collapse, and each distinct string is turned into one edge.

    Args:
        sequences: A list of equal-length strings; the length of the first
            one is used as ``k`` for every edge.

    Returns:
        A list of edge strings as produced by ``edge``, ordered by the
        string each edge was built from. Empty input yields an empty list.
    """
    if not sequences:
        return []

    k = len(sequences[0])
    kmers = set()
    for s in sequences:
        kmers.add(s)
        kmers.add(rvc.complementary_nucleotide(s))

    # Set iteration order differs between interpreter runs; sorting makes
    # the written answer reproducible.
    return [edge(kmer, k) for kmer in sorted(kmers)]


if __name__ == '__main__':
    sequences = ['TGAT', 'CATG', 'TCAT', 'ATGC', 'CATC', 'CATC']
    if len(sys.argv) >= 2:
        data = utils.read_file(sys.argv[1])
        sequences = data.splitlines()
    graph = de_bruijn(sequences)
    with open('output/answer_13.txt', 'w') as answer:
        print('\n'.join(graph), file=answer)