print(path) print(matrix) product = 0.5 # probability of the first letter being A or B for i, k in zip(path[0::1], path[1::1]): if i == 'A' and k == 'A': product *= matrix.item(0) elif i == 'A' and k == 'B': product *= matrix.item(1) elif i == 'B' and k == 'A': product *= matrix.item(2) elif i == 'B' and k == 'B': product *= matrix.item(3) return product if __name__ == '__main__': path = 'AABBBAABABAAAABBBBAABBABABBBAABBAAAABABAABBABABBAB' transition_matrix = np.matrix([[0.194, 0.806], [0.273, 0.727]]) # read the dataset file in and construct the numpy matrix and path if len(sys.argv) >= 2: file_lines = utils.read_file_lines(sys.argv[1]) path = file_lines[0] last_line = np.array(file_lines[len(file_lines) - 1].split()[1:]) second_to_last = np.array(file_lines[len(file_lines) - 2].split()[1:]) transition_matrix = np.column_stack( (second_to_last.astype(float), last_line.astype(float))) print(format(path_probability(path, transition_matrix), '.11e'))
"""get_uniprot_fasta(uniprot_id) - makes a network GET request to the uniprot server using an id which returns a fasta file. This data is decoded into a 'utf-8' string.""" data = '' with urllib.request.urlopen('http://www.uniprot.org/uniprot/{}.fasta'.format(uniprot_id)) as response: data = response.read().decode('utf-8') return data def find_locations(id): """find_locations(id) - fetches the fasta of a protein id, then parses the fasta and looks for the locations of the protein motifs. returns an array of indcies, could be empty""" fasta = utils.parse_fasta(get_uniprot_fasta(id)) pattern = r'(?=(N[^P][ST][^P]))' locations = [] for match in re.finditer(pattern, str(fasta[0])): locations.append(match.start(0) + 1) return locations if __name__ == '__main__': dataset = ['A2Z669', 'B5ZC00', 'P07204_TRBM_HUMAN', 'P20840_SAG1_YEAST'] if len(sys.argv) >= 2: dataset = utils.read_file_lines(sys.argv[1]) # for each id, in the dataset, we figure out the locations from the remote fasta # and print out the id followed by the locations if they exist for access_id in dataset: if access_id.strip() is not '': locations = find_locations(access_id.strip()) if len(locations): print(access_id.strip()) print(' '.join(map(str, locations)))
import sys


def suffix_array(text):
    """Return the suffix array of ``text`` as a list of decimal strings.

    Entry ``k`` is the start offset (0-based) of the ``k``-th smallest suffix
    of ``text`` under Python's ordinary string ordering. An empty ``text``
    yields an empty list.
    """
    # Sort the start offsets by the suffix that begins there. The suffixes all
    # have different lengths, so no two compare equal and the order is unique.
    # The key reads the `text` argument rather than a module-level variable.
    order = sorted(range(len(text)), key=lambda start: text[start:])
    return [str(start) for start in order]


if __name__ == '__main__':
    # The project helpers are only needed when run as a script; importing them
    # here keeps suffix_array importable without the dna package.
    import dna.utils as utils

    s = 'AACGATAGCGGTAGA$'
    if len(sys.argv) >= 2:
        s = utils.read_file_lines(sys.argv[1])[0].strip()

    # Build the answer once and reuse it for both the console and the file.
    answer = ', '.join(suffix_array(s))
    print(answer)
    with open('output/answer_21.txt', 'w') as out:
        print(answer, file=out)
    print('Written to {}'.format(out.name))

    # Compare the file just written against the stored expected output.
    expected = utils.read_file('output/expected_suff_arr.txt')
    actual = utils.read_file('output/answer_21.txt')
    print(actual == expected)
import sys


def remaining_rabbits(n, m):
    """Return the number of rabbit pairs alive in month ``n``.

    The population starts with one newborn pair in month 1. Every pair that
    is at least one month old produces one new pair each month, and a pair
    is alive for exactly ``m`` months.

    Args:
        n: month to report, counted from 1.
        m: lifespan of a pair in months.

    Raises:
        ValueError: if ``n`` or ``m`` is smaller than 1.
    """
    if n < 1 or m < 1:
        raise ValueError('n and m must both be at least 1')

    # ages[k] is the number of pairs that are k months old. A pair leaves the
    # list once it would reach age m.
    ages = [1] + [0] * (m - 1)
    for _ in range(n - 1):
        # Only pairs older than zero months reproduce.
        newborns = sum(ages[1:])
        # Everyone ages by one month; the oldest bucket drops off the end.
        ages = [newborns] + ages[:-1]
    return sum(ages)


if __name__ == '__main__':
    # The project helpers are only needed when run as a script; importing them
    # here keeps remaining_rabbits importable without the dna package.
    import dna.utils as utils

    n = 6
    m = 3
    if len(sys.argv) >= 2:
        # The first line of the dataset holds "n m".
        n, m = map(int, utils.read_file_lines(sys.argv[1])[0].split())
    print(remaining_rabbits(n, m))
given an integer 'n', calculates all the possible permutations from 1 to 'n' without repeating signs""" permutations = list(it.permutations(range(1, n + 1))) prefixes = list(it.product('-+', repeat=n)) results = list(it.product(permutations, prefixes)) enumerations = [] for product in results: numbers, signs = product for i, number in enumerate(numbers): sign = signs[i] number = int(sign + str(number)) enumerations.append(number) return enumerations if __name__ == '__main__': d = 5 if len(sys.argv) >= 2: d = int(utils.read_file_lines(sys.argv)[0]) gene_orders = enumerate_oriented_gene_orders(d) print(int(len(gene_orders) / d)) iterator = iter(gene_orders) for x in iterator: result = [] result.append(x) for i in range(d - 1): result.append(next(iterator)) print(' '.join(map(str, result)))