""" # Brute Force (Naive Search) solution presented. # For more optimized algorithm, see Rabin–Karp algorithm which is the most suited # for multiple pattern search that uses hashed values. from reader_FASTA import read_FASTA def common_finder(n): possible_commons = [first_piece[i:n+i] for i in range(len(first_piece) - n + 1)] for common_str in possible_commons: for ind, dna_list in enumerate(all_dna_pieces[1:]): if common_str not in dna_list: break elif ind == len(all_dna_pieces) - 2: return common_str else: continue return file = open('/Users/Anuar_the_great/desktop/rosalind_lcsm.txt') data = file.readlines() all_dna_pieces = read_FASTA(data).values() first_piece = all_dna_pieces[0] file.close() for i in range(len(min(all_dna_pieces)), 1, -1 ): if common_finder(i): least_common_str = common_finder(i) print least_common_str break
>Rosalind_12 ATCGGTCGAA >Rosalind_15 ATCGGTCGAGCGTGT Sample output: MVYIADKQHVASREAYGHMFKVCA """ from reader_FASTA import read_FASTA from rna_to_prot import RNA_to_protein def RNA_splicing(data, main_dna): for id, intron in data.items(): main_dna[1] = main_dna[1].replace(intron, "") main_dna[1] = main_dna[1].replace("T", "U") main_dna[1] = RNA_to_protein(main_dna[1]) return main_dna file = open("/Users/Anuar_the_great/desktop/Data_files/rosalind_splc.txt") main_dna = [file.readline()[1:].strip("\n"), file.readline().strip("\n")] data = file.readlines() file.close() # Alternative version for read_FASTA (returns a list instead): # no_introns = filter(lambda line: line[0] != '>', data) data = read_FASTA(data) print RNA_splicing(data, main_dna)
Rosalind_0498 Rosalind_2391 Rosalind_0498 Rosalind_0442 Rosalind_2391 Rosalind_2323 """ from reader_FASTA import read_FASTA file = open('/Users/Anuar_the_great/desktop/rosalind_grph(1).txt') data = file.readlines() file.close() def adjacency_list1(dict): leest = [] for key1, value1 in dict.items(): for key2, value2 in dict.items(): if key1 != key2: if value1.endswith(value2[0:3]): leest.append([key1, key2]) return leest # More optimized code (same logic but uses list comprehension) def adjacency_list2(dict): return [[key1, key2] for key1, value1 in dict.items() for key2, value2 in dict.items()\ if key1 != key2 if value1.endswith(value2[0:3])] dict = read_FASTA(data) leest1 = adjacency_list1(dict) leest2 = adjacency_list2(dict) for nodes1, nodes2 in zip(leest1, leest2): print nodes1[0], nodes1[1], '; ', nodes2[0], nodes2[1]