from collections import Counter


def major_el(arr):
    """Return the majority element of ``arr``, or -1 when there is none.

    A majority element occurs strictly more than ``len(arr) / 2`` times, so
    an element that fills exactly half of the array does not qualify.

    Args:
        arr: Sequence of hashable items.

    Returns:
        The majority element, or -1 if no element qualifies (including the
        case of an empty ``arr``).
    """
    if not arr:
        return -1
    best, best_count = Counter(arr).most_common(1)[0]
    # Integer comparison avoids float division; "<=" rejects exact halves.
    if 2 * best_count <= len(arr):
        return -1
    return best


if __name__ == "__main__":
    # Imported here so that importing this module for ``major_el`` does not
    # require the project-local Utilities package.
    from Utilities import file_reader

    input_list = file_reader("rosalind_maj.txt")
    # First line holds the number of arrays (k) and their length (n).
    k, n = list(map(int, input_list[0].split()))
    assert len(input_list[1:]) == k
    results = [
        major_el(list(map(int, input_list[i].split())))
        for i in range(1, k + 1)
    ]
    print(" ".join(map(str, results)))
import os


def gc_counter(string):
    """Return the fraction of characters in ``string`` that are G or C.

    The comparison is case-insensitive.

    Args:
        string: Sequence of single-character bases.

    Returns:
        A float between 0.0 and 1.0; 0.0 for an empty sequence (which would
        otherwise divide by zero).
    """
    if not string:
        return 0.0
    gc = sum(1 for b in string if b.lower() in "gc")
    return float(gc) / float(len(string))


if __name__ == "__main__":
    # Imported here so that importing this module for ``gc_counter`` does not
    # require the project-local Utilities package.
    from Utilities import file_reader

    # The first element returned by the reader is discarded; the rest are
    # records of the form "<name>\n<sequence lines...>".
    input_list = file_reader("rosalind_gc.txt", fasta=True)[1:]
    best_fasta = None
    best_count = 0
    for record in input_list:
        input_sublist = record.split("\n")
        fasta_name = input_sublist[0]
        fasta_code = "".join(input_sublist[1:])
        c = gc_counter(fasta_code)
        if c > best_count:
            best_fasta = fasta_name
            best_count = c
    print(best_fasta)
    print(best_count * 100)
def tran(stringA, stringB):
    """Return the ratio of transitions to transversions between two strings.

    Positions are compared one by one over the length of ``stringA``. A
    mismatch where both symbols are in "AG" or both are in "CT" counts as a
    transition; every other mismatch counts as a transversion.

    Raises:
        ZeroDivisionError: if no transversion is found.
        IndexError: if ``stringB`` is shorter than ``stringA``.
    """
    same_class = 0
    cross_class = 0
    for pos, base_a in enumerate(stringA):
        base_b = stringB[pos]
        if base_a == base_b:
            continue
        both_in_ag = base_a in "AG" and base_b in "AG"
        both_in_ct = base_a in "CT" and base_b in "CT"
        if both_in_ag or both_in_ct:
            same_class += 1
        else:
            cross_class += 1
    return same_class / cross_class


if __name__ == "__main__":
    # Only the script section needs the project-local reader.
    from Utilities import file_reader

    a = "GCAACGCACAACGAAAACCCTTAGGGACTGGATTATTTCGTGATCGTTGTAGTTATTGGAAGTACGGGCATCAACCCAGTT"
    b = "TTATCTGACAAAGAAAGCCGTCAACGGCTGGATAATTTCGCGATCGTGCTGGTTACTGGCGGTACGAGTGTTCCTTTGGGT"
    print(tran(a, b))

    dataset = file_reader("rosalind_tran.txt", fasta=True)[1:]
    print(dataset)
    # Each record is "<name>\n<sequence lines...>"; drop the name line.
    first_record, second_record = dataset[0], dataset[1]
    a = "".join(first_record.split("\n")[1:])
    b = "".join(second_record.split("\n")[1:])
    assert len(a) == len(b)
    print(tran(a, b))
def pdst(l):
    """Build the matrix of pairwise mismatch fractions for the strings in ``l``.

    Entry ``[r][c]`` is the number of positions at which ``l[r]`` and ``l[c]``
    differ, divided by ``len(l[r])``. Positions are taken over the length of
    the row string.
    """
    matrix = []
    for first in l:
        length = len(first)
        row = []
        for second in l:
            mismatches = sum(
                1 for idx in range(length) if first[idx] != second[idx]
            )
            row.append(mismatches / length)
        matrix.append(row)
    return matrix


if __name__ == "__main__":
    # Only the script section needs the project-local reader.
    from Utilities import file_reader

    sample_l = ["TTTCCATTTA", "GATTCATTTC", "TTTCCATTTT", "GTTCCATTTA"]
    for row in pdst(sample_l):
        print(" ".join(list(map(str, row))))

    dataset = file_reader("rosalind_pdst.txt", fasta=True)[1:]
    # Each record is "<name>\n<sequence lines...>"; keep only the sequence.
    final_list = ["".join(el.split("\n")[1:]) for el in dataset]
    for row in pdst(final_list):
        print(" ".join(list(map(str, row))))
if j == -value and i not in dict_positives[j]: index1, index2 = dict_positives[j] return index1 + 1, index2 + 1, i + 1 elif value > 0: for j in dict_negatives.keys(): if j == -value and i not in dict_negatives[j]: index1, index2 = dict_negatives[j] return index1 + 1, index2 + 1, i + 1 else: for j in dict_zeros.keys(): if i not in dict_zeros[j]: index1, index2 = dict_zeros[j] return index1 + 1, index2 + 1, i + 1 else: return [-1] l = file_reader("rosalind_3sum.txt") k = l[0].split()[0] results = [] input_sample = [ "4 5", "2 -3 4 10 5", "8 -6 4 -2 -8", "-5 2 3 2 -4", "2 4 -5 6 8" ] k_sample = input_sample[0].split()[0] start = time.time() for ind in range(1, int(k) + 1): result = sum_3(list(map(int, l[ind].split()))) print(" ".join(list(map(str, sorted(result))))) print(time.time() - start)
# NOTE(review): the two statements below are the tail of a function whose
# header lies outside this excerpt. Their indentation is reconstructed on the
# assumption that they sit directly in that function's body -- TODO confirm.
    print("Merge_arrays2 took {} seconds".format(time.time() - start))
    return sorted


def merge_arrays(a, b):
    """Merge ``a`` and ``b`` into one list and count cross-list inversions.

    Elements are taken from the front of ``a`` while ``a[i] <= b[j]``;
    otherwise the front of ``b`` is taken and the number of elements still
    left in ``a`` is added to the counter.

    Returns:
        A tuple ``(counter, sorted_arr)``.
    """
    # assumes both inputs are already sorted ascending -- TODO confirm
    i = j = 0
    sorted_arr = []
    counter = 0
    while i < len(a) and j < len(b):
        if a[i] <= b[j]:
            sorted_arr.append(a[i])
            i += 1
        else:
            sorted_arr.append(b[j])
            j += 1
            # b[j] was placed before every element remaining in ``a``.
            counter += len(a) - i
    # At most one of the two slices is non-empty here.
    sorted_arr += a[i:] + b[j:]
    return counter, sorted_arr


if __name__ == "__main__":
    # Sample arrays; both are overwritten by the file contents below.
    a = [2, 4, 10, 18]
    b = [-5, 11, 12]
    ls = file_reader("rosalind_mer.txt")
    # The arrays are read from the second and fourth entries of the input.
    a = list(map(int, ls[1].split()))
    b = list(map(int, ls[3].split()))
    # ``ls`` is rebound to the merged list; the counter ``n`` is not printed.
    n, ls = merge_arrays(a, b)
    s = " ".join(list(map(str, ls)))
    print(s)
def sseq(s, t):
    """Return 1-based positions in ``s`` that spell ``t`` as a subsequence.

    The scan is greedy: each symbol of ``t`` is matched at the earliest
    possible position of ``s`` after the previous match.

    Args:
        s: The string to search in.
        t: The subsequence to locate.

    Returns:
        A list of 1-based indices, one per matched symbol of ``t``. If ``t``
        is not fully contained in ``s`` the list covers only the prefix that
        was matched. An empty ``t`` yields an empty list.
    """
    indices = []
    if not t:
        # Nothing to match; also avoids indexing into an empty pattern.
        return indices
    i_t = 0
    for position, symbol in enumerate(s, start=1):
        if symbol == t[i_t]:
            indices.append(position)
            i_t += 1
            if i_t == len(t):
                break
    return indices


if __name__ == "__main__":
    # Imported here so that importing this module for ``sseq`` does not
    # require the project-local Utilities package.
    from Utilities import file_reader

    sample_string = "ACGTACGTGACG"
    sample_seq = "GTA"
    print((sseq(sample_string, sample_seq)))

    not_splitted_string, not_splitted_seq = file_reader(
        "rosalind_sseq (1).txt", fasta=True)[1:]
    # Each record is "<name>\n<sequence lines...>". Join every sequence line
    # for both records so a pattern wrapped over several lines is not cut.
    string = "".join(not_splitted_string.split("\n")[1:])
    seq = "".join(not_splitted_seq.split("\n")[1:])
    r = list(map(str, sseq(string, seq)))
    print(" ".join(r))
def graph(string_list, k=3):
    """List the overlap edges between the given records.

    Each item of ``string_list`` is "<name>\n<sequence lines...>". An edge
    "<name1> <name2>" is reported when the last ``k`` characters of the first
    sequence equal the first ``k`` characters of the second one. Pairs whose
    sequences are identical are skipped.

    Returns:
        A list of "<name1> <name2>" strings, ordered by first record and then
        by second record, both in input order.
    """
    sequences = {}
    ordered_names = []
    for record in string_list:
        header, *body = record.split("\n")
        sequences[header] = "".join(body)
        ordered_names.append(header)

    edges = []
    for tail_name in ordered_names:
        tail_seq = sequences[tail_name]
        suffix = tail_seq[-k:]
        for head_name in ordered_names:
            head_seq = sequences[head_name]
            if head_seq != tail_seq and head_seq[:k] == suffix:
                edges.append(" ".join([tail_name, head_name]))
    return edges


if __name__ == "__main__":
    sample_string_list = [
        ">Rosalind_0498\nAAATAAA",
        ">Rosalind_2391\nAAATTTT",
        ">Rosalind_2323\nTTTTCCC",
        ">Rosalind_0442\nAAATCCC",
        ">Rosalind_5013\nGGGTGGG"
    ]
    print(graph(sample_string_list))

    dataset = file_reader("rosalind_grph.txt", fasta=True)[1:]
    answer_path = os.path.expanduser("~/downloads/answer.txt")
    with open(answer_path, "w") as file:
        file.write("\n".join(graph(dataset)))
# NOTE(review): the statements down to the blank lines before the __main__
# guard are the tail of a function whose header lies outside this excerpt
# (the script below calls it as ``cons``). Nesting depth was reconstructed
# from the statement order and must be checked against the full file.
                print("character not recognised")
        # Append the symbol of ``b_list`` at the index of the largest count.
        final_string += b_list[np.argmax(np.array(b_counts))]
        profile_matrix.append(b_counts)
    # Transpose so that each row corresponds to one entry of ``b_list``.
    profile_matrix_t = np.array(profile_matrix).transpose()
    print(final_string.upper())
    print(profile_matrix_t)
    if save_file:
        # First pass truncates the file and writes the consensus line.
        with open(os.path.expanduser("~/downloads/" + file_name), "w") as file:
            file.write(final_string.upper() + "\n")
        # Second pass re-opens the same path in append mode for the rows.
        with open(os.path.expanduser("~/downloads/" + file_name), "a+") as file:
            for i in range(len(profile_matrix_t)):
                string = b_list[i].upper() + ": " + " ".join(
                    list(map(str, profile_matrix_t[i]))) + "\n"
                # Each row must have one count per position of the first input.
                assert len(profile_matrix_t[i]) == len(strings_list[0])
                file.write(string)


if __name__ == "__main__":
    input_sample_list = [
        "ATCCAGCT", "GGGCAACT", "ATGGATCT", "AAGCAACC", "TTGGAACT",
        "ATGCCATT", "ATGGCACT"
    ]
    cons(input_sample_list, save_file=True, file_name="sample.txt")

    true_input = file_reader("rosalind_cons.txt", fasta=True)[1:]
    final_list = []
    for string in true_input:
        # Drop the first line of each record and join the remaining lines.
        final_list.append("".join(string.split("\n")[1:]))
    # No file_name is passed here, so the function's default is used.
    cons(final_list, save_file=True)
def find_motif(string, substring):
    """Return the 1-based start positions of ``substring`` in ``string``.

    Overlapping occurrences are all reported, including one that ends
    exactly at the end of ``string``.

    Args:
        string: The text to search in.
        substring: The motif to look for.

    Returns:
        A list of 1-based positions in ascending order; empty when the motif
        does not occur or is itself empty.
    """
    if not substring:
        # An empty motif has no meaningful location.
        return []
    indices = []
    index = string.find(substring)
    while index >= 0:
        indices.append(index + 1)
        # Resume one character later so overlapping matches are found.
        index = string.find(substring, index + 1)
    return indices


if __name__ == "__main__":
    # Imported here so that importing this module for ``find_motif`` does not
    # require the project-local Utilities package.
    from Utilities import file_reader

    sample_string = "GATATATGCATATACTT"
    sample_substring = "ATAT"
    print(find_motif(sample_string, sample_substring))

    true_dataset = file_reader("rosalind_subs.txt")
    print(len(true_dataset))
    string, substring = true_dataset
    print(" ".join(list(map(str, find_motif(string, substring)))))
from Utilities import file_reader
from Mer import merge_arrays
import os


def merge_sort(a):
    """Sort ``a`` by recursive halving and return ``(count, sorted_list)``.

    The count is the sum of the counters reported by ``merge_arrays`` for
    every merge step plus the counts of both recursive calls. A list with
    fewer than two elements is returned unchanged with a count of 0.
    """
    if len(a) < 2:
        return 0, a
    middle = len(a) // 2
    lower_total, lower_half = merge_sort(a[:middle])
    upper_total, upper_half = merge_sort(a[middle:])
    merge_total, merged = merge_arrays(lower_half, upper_half)
    return merge_total + lower_total + upper_total, merged


if __name__ == "__main__":
    l = file_reader("rosalind_ms (1).txt")
    # The array to sort is on the second line of the input.
    ls = [int(token) for token in l[1].split()]
    n, sorted_arr = merge_sort(ls)
    ans = " ".join(str(value) for value in sorted_arr)
    with open(os.path.expanduser("~/downloads/answer.txt"), "w") as file_:
        file_.write(ans)
from itertools import product
import os


def lexf(string, k=2):
    """Return an iterator over all length-``k`` tuples of the given symbols.

    Whitespace is stripped from ``string`` first, so "A C G T" and "ACGT"
    give the same alphabet. Tuples come out in the order produced by
    ``itertools.product``, i.e. following the order of the symbols.
    """
    alphabet = "".join(string.split())
    return product(alphabet, repeat=k)


if __name__ == "__main__":
    # Only the script section needs the project-local reader.
    from Utilities import file_reader

    sample_string = "A C G T"
    for a, b in lexf(sample_string):
        print(a, b)

    # The last element returned by the reader is discarded.
    true_string, k = file_reader("rosalind_lexf.txt")[:-1]
    k = int(k)
    final_string = "".join(
        "".join(word) + "\n" for word in lexf(true_string, k)
    )
    with open(os.path.expanduser("~/downloads/answer.txt"), "w") as file:
        file.write(final_string)
    print(final_string)
def par(arr):
    """Three-way partition ``arr`` in place around its first element.

    After the call, elements smaller than the pivot come first, then all
    elements equal to the pivot, then the larger ones. The same list object
    is returned.

    Raises:
        IndexError: if ``arr`` is empty.
    """
    pivot = arr[0]
    boundary = len(arr) - 1

    # Pass 1: walk from the right and push values above the pivot to the end.
    for scan in reversed(range(1, len(arr))):
        if arr[scan] > pivot:
            arr[scan], arr[boundary] = arr[boundary], arr[scan]
            boundary -= 1

    # Put the pivot just left of the "greater" region.
    arr[boundary], arr[0] = arr[0], arr[boundary]
    boundary -= 1

    # Pass 2: gather the remaining copies of the pivot next to it.
    for scan in reversed(range(boundary + 1)):
        if arr[scan] == pivot:
            arr[scan], arr[boundary] = arr[boundary], arr[scan]
            boundary -= 1
    return arr


if __name__ == "__main__":
    # Only the script section needs the project-local reader.
    from Utilities import file_reader

    input_list = file_reader("rosalind_par3.txt")
    sample = [4, 5, 6, 4, 1, 2, 5, 7, 4]
    # The array to partition is on the second line of the input.
    array = [int(token) for token in input_list[1].split()]
    par(array)
    par(sample)
    print(sample)
    answer = " ".join(str(value) for value in array)
    print(answer)
def tree(n, l):
    """Return how many edges must be added to turn the graph into a tree.

    A tree on ``n`` nodes has ``n - 1`` edges, so the answer is ``n - 1``
    minus the number of edges already present.

    Args:
        n: Number of nodes.
        l: Adjacency-list lines. A line with at least two whitespace-separated
            tokens counts as one edge; a line with fewer tokens is reported
            on stdout and ignored.

    Returns:
        ``n - 1 - <number of edge lines>``.

    Note:
        ``l`` is not modified. Counting instead of popping while iterating
        avoids skipped entries and an ``IndexError`` when a singleton line is
        not the last one.
    """
    edges = 0
    for line in l:
        if len(line.split()) < 2:
            print("Singleton element found: ", line)
        else:
            edges += 1
    return n - 1 - edges


if __name__ == "__main__":
    # Imported here so that importing this module for ``tree`` does not
    # require the project-local Utilities package.
    from Utilities import file_reader

    l_sample = [("1 2"), ("2 8"), ("4 10"), ("5 9"), ("6, 10"), ("7 9"), ("3")]
    n_sample = 10
    print(tree(n_sample, l_sample))

    # The last element returned by the reader is discarded.
    dataset = file_reader("rosalind_tree.txt")[:-1]
    print(dataset)
    n = int(dataset[0])
    l = dataset[1:]
    print(tree(n, l))
def hamm_dist(a, b):
    """Count the positions at which ``a`` and ``b`` hold different symbols.

    Positions are taken over the length of ``a``.

    Raises:
        IndexError: if ``b`` is shorter than ``a``.
    """
    return sum(1 for pos, symbol in enumerate(a) if symbol != b[pos])


if __name__ == "__main__":
    # Only the script section needs the project-local reader.
    from Utilities import file_reader

    # The last element returned by the reader is discarded.
    string_a, string_b = file_reader("rosalind_hamm (1).txt")[:-1]
    string_a_sample = "GAGCCTACTAACGGGAT"
    string_b_sample = "CATCGTAATGACGGCCT"
    print(hamm_dist(string_a_sample, string_b_sample))
    print("Now the true dataset:")
    print(hamm_dist(string_a, string_b))
from Rna import dna_to_rna
from Revp import complementary_conv

START_CODON = "AUG"


def orf(s):
    """Collect the distinct translations that start at any start codon.

    Both ``s`` and the result of ``complementary_conv`` applied to the
    reversed ``s`` are converted with ``dna_to_rna``. At every offset where
    either strand shows ``START_CODON``, the rest of that strand is handed to
    ``from_rna_to_protein``; results that are not ``None`` are kept.

    Returns:
        A set of the collected translations.
    """
    reverse_strand = complementary_conv(s[::-1])
    forward_rna = dna_to_rna(s)
    reverse_rna = dna_to_rna(reverse_strand)

    proteins = set()
    for offset in range(len(s)):
        for strand in (forward_rna, reverse_rna):
            if strand[offset:offset + 3] != START_CODON:
                continue
            protein = from_rna_to_protein(strand[offset:])
            if protein is not None:
                proteins.add(protein)
    return proteins


if __name__ == "__main__":
    sample_string = "AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG"
    print("\n".join(orf(sample_string)))
    print()

    dataset = file_reader("rosalind_orf.txt")
    # Everything after the first entry is joined into one sequence.
    string = "".join(dataset[1:])
    print("\n".join(orf(string)))
# NOTE(review): the statements down to ``return converted`` are the tail of a
# function whose header lies outside this excerpt. Nesting depth was
# reconstructed from the statement order and must be checked against the
# full file.
            converted += "G"
        else:
            raise ValueError("Character not recognized")
    return converted


def revp(string, n):
    """Find substrings of ``string`` that match the reversed converted strand.

    ``string`` is upper-cased and passed through ``complementary_conv``. For
    every start ``i`` and every end ``j`` in ``range(i + 2, i + n)`` the slice
    ``string[i:j]`` is compared with ``c_string[j:i:-1]``.

    Returns:
        A list of ``(position, length)`` tuples, where position is ``i + 1``
        and length is ``len(string[i:j]) + 1``.
    """
    string = string.upper()
    # presumably the per-base complement, not reversed -- verify against
    # the definition of complementary_conv
    c_string = complementary_conv(string)
    result = []
    for i in range(len(string) - 1):
        for j in range(i + 2, i + n):
            # c_string[j:i:-1] reads indices j down to i + 1, so the two
            # slices have equal length only while j is inside the string.
            if string[i:j] == c_string[j:i:-1]:
                # Reported length is one more than the compared slice.
                result.append((i + 1, len(string[i:j]) + 1))
    return result


if __name__ == "__main__":
    sample_string = "TCAATGCATGCGGGTCTATATGCAT"
    n = 12
    r = revp(sample_string, n)
    for i, length in r:
        print(i, length)

    print('true dataset starts here:')
    # Only the record at index 1 of the reader's output is used.
    true_dataset = file_reader("rosalind_revp.txt", fasta=True)[1]
    processed_dataset = true_dataset.split("\n")
    # First line is kept as the name; the remaining lines form the sequence.
    fasta_name, actual_string = processed_dataset[0], "".join(
        processed_dataset[1:])
    for i, length in revp(actual_string, n):
        print(i, length)
def count_inv(arr): swaps, sorted_arr = merge_sort(arr) return sorted_arr, swaps def count_inv2(arr): start = time.time() swaps = 0 for i in range(len(arr)): smallest_i = i for j in range(i + 1, len(arr)): if arr[j] < arr[smallest_i]: smallest_i = j if smallest_i != i: arr[i], arr[smallest_i] = arr[smallest_i], arr[i] swaps += 1 print(time.time() - start) return swaps if __name__ == "__main__": input_list = file_reader("rosalind_inv.txt") l = list(map(int, input_list[1].split())) l_sample = [-6, 1, 15, 8, 10] a, n = count_inv(l_sample) print(l_sample) print(n) a, n = count_inv(l) print(a) print(n)
# NOTE(review): the statements down to the ``return`` are the tail of a
# function whose header and setup lie outside this excerpt (the script below
# calls it as ``lgs``). Indentation was reconstructed on the assumption that
# the outer ``while`` sits directly in the function body -- TODO confirm.
    while i < len(int_l):
        j = 0
        # Try every earlier element j as the predecessor of element i.
        while j < i:
            if mode == "increasing":
                condition_met = int_l[j] < int_l[i]
            elif mode == "decreasing":
                condition_met = int_l[j] > int_l[i]
            # For any other mode, condition_met keeps whatever value it had
            # (or is undefined on the first pass).
            if condition_met:
                # "<=" means a later j of equal length replaces an earlier one.
                if lengths[i] <= lengths[j] + 1:
                    lengths[i] = lengths[j] + 1
                    indices[i] = j
            j += 1
        i += 1
    # Start from the position with the largest recorded length and follow the
    # stored predecessor links back until a None link is reached.
    i = int(np.argmax(lengths))
    result = [int_l[i]]
    while indices[i] != None:
        idx = indices[i]
        result.insert(0, int_l[idx])
        i = idx
    return list(map(str, result))


if __name__ == "__main__":
    a = "5 1 4 2 3"
    print(lgs(a.split(), mode="increasing"))
    print(lgs(a.split(), mode="decreasing"))

    total_input = file_reader("rosalind_lgis.txt")
    # The sequence is on the second line of the input.
    true_test = total_input[1].split()
    print(" ".join(lgs(true_test, mode="increasing")))
    print(" ".join(lgs(true_test, mode="decreasing")))
from Utilities import file_reader, RNA_TO_PROTEIN_DICT


def from_rna_to_protein(string):
    """Translate ``string`` codon by codon until a "Stop" entry is reached.

    The input is read in steps of three characters. Chunks that are not keys
    of ``RNA_TO_PROTEIN_DICT`` are skipped.

    Returns:
        The translation accumulated before the first "Stop" entry, or
        ``None`` when the end of the input is reached without one.
    """
    residues = []
    for start in range(0, len(string), 3):
        codon = string[start:start + 3]
        if codon not in RNA_TO_PROTEIN_DICT:
            continue
        amino = RNA_TO_PROTEIN_DICT[codon]
        if amino == "Stop":
            return "".join(residues)
        residues.append(amino)
    return None


if __name__ == "__main__":
    s = file_reader("rosalind_prot.txt")[0]
    print(from_rna_to_protein(s))