コード例 #1
0
from Utilities import file_reader

def major_el(arr):
    """Return the majority element of ``arr``, or -1 when there is none.

    A majority element occurs strictly more than ``len(arr) / 2`` times,
    so a value that fills exactly half of the array does not qualify.

    Args:
        arr: Sequence of hashable values.

    Returns:
        The majority element, or -1 if no value occurs more than half of
        the time (this includes the empty sequence).
    """
    # Local import keeps the block independent of the file's import list.
    from collections import Counter

    if not arr:
        return -1
    # A single counting pass replaces one arr.count() scan per distinct value.
    best, best_count = Counter(arr).most_common(1)[0]
    # "Strictly more than half", written with integers to avoid float division.
    if best_count * 2 <= len(arr):
        return -1
    return best

if __name__ == "__main__":
    # The first line carries two integers "k n"; the next k lines each hold
    # one whitespace-separated integer array.
    lines = file_reader("rosalind_maj.txt")
    k, n = (int(token) for token in lines[0].split())
    assert len(lines) - 1 == k
    results = []
    for row_index in range(1, k + 1):
        array = [int(token) for token in lines[row_index].split()]
        results.append(major_el(array))
    print(" ".join(str(value) for value in results))
    
コード例 #2
0
ファイル: Gc.py プロジェクト: Veronica1663822/Veronica
import os
from Utilities import file_reader


def gc_counter(string):
    """Return the fraction of characters in ``string`` that are G or C.

    The comparison is case-insensitive.

    Args:
        string: Sequence of single-character symbols (typically a DNA string).

    Returns:
        A float in [0.0, 1.0]; 0.0 for an empty sequence, which would
        otherwise cause a division by zero.
    """
    if not string:
        return 0.0
    gc = sum(1 for b in string if b.lower() in "gc")
    return float(gc) / float(len(string))


if __name__ == "__main__":
    # The first entry returned by file_reader is dropped; each remaining
    # record is "name\n<sequence lines>".
    records = file_reader("rosalind_gc.txt", fasta=True)[1:]
    best_fasta, best_count = None, 0
    for record in records:
        fasta_name, *sequence_lines = record.split("\n")
        c = gc_counter("".join(sequence_lines))
        # Strict comparison: the first record wins on ties.
        if c > best_count:
            best_fasta, best_count = fasta_name, c
    print(best_fasta)
    print(best_count * 100)
コード例 #3
0
from Utilities import file_reader


def tran(stringA, stringB):
    """Return the transition/transversion ratio between two strings.

    Positions are compared index by index over the length of ``stringA``.
    A mismatch is a transition when both symbols belong to "AG" or both
    belong to "CT"; every other mismatch is a transversion.

    Args:
        stringA: First string; its length drives the comparison.
        stringB: Second string, indexed at the same positions.

    Returns:
        Number of transitions divided by number of transversions.

    Raises:
        ZeroDivisionError: If no transversion is found.
        IndexError: If ``stringB`` is shorter than ``stringA``.
    """
    purines, pyrimidines = "AG", "CT"
    transition_count = 0
    transversion_count = 0
    for position, base_a in enumerate(stringA):
        base_b = stringB[position]
        if base_a == base_b:
            continue
        both_purines = base_a in purines and base_b in purines
        both_pyrimidines = base_a in pyrimidines and base_b in pyrimidines
        if both_purines or both_pyrimidines:
            transition_count += 1
        else:
            transversion_count += 1
    return transition_count / transversion_count


if __name__ == "__main__":
    # Hard-coded sample pair first.
    a = "GCAACGCACAACGAAAACCCTTAGGGACTGGATTATTTCGTGATCGTTGTAGTTATTGGAAGTACGGGCATCAACCCAGTT"
    b = "TTATCTGACAAAGAAAGCCGTCAACGGCTGGATAATTTCGCGATCGTGCTGGTTACTGGCGGTACGAGTGTTCCTTTGGGT"
    print(tran(a, b))

    # Then the dataset: the first entry returned by file_reader is dropped
    # and the first line of each record (its name) is discarded.
    dataset = file_reader("rosalind_tran.txt", fasta=True)[1:]
    print(dataset)
    sequences = ["".join(dataset[index].split("\n")[1:]) for index in (0, 1)]
    a, b = sequences
    assert len(a) == len(b)
    print(tran(a, b))
コード例 #4
0
from Utilities import file_reader


def pdst(l):
    """Return the matrix of pairwise mismatch fractions for the strings in ``l``.

    Entry ``[r][c]`` is the number of positions at which ``l[r]`` and
    ``l[c]`` differ, divided by ``len(l[r])``. Positions are taken over the
    length of the row string.

    Args:
        l: Collection of equal-length strings.

    Returns:
        A list of rows, each a list of floats.
    """

    def _mismatch_fraction(first, second):
        # Index-based comparison: a shorter ``second`` raises IndexError.
        mismatches = sum(
            1 for idx in range(len(first)) if first[idx] != second[idx])
        return mismatches / len(first)

    return [[_mismatch_fraction(first, second) for second in l]
            for first in l]


if __name__ == "__main__":
    # Sample matrix.
    sample_l = ["TTTCCATTTA", "GATTCATTTC", "TTTCCATTTT", "GTTCCATTTA"]
    for row in pdst(sample_l):
        print(" ".join(str(value) for value in row))

    # Dataset matrix: the name line of every record is discarded and the
    # remaining lines are glued into a single string.
    dataset = file_reader("rosalind_pdst.txt", fasta=True)[1:]
    final_list = ["".join(el.split("\n")[1:]) for el in dataset]
    for row in pdst(final_list):
        print(" ".join(str(value) for value in row))
コード例 #5
0
ファイル: 3Sum.py プロジェクト: Veronica1663822/Veronica
                if j == -value and i not in dict_positives[j]:
                    index1, index2 = dict_positives[j]
                    return index1 + 1, index2 + 1, i + 1
        elif value > 0:
            for j in dict_negatives.keys():
                if j == -value and i not in dict_negatives[j]:
                    index1, index2 = dict_negatives[j]
                    return index1 + 1, index2 + 1, i + 1
        else:
            for j in dict_zeros.keys():
                if i not in dict_zeros[j]:
                    index1, index2 = dict_zeros[j]
                    return index1 + 1, index2 + 1, i + 1
    else:
        return [-1]


# Read the dataset; the first token of its first line is used below as the
# number of arrays to process.
l = file_reader("rosalind_3sum.txt")
k = l[0].split()[0]
# NOTE(review): `results` is not used anywhere in the visible code.
results = []
# Hard-coded sample in the same line layout as the dataset; the loop below
# does not read it.
input_sample = [
    "4 5", "2 -3 4 10 5", "8 -6 4 -2 -8", "-5 2 3 2 -4", "2 4 -5 6 8"
]
k_sample = input_sample[0].split()[0]

# Run sum_3 on each of the k arrays, print its result sorted, and finally
# print the elapsed wall-clock time in seconds.
start = time.time()
for ind in range(1, int(k) + 1):
    result = sum_3(list(map(int, l[ind].split())))
    print(" ".join(list(map(str, sorted(result)))))
print(time.time() - start)
コード例 #6
0
ファイル: Mer.py プロジェクト: Veronica1663822/Veronica
    print("Merge_arrays2 took {} seconds".format(time.time() - start))
    return sorted


def merge_arrays(a, b):
    """Merge two sorted sequences and count the cross pairs out of order.

    Whenever an element of ``b`` is emitted before the remaining elements
    of ``a``, each of those remaining elements is counted once.

    Args:
        a: First sorted sequence.
        b: Second sorted sequence.

    Returns:
        A tuple ``(count, merged)`` where ``merged`` is the sorted list of
        all elements and ``count`` is the number of pairs ``(x, y)`` with
        ``x`` in ``a``, ``y`` in ``b`` and ``x > y``.
    """
    merged = []
    cross_pairs = 0
    left = right = 0
    while left < len(a) and right < len(b):
        if a[left] <= b[right]:
            merged.append(a[left])
            left += 1
            continue
        merged.append(b[right])
        right += 1
        # Everything still pending in ``a`` is larger than the value just taken.
        cross_pairs += len(a) - left
    # At most one of the two tails is non-empty here.
    merged.extend(a[left:] + b[right:])
    return cross_pairs, merged


if __name__ == "__main__":
    # Sample arrays; both names are reassigned from the dataset below.
    a, b = [2, 4, 10, 18], [-5, 11, 12]

    # The arrays are read from the lines at index 1 and 3 of the dataset.
    ls = file_reader("rosalind_mer.txt")
    a = [int(token) for token in ls[1].split()]
    b = [int(token) for token in ls[3].split()]
    n, ls = merge_arrays(a, b)
    s = " ".join(str(value) for value in ls)
    print(s)
コード例 #7
0
ファイル: Sseq.py プロジェクト: Veronica1663822/Veronica
from Utilities import file_reader


def sseq(s, t):
    """Return 1-based positions in ``s`` that spell ``t`` as a subsequence.

    The scan is greedy from the left: each symbol of ``t`` is matched with
    the earliest still-available position of ``s``.

    Args:
        s: String to search in.
        t: Subsequence to locate.

    Returns:
        A list of 1-based indices into ``s``. If ``t`` is empty the list is
        empty (previously this raised IndexError). If ``s`` is exhausted
        before all of ``t`` is matched, only the positions matched so far
        are returned.
    """
    indices = []
    if not t:
        return indices
    i_t = 0
    for position, symbol in enumerate(s, start=1):
        if symbol == t[i_t]:
            indices.append(position)
            i_t += 1
            # Stop as soon as the whole of ``t`` has been placed.
            if i_t == len(t):
                break
    return indices


if __name__ == "__main__":
    # Sample case.
    sample_string = "ACGTACGTGACG"
    sample_seq = "GTA"
    print(sseq(sample_string, sample_seq))

    # Dataset: exactly two records are expected after dropping the first
    # entry returned by file_reader.
    not_splitted_string, not_splitted_seq = file_reader(
        "rosalind_sseq (1).txt", fasta=True)[1:]
    string = "".join(not_splitted_string.split("\n")[1:])
    # Only the first sequence line of the second record is used.
    seq = not_splitted_seq.split("\n")[1]
    r = [str(position) for position in sseq(string, seq)]
    print(" ".join(r))
コード例 #8
0
def graph(string_list, k=3):
    """Return the edges of the overlap graph of the given records.

    Each record is "name\\n<sequence lines>". An edge "name1 name2" is
    emitted when the last ``k`` characters of the first sequence equal the
    first ``k`` characters of the second and the two sequences differ.

    Args:
        string_list: Iterable of records, name on the first line.
        k: Overlap length.

    Returns:
        A list of "name1 name2" strings, ordered by the position of the
        first and then of the second record in ``string_list``.
    """
    sequences = {}
    order = []
    for record in string_list:
        header, *body = record.split("\n")
        sequences[header] = "".join(body)
        order.append(header)

    edges = []
    for tail in order:
        tail_seq = sequences[tail]
        wanted = tail_seq[-k:]
        for head in order:
            head_seq = sequences[head]
            # Identical sequences (this includes a record paired with
            # itself) never form an edge.
            if head_seq != tail_seq and head_seq[:k] == wanted:
                edges.append(tail + " " + head)
    return edges


if __name__ == "__main__":
    # Sample records, printed to the console.
    sample_string_list = [
        ">Rosalind_0498\nAAATAAA",
        ">Rosalind_2391\nAAATTTT",
        ">Rosalind_2323\nTTTTCCC",
        ">Rosalind_0442\nAAATCCC",
        ">Rosalind_5013\nGGGTGGG",
    ]
    print(graph(sample_string_list))

    # Dataset edges are written one per line to the answer file.
    dataset = file_reader("rosalind_grph.txt", fasta=True)[1:]
    answer_path = os.path.expanduser("~/downloads/answer.txt")
    with open(answer_path, "w") as file:
        file.write("\n".join(graph(dataset)))
コード例 #9
0
ファイル: Cons.py プロジェクト: Veronica1663822/Veronica
                print("character not recognised")
        final_string += b_list[np.argmax(np.array(b_counts))]
        profile_matrix.append(b_counts)
    profile_matrix_t = np.array(profile_matrix).transpose()
    print(final_string.upper())
    print(profile_matrix_t)

    if save_file:
        with open(os.path.expanduser("~/downloads/" + file_name), "w") as file:
            file.write(final_string.upper() + "\n")
        with open(os.path.expanduser("~/downloads/" + file_name),
                  "a+") as file:
            for i in range(len(profile_matrix_t)):
                string = b_list[i].upper() + ": " + " ".join(
                    list(map(str, profile_matrix_t[i]))) + "\n"
                assert len(profile_matrix_t[i]) == len(strings_list[0])
                file.write(string)


if __name__ == "__main__":
    # Sample run, saved under the explicit file name "sample.txt".
    input_sample_list = [
        "ATCCAGCT",
        "GGGCAACT",
        "ATGGATCT",
        "AAGCAACC",
        "TTGGAACT",
        "ATGCCATT",
        "ATGGCACT",
    ]
    cons(input_sample_list, save_file=True, file_name="sample.txt")

    # Dataset run: the name line of each record is discarded and the
    # remaining lines are glued into one string.
    true_input = file_reader("rosalind_cons.txt", fasta=True)[1:]
    final_list = ["".join(record.split("\n")[1:]) for record in true_input]
    cons(final_list, save_file=True)
コード例 #10
0
ファイル: Subs.py プロジェクト: Veronica1663822/Veronica
from Utilities import file_reader


def find_motif(string, substring):
    """Return every 1-based start position of ``substring`` in ``string``.

    Overlapping occurrences are all reported.

    Args:
        string: Text to search in.
        substring: Motif to look for.

    Returns:
        A list of 1-based start indices in increasing order; an empty list
        when the motif does not occur or is itself empty.
    """
    indices = []
    if not substring:
        # An empty motif would "match" at every position; treat it as absent.
        return indices
    # Let str.find drive the loop: it returns -1 once no occurrence is left,
    # so a match that ends exactly at the end of the string is not lost and
    # no slice copies are made.
    index = string.find(substring)
    while index != -1:
        indices.append(index + 1)
        # Resume one character later so overlapping matches are found.
        index = string.find(substring, index + 1)
    return indices


if __name__ == "__main__":
    # Sample case.
    sample_string = "GATATATGCATATACTT"
    sample_substring = "ATAT"
    print(find_motif(sample_string, sample_substring))

    # Dataset: exactly two entries are expected, the text and the motif.
    true_dataset = file_reader("rosalind_subs.txt")
    print(len(true_dataset))
    string, substring = true_dataset
    positions = find_motif(string, substring)
    print(" ".join(str(position) for position in positions))
コード例 #11
0
ファイル: MS.py プロジェクト: Veronica1663822/Veronica
from Utilities import file_reader
from Mer import merge_arrays
import os


def merge_sort(a):
    """Sort ``a`` by recursive halving and accumulate the merge counters.

    Args:
        a: List to sort.

    Returns:
        A tuple ``(count, sorted_list)``. ``count`` is the sum of the
        counters reported by ``merge_arrays`` over all merges. For inputs
        of length 0 or 1 the input object itself is returned with count 0.
    """
    if len(a) > 1:
        mid = len(a) // 2
        count_left, sorted_left = merge_sort(a[:mid])
        count_right, sorted_right = merge_sort(a[mid:])
        count_cross, merged = merge_arrays(sorted_left, sorted_right)
        return count_cross + count_left + count_right, merged
    return 0, a


if __name__ == "__main__":
    # The array to sort is read from the line at index 1 of the dataset.
    l = file_reader("rosalind_ms (1).txt")
    ls = [int(token) for token in l[1].split()]
    n, sorted_arr = merge_sort(ls)
    ans = " ".join(str(value) for value in sorted_arr)

    answer_path = os.path.expanduser("~/downloads/answer.txt")
    with open(answer_path, "w") as file_:
        file_.write(ans)
コード例 #12
0
from Utilities import file_reader
from itertools import product
import os


def lexf(string, k=2):
    """Return an iterator over all length-``k`` tuples of symbols of ``string``.

    Whitespace in ``string`` is removed first, so "A C G T" and "ACGT"
    describe the same alphabet. Tuples come out in the order produced by
    ``itertools.product``, i.e. following the order of the symbols given.

    Args:
        string: Alphabet symbols, optionally separated by whitespace.
        k: Length of each generated tuple.

    Returns:
        An ``itertools.product`` iterator of ``k``-tuples.
    """
    alphabet = "".join(string.split())
    return product(alphabet, repeat=k)


if __name__ == "__main__":
    # Sample: print every pair over the alphabet.
    sample_string = "A C G T"
    for a, b in lexf(sample_string):
        print(a, b)

    # Dataset: the last entry returned by file_reader is dropped; the
    # remaining two are the alphabet and the word length.
    true_string, k = file_reader("rosalind_lexf.txt")[:-1]
    k = int(k)
    final_string = "".join(
        "".join(word) + "\n" for word in lexf(true_string, k))
    answer_path = os.path.expanduser("~/downloads/answer.txt")
    with open(answer_path, "w") as file:
        file.write(final_string)
    print(final_string)
コード例 #13
0
from Utilities import file_reader

def par(arr):
    """Three-way partition ``arr`` in place around its first element.

    After the call every element smaller than the pivot (the original
    ``arr[0]``) comes first, followed by all elements equal to the pivot,
    followed by all elements greater than it.

    Args:
        arr: Mutable list of mutually comparable values.

    Returns:
        The same list object, rearranged. An empty list is returned
        unchanged (previously this raised IndexError on ``arr[0]``).
    """
    if not arr:
        return arr
    pivot = arr[0]

    # Phase 1: scanning from the right, push every element greater than the
    # pivot to the right end. ``i`` is the last slot not yet holding one.
    i = len(arr) - 1
    for j in range(len(arr) - 1, 0, -1):
        if arr[j] > pivot:
            arr[j], arr[i] = arr[i], arr[j]
            i -= 1
    # Drop the pivot just left of the "greater" zone.
    arr[i], arr[0] = arr[0], arr[i]
    i -= 1

    # Phase 2: within the remaining prefix (all <= pivot), gather the
    # elements equal to the pivot right next to it.
    for j in range(i, -1, -1):
        if arr[j] == pivot:
            arr[j], arr[i] = arr[i], arr[j]
            i -= 1
    return arr

if __name__ == "__main__":
    # The dataset array is read from the line at index 1.
    input_list = file_reader("rosalind_par3.txt")
    sample = [4, 5, 6, 4, 1, 2, 5, 7, 4]
    array = [int(token) for token in input_list[1].split()]
    # par() rearranges its argument in place.
    par(array)
    par(sample)
    print(sample)
    answer = " ".join(str(value) for value in array)
    print(answer)
コード例 #14
0
ファイル: Tree.py プロジェクト: Veronica1663822/Veronica
from Utilities import file_reader


def tree(n, l):
    """Return ``n - 1`` minus the number of edge entries in ``l``.

    An entry counts as an edge when it splits into at least two
    whitespace-separated tokens. Entries with fewer tokens are reported on
    stdout and ignored.

    The input list is no longer modified. The previous implementation
    popped entries while iterating by index, which skipped the following
    entry and raised IndexError unless the singleton was the last item.

    Args:
        n: Number of nodes.
        l: Iterable of edge strings such as "1 2".

    Returns:
        ``n - 1 - edge_count``.
    """
    edge_count = 0
    for entry in l:
        if len(entry.split()) < 2:
            print("Singleton element found: ", entry)
        else:
            edge_count += 1
    return n - 1 - edge_count


if __name__ == "__main__":
    # Sample case.
    l_sample = ["1 2", "2 8", "4 10", "5 9", "6, 10", "7 9", "3"]
    n_sample = 10
    print(tree(n_sample, l_sample))

    # Dataset: the last entry returned by file_reader is dropped; the first
    # remaining entry is the node count, the rest are passed on as edges.
    dataset = file_reader("rosalind_tree.txt")[:-1]
    print(dataset)
    n, l = int(dataset[0]), dataset[1:]
    print(tree(n, l))
コード例 #15
0
ファイル: Hamm.py プロジェクト: Veronica1663822/Veronica
from Utilities import file_reader


def hamm_dist(a, b):
    """Return the number of positions at which ``a`` and ``b`` differ.

    Positions are taken over the length of ``a``.

    Args:
        a: First sequence; its length drives the comparison.
        b: Second sequence, indexed at the same positions.

    Returns:
        The count of mismatching positions.

    Raises:
        IndexError: If ``b`` is shorter than ``a``.
    """
    return sum(1 for position, symbol in enumerate(a) if symbol != b[position])


if __name__ == "__main__":
    # Dataset: the last entry returned by file_reader is dropped; exactly
    # two strings are expected to remain.
    string_a, string_b = file_reader("rosalind_hamm (1).txt")[:-1]

    # Sample case first, then the dataset.
    string_a_sample = "GAGCCTACTAACGGGAT"
    string_b_sample = "CATCGTAATGACGGCCT"
    sample_distance = hamm_dist(string_a_sample, string_b_sample)
    print(sample_distance)
    print("Now the true dataset:")
    print(hamm_dist(string_a, string_b))
コード例 #16
0
from Rna import dna_to_rna
from Revp import complementary_conv

START_CODON = "AUG"


def orf(s):
    """Return the distinct proteins decodable from ``s`` and its reverse strand.

    Both ``s`` and ``complementary_conv(s[::-1])`` are converted with
    ``dna_to_rna``. At every offset where either RNA string shows the start
    codon, the remainder is passed to ``from_rna_to_protein``; results that
    are not ``None`` are collected.

    Args:
        s: DNA string.

    Returns:
        A set of decoded protein strings.
    """
    reverse_strand = complementary_conv(s[::-1])
    forward_rna = dna_to_rna(s)
    reverse_rna = dna_to_rna(reverse_strand)

    proteins = set()
    for offset in range(len(s)):
        # The forward strand is examined before the reverse one at each offset.
        for strand in (forward_rna, reverse_rna):
            if strand[offset:offset + 3] != START_CODON:
                continue
            decoded = from_rna_to_protein(strand[offset:])
            if decoded is not None:
                proteins.add(decoded)
    return proteins


if __name__ == "__main__":
    # Sample case.
    sample_string = "AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG"
    sample_proteins = orf(sample_string)
    print("\n".join(sample_proteins))
    print()

    # Dataset: everything after the first entry is glued into one string.
    dataset = file_reader("rosalind_orf.txt")
    string = "".join(dataset[1:])
    print("\n".join(orf(string)))
コード例 #17
0
ファイル: Revp.py プロジェクト: Veronica1663822/Veronica
            converted += "G"
        else:
            raise ValueError("Character not recognized")
    return converted


def revp(string, n):
    """Return start positions and lengths of reverse palindromes in ``string``.

    The string is upper-cased and converted with ``complementary_conv``.
    For every start ``i`` and every ``j`` in ``[i + 2, i + n)``, the slice
    ``string[i:j]`` is compared with the converted string read backwards
    from ``j`` down to ``i + 1``. A match is reported with length
    ``j - i + 1``.

    Args:
        string: DNA string (any case).
        n: Upper bound on the reported length.

    Returns:
        A list of ``(position, length)`` tuples with 1-based positions,
        ordered by position and then by length.
    """
    sequence = string.upper()
    complement = complementary_conv(sequence)
    hits = []
    for start in range(len(sequence) - 1):
        for stop in range(start + 2, start + n):
            window = sequence[start:stop]
            # The backward slice has the same length as ``window`` only
            # while ``stop`` is a valid index, so no match is possible once
            # ``stop`` runs past the end.
            if window == complement[stop:start:-1]:
                hits.append((start + 1, len(window) + 1))
    return hits


if __name__ == "__main__":
    # Sample case.
    sample_string = "TCAATGCATGCGGGTCTATATGCAT"
    n = 12
    r = revp(sample_string, n)
    for i, length in r:
        print(i, length)

    print('true dataset starts here:')
    # Only the entry at index 1 of the reader's output is used: its first
    # line is the name, the remaining lines form the sequence.
    true_dataset = file_reader("rosalind_revp.txt", fasta=True)[1]
    processed_dataset = true_dataset.split("\n")
    fasta_name = processed_dataset[0]
    actual_string = "".join(processed_dataset[1:])
    for i, length in revp(actual_string, n):
        print(i, length)
コード例 #18
0
def count_inv(arr):
    """Run ``merge_sort`` on ``arr`` and return its result with fields swapped.

    Args:
        arr: List passed straight to ``merge_sort``.

    Returns:
        A tuple ``(sorted_list, count)``, i.e. the two values returned by
        ``merge_sort`` in reverse order.
    """
    count, ordered = merge_sort(arr)
    return ordered, count


def count_inv2(arr):
    """Selection-sort ``arr`` in place and return the number of swaps made.

    The elapsed wall-clock time in seconds is printed before returning.

    Args:
        arr: Mutable list of mutually comparable values; sorted in place.

    Returns:
        The number of swaps performed by the selection sort.
    """
    started = time.time()
    swap_total = 0
    size = len(arr)
    for target in range(size):
        # min() returns the first index holding the smallest value, which is
        # the same choice a left-to-right scan with a strict '<' makes.
        smallest = min(range(target, size), key=lambda idx: arr[idx])
        if smallest != target:
            arr[target], arr[smallest] = arr[smallest], arr[target]
            swap_total += 1
    print(time.time() - started)
    return swap_total


if __name__ == "__main__":
    # The dataset array is read from the line at index 1.
    input_list = file_reader("rosalind_inv.txt")
    l = [int(token) for token in input_list[1].split()]

    # Sample case: print the input list and its count.
    l_sample = [-6, 1, 15, 8, 10]
    a, n = count_inv(l_sample)
    print(l_sample)
    print(n)

    # Dataset case: print the sorted list and its count.
    a, n = count_inv(l)
    print(a)
    print(n)
コード例 #19
0
    while i < len(int_l):
        j = 0
        while j < i:
            if mode == "increasing":
                condition_met = int_l[j] < int_l[i]
            elif mode == "decreasing":
                condition_met = int_l[j] > int_l[i]
            if condition_met:
                if lengths[i] <= lengths[j] + 1:
                    lengths[i] = lengths[j] + 1
                    indices[i] = j
            j += 1
        i += 1
    i = int(np.argmax(lengths))
    result = [int_l[i]]
    while indices[i] != None:
        idx = indices[i]
        result.insert(0, int_l[idx])
        i = idx
    return list(map(str, result))


if __name__ == "__main__":
    modes = ("increasing", "decreasing")

    # Sample case: print the raw result list for each mode.
    a = "5 1 4 2 3"
    for mode in modes:
        print(lgs(a.split(), mode=mode))

    # Dataset: the sequence is read from the line at index 1 and each
    # result is printed space-separated.
    total_input = file_reader("rosalind_lgis.txt")
    true_test = total_input[1].split()
    for mode in modes:
        print(" ".join(lgs(true_test, mode=mode)))
コード例 #20
0
from Utilities import file_reader, RNA_TO_PROTEIN_DICT


def from_rna_to_protein(string):
    """Translate ``string`` codon by codon until a stop codon is reached.

    The string is read in steps of three characters from its start. Chunks
    that are not keys of ``RNA_TO_PROTEIN_DICT`` are skipped.

    Args:
        string: RNA string.

    Returns:
        The protein built so far when a codon mapping to "Stop" is met, or
        ``None`` when the string ends without one.
    """
    residues = []
    for start in range(0, len(string), 3):
        codon = string[start:start + 3]
        if codon not in RNA_TO_PROTEIN_DICT:
            continue
        residue = RNA_TO_PROTEIN_DICT[codon]
        if residue == "Stop":
            return "".join(residues)
        residues.append(residue)
    # No stop codon found.
    return None


if __name__ == "__main__":
    # Only the first entry returned by file_reader is translated.
    rna_lines = file_reader("rosalind_prot.txt")
    s = rna_lines[0]
    protein = from_rna_to_protein(s)
    print(protein)