Esempio n. 1
0
def main():
    """Check clump finding on the training cases, then solve the test cases.

    Reads the Problem6 data through ``DataReader``. For every case that has
    an expected output, the patterns returned by ``DNA.get_clumps_patterns``
    are compared with it, ignoring order. The last ``len(output)`` entries of
    ``test_cases`` are then solved, handed to ``DataWriter.write_data`` and
    printed together with the time and memory reported by ``Usage``.

    Raises:
        Exception: if a training case does not yield the expected patterns.
    """
    problem_dataset_dir = os.path.join('Problems', 'Problem6')
    solution_dir = os.path.join("Problems", "Problem6Solution")

    data_reader = DataReader(problem_dataset_dir)
    test_cases, output = data_reader.get_data()

    for train_i in range(0, len(output)):
        case = test_cases[train_i]
        case_output = output[train_i]
        genome = case[0]
        k = case[1][0]
        l = case[1][1][0]
        t = case[1][1][1]
        dna = DNA(genome)
        clumps_patterns = dna.get_clumps_patterns(int(k), int(t), int(l))

        # list.sort() sorts in place and returns None, so comparing the
        # results of two .sort() calls is always "None != None" and can never
        # flag a mismatch. Compare sorted copies instead.
        if sorted(clumps_patterns) != sorted(case_output):
            # str() is needed: concatenating a list to a str raises TypeError
            # and would hide the real mismatch.
            raise Exception("Output not matched!\nExpecting: " + str(case_output) +
                            "\nFound: " + str(clumps_patterns))

    print("Passed training data..")

    writer = DataWriter(solution_dir)
    usage = Usage()

    # Solve the last len(output) entries of test_cases.
    for test_i in range(len(test_cases) - len(output), len(test_cases)):
        usage.start()
        case = test_cases[test_i]
        genome = case[0]
        k = case[1][0]
        l = case[1][1][0]
        t = case[1][1][1]
        dna = DNA(genome)
        clumps_patterns = dna.get_clumps_patterns(int(k), int(t), int(l))
        usage.end()

        writer.write_data(test_i, clumps_patterns, usage.get_execution_time(), usage.get_memory_usage())
        print("\n\nInput:\n" + genome + "\n" + str(k) + " " + str(l) + " " + str(t))

        print("\n\nOutput")
        print("=====")

        for clump in clumps_patterns:
            print(clump)

        print("\n")
        print("======")
        print("Execution Time: " + str(usage.get_execution_time()) + " s")
        print("Memory Used: " + str(usage.get_memory_usage()) + " MB")
Esempio n. 2
0
def main():
    """Validate the amino-acid candidate search, then solve the test samples.

    Loads the Problem2 samples and the RNA codon table through
    ``DataReader``. For each training sample the candidates returned by
    ``DNA.get_dna_to_amino_acid_candidates`` must equal the expected output
    when both are compared as sets. Each testing sample is then solved
    between ``Usage.start()`` and ``Usage.end()``, passed to
    ``DataWriter.write_data`` and echoed to stdout.

    Raises:
        Exception: if a training sample's candidates differ from its
            expected output.
    """
    dataset_path = os.path.join('Problems', 'Problem2')
    results_path = os.path.join("Problems", "Problem2Solution")

    reader = DataReader(dataset_path)
    training_data, testing_data = reader.get_data()
    codons_table = reader.get_rna_codon_table()

    for sample in training_data:
        query = sample[0]
        dna_string = query[0]
        amino_acid = query[1]
        expected = sample[1]

        strand = DNA(dna_string)
        strand.set_codon_table(codons_table)
        candidates = strand.get_dna_to_amino_acid_candidates(amino_acid)

        # Set comparison: order and duplicates do not matter.
        if set(candidates) != set(expected):
            raise Exception(
                f"Output not matched!\nExpecting: {expected}"
                f"\nFound: {candidates}"
            )

    print("Passed training data..\n\n")

    writer = DataWriter(results_path)
    usage = Usage()

    for sample in testing_data:
        usage.start()
        query = sample[0]
        dna_string = query[0]
        amino_acid = query[1]
        strand = DNA(dna_string)
        strand.set_codon_table(codons_table)
        candidates = strand.get_dna_to_amino_acid_candidates(amino_acid)
        usage.end()

        writer.write_data(
            (dna_string, amino_acid),
            candidates,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print("DNA:\n" + dna_string)
        print("Protein\n" + amino_acid)

        print("\n\nOutput\n=====")

        print(len(candidates))
        for substring in candidates:
            print(substring)

        print(f"\n\nExecution Time: {usage.get_execution_time()} s")
        print(f"Memory Usage: {usage.get_memory_usage()} MB")
Esempio n. 3
0
def main():
    """Check the most-frequent k-mer search against the expected outputs.

    Reads the Problem3 data through ``DataReader``. For every case with an
    expected output, ``DNA.most_frequent_k_mer`` is called with the case's
    second field converted to ``int`` and the result is compared with the
    expected output, ignoring order.

    Raises:
        Exception: if a case does not yield the expected k-mers.
    """
    problem_dataset_dir = os.path.join('Problems', 'Problem3')
    # Built but not used in the code shown here.
    solution_dir = os.path.join("Problems", "Problem3Solution")

    data_reader = DataReader(problem_dataset_dir)
    test_cases, output = data_reader.get_data()

    for train_i in range(0, len(output)):
        case = test_cases[train_i]
        case_output = output[train_i]
        dna = DNA(case[0])
        k_mers = dna.most_frequent_k_mer(int(case[1]))

        # list.sort() returns None, so the former ".sort() != .sort()" test
        # was always False. Compare sorted copies so mismatches are detected.
        if sorted(k_mers) != sorted(case_output):
            raise Exception("Output not matched!\nExpecting: " +
                            str(case_output) + "\nFound: " + str(k_mers))
Esempio n. 4
0
def main():
    """Validate global alignment on the training pairs, then solve test pairs.

    Loads the Problem11 string pairs and the BLOSUM62 data through
    ``DataReader``. For each pair with an expected output, the result of
    ``StringsAlgorithms.alignment(_type='global', ...)`` must equal that
    output. The last ``len(output)`` entries of ``test_cases`` are then
    aligned, passed to ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training pair's alignment differs from the expected
            output.
    """
    dataset_path = os.path.join('Problems', 'Problem11')
    results_path = os.path.join("Problems", "Problem11Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()
    scoring_matrix = reader.get_BLOSUM62_data()

    for idx in range(len(output)):
        alpha_dna, beta_dna = test_cases[idx]
        expected = output[idx]
        aligner = StringsAlgorithms(alpha_dna, beta_dna)
        align = aligner.alignment(_type='global',
                                  scoring_matrix=scoring_matrix)

        if align != expected:
            raise Exception(
                f"Output not matched!\nExpecting: {expected}"
                f"\nFound: {align}"
            )

    print("Passed training data..")

    writer = DataWriter(results_path)
    usage = Usage()

    total = len(test_cases)
    for test_i in range(total - len(output), total):
        usage.start()
        alpha_dna, beta_dna = test_cases[test_i]
        aligner = StringsAlgorithms(alpha_dna, beta_dna)
        align = aligner.alignment(_type='global',
                                  scoring_matrix=scoring_matrix)
        usage.end()

        writer.write_data(
            test_i + 1,
            align,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print("\n\nInput:\n" + alpha_dna + "\n" + beta_dna + "\n")

        print("\n\nOutput\n=====")

        # align[0] is printed first, then the two entries of align[1].
        print(align[0])
        aligned_pair = align[1]
        print(aligned_pair[0])
        print(aligned_pair[1])

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 5
0
def main():
    """Check ``DNA.reverse_complement`` against the expected outputs.

    Reads the Problem4 data through ``DataReader`` and, for every case with
    an expected output, compares the reverse complement of the case with it.

    Raises:
        Exception: if a case's reverse complement differs from the expected
            output.
    """
    dataset_path = os.path.join('Problems', 'Problem4')
    # Built but not used in the code shown here.
    results_path = os.path.join("Problems", "Problem4Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        sequence = test_cases[idx]
        expected = output[idx]
        found = DNA(sequence).reverse_complement()

        if found != expected:
            # "found" is concatenated as-is, exactly like the original code.
            raise Exception(
                f"Output not matched!\nExpecting: {expected}\nFound: " + found
            )
Esempio n. 6
0
def main():
    """Run the Problem2 candidate search: verify training data, solve tests.

    The samples and the RNA codon table come from ``DataReader``. Training
    samples are verified by comparing, as sets, the candidates from
    ``DNA.get_dna_to_amino_acid_candidates`` with the expected output.
    Testing samples are solved between ``Usage.start()`` and ``Usage.end()``,
    passed to ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training sample's candidates differ from its
            expected output.
    """
    source_dir = os.path.join('Problems', 'Problem2')
    target_dir = os.path.join("Problems", "Problem2Solution")

    reader = DataReader(source_dir)
    training_data, testing_data = reader.get_data()
    codons_table = reader.get_rna_codon_table()

    for sample in training_data:
        inputs = sample[0]
        dna_string = inputs[0]
        amino_acid = inputs[1]
        wanted = sample[1]

        strand = DNA(dna_string)
        strand.set_codon_table(codons_table)
        candidates = strand.get_dna_to_amino_acid_candidates(amino_acid)

        if set(candidates) != set(wanted):
            message = (
                f"Output not matched!\nExpecting: {wanted}"
                f"\nFound: {candidates}"
            )
            raise Exception(message)

    print("Passed training data..\n\n")

    writer = DataWriter(target_dir)
    usage = Usage()

    for sample in testing_data:
        usage.start()
        inputs = sample[0]
        dna_string = inputs[0]
        amino_acid = inputs[1]
        strand = DNA(dna_string)
        strand.set_codon_table(codons_table)
        candidates = strand.get_dna_to_amino_acid_candidates(amino_acid)
        usage.end()

        writer.write_data(
            (dna_string, amino_acid),
            candidates,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print("DNA:\n" + dna_string)
        print("Protein\n" + amino_acid)

        print("\n\nOutput\n=====")

        # Candidate count first, then one candidate per line.
        print(len(candidates))
        for substring in candidates:
            print(substring)

        print(f"\n\nExecution Time: {usage.get_execution_time()} s")
        print(f"Memory Usage: {usage.get_memory_usage()} MB")
Esempio n. 7
0
def main():
    """Validate the mismatch-tolerant frequent k-mer search, then solve tests.

    Reads the Problem9 data through ``DataReader``. For each case with an
    expected output, the result of ``DNA.most_frequent_missmatched_k_mer``
    must equal that output when both are compared as sets. A trailing slice
    of ``test_cases`` is then solved, passed to ``DataWriter.write_data``
    and printed.

    Raises:
        Exception: if a training case's k-mers differ from the expected
            output.
    """
    dataset_path = os.path.join('Problems', 'Problem9')
    results_path = os.path.join("Problems", "Problem9Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        case = test_cases[idx]
        expected = output[idx]
        genome = case[0]
        params = case[1]
        k = params[0]
        d = params[1]
        found = DNA(genome).most_frequent_missmatched_k_mer(int(k), int(d))

        if set(expected) != set(found):
            raise Exception(
                f"Output not matched!\nExpecting: {expected}"
                f"\nFound: {found}"
            )

    print("Passed training data..")

    writer = DataWriter(results_path)
    usage = Usage()

    # NOTE(review): the start index carries a "+ 1", so one entry fewer than
    # len(output) is processed; kept exactly as written - confirm this
    # matches the data layout.
    total = len(test_cases)
    for test_i in range(total - len(output) + 1, total):
        usage.start()
        case = test_cases[test_i]
        genome = case[0]
        params = case[1]
        k = params[0]
        d = params[1]
        k_mers = DNA(genome).most_frequent_missmatched_k_mer(int(k), int(d))
        usage.end()

        writer.write_data(
            test_i,
            k_mers,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print("\n\nInput:\n" + genome + f"\n{k}\n{d}")

        print("\n\nOutput\n=====")

        print('\n'.join(str(k_mer) for k_mer in k_mers))

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 8
0
def main():
    """Check ``DNA.get_min_skew`` against the expected outputs.

    Reads the Problem7 data through ``DataReader``. For every case with an
    expected output, the stripped case string is wrapped in ``DNA`` and the
    result of ``get_min_skew`` is compared with the expected output using
    ``np.array_equal``.

    Raises:
        Exception: if a case's result differs from the expected output.
    """
    dataset_path = os.path.join('Problems', 'Problem7')
    # Built but not used in the code shown here.
    results_path = os.path.join("Problems", "Problem7Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        raw_genome = test_cases[idx]
        expected = np.array(output[idx])
        found = DNA(raw_genome.strip()).get_min_skew()

        matches = np.array_equal(expected, found)
        if not matches:
            raise Exception(
                f"Output not matched!\nExpecting: {expected}"
                f"\nFound: {found}"
            )
Esempio n. 9
0
def main():
    """Check ``DNA.get_pattern_indices`` against the expected outputs.

    Reads the Problem5 data through ``DataReader``. Each case supplies a
    pattern (first field) and a genome (second field); the indices returned
    for the pattern must equal the expected output.

    Raises:
        Exception: if a case's indices differ from the expected output.
    """
    problem_dataset_dir = os.path.join('Problems', 'Problem5')
    # Built but not used in the code shown here.
    solution_dir = os.path.join("Problems", "Problem5Solution")

    data_reader = DataReader(problem_dataset_dir)
    test_cases, output = data_reader.get_data()

    for train_i in range(0, len(output)):
        case = test_cases[train_i]
        case_output = output[train_i]
        pattern = case[0]
        genome = case[1]
        dna = DNA(genome)
        pattern_indices = dna.get_pattern_indices(pattern)

        if pattern_indices != case_output:
            # str() keeps the message buildable whatever type the indices
            # have; a bare "+" raises TypeError for anything that is not a
            # str and would hide the real mismatch.
            raise Exception("Output not matched!\nExpecting: " +
                            str(case_output) + "\nFound: " +
                            str(pattern_indices))
Esempio n. 10
0
def main():
    """Check pattern-index search on the training cases, then solve tests.

    Reads the Problem5 data through ``DataReader``. Each case supplies a
    pattern (first field) and a genome (second field). For cases with an
    expected output, ``DNA.get_pattern_indices`` must return exactly that
    output. The last ``len(output)`` entries of ``test_cases`` are then
    solved, passed to ``DataWriter.write_data`` and printed together with
    the time and memory reported by ``Usage``.

    Raises:
        Exception: if a training case's indices differ from the expected
            output.
    """
    problem_dataset_dir = os.path.join('Problems', 'Problem5')
    solution_dir = os.path.join("Problems", "Problem5Solution")

    data_reader = DataReader(problem_dataset_dir)
    test_cases, output = data_reader.get_data()

    for train_i in range(0, len(output)):
        case = test_cases[train_i]
        case_output = output[train_i]
        pattern = case[0]
        genome = case[1]
        dna = DNA(genome)
        pattern_indices = dna.get_pattern_indices(pattern)

        if pattern_indices != case_output:
            # str() keeps the message buildable whatever type the indices
            # have; a bare "+" raises TypeError for anything that is not a
            # str and would hide the real mismatch.
            raise Exception("Output not matched!\nExpecting: " + str(case_output) +
                            "\nFound: " + str(pattern_indices))

    print("Passed training data..")

    writer = DataWriter(solution_dir)
    usage = Usage()

    # Solve the last len(output) entries of test_cases.
    for test_i in range(len(test_cases) - len(output), len(test_cases)):
        usage.start()
        case = test_cases[test_i]
        pattern = case[0]
        genome = case[1]
        dna = DNA(genome)
        pattern_indices = dna.get_pattern_indices(pattern)
        usage.end()

        writer.write_data(test_i + 1, pattern_indices, usage.get_execution_time(), usage.get_memory_usage())
        print("\n\nInput:\n" + pattern + "\n" + genome)

        print("\n\nOutput")
        print("=====")

        print(pattern_indices)

        print("\n")
        print("======")
        print("Execution Time: " + str(usage.get_execution_time()) + " s")
        print("Memory Used: " + str(usage.get_memory_usage()) + " MB")
Esempio n. 11
0
def main():
    """Validate the DP longest-common-subsequence routine, then solve tests.

    Reads the Problem10 string pairs through ``DataReader``. For each pair
    with an expected output, only the length of the result of
    ``StringsAlgorithms.lcs('dp')`` is compared with the length of that
    output. The last ``len(output)`` entries of ``test_cases`` are then
    solved, passed to ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training pair's result length differs from the
            expected output's length.
    """
    dataset_path = os.path.join('Problems', 'Problem10')
    results_path = os.path.join("Problems", "Problem10Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        alpha_dna, beta_dna = test_cases[idx]
        expected = output[idx]
        lcs = StringsAlgorithms(alpha_dna, beta_dna).lcs('dp')
        print(len(lcs))

        # Only the lengths are compared, not the sequences themselves.
        if len(expected) != len(lcs):
            raise Exception(
                f"Output not matched!\nExpecting: {expected}"
                f"\nFound: {lcs}"
            )

    print("Passed training data..")

    writer = DataWriter(results_path)
    usage = Usage()

    total = len(test_cases)
    for test_i in range(total - len(output), total):
        usage.start()
        alpha_dna, beta_dna = test_cases[test_i]
        lcs = StringsAlgorithms(alpha_dna, beta_dna).lcs('dp')
        # This print sits between start() and end(), as in the original.
        print(len(lcs))
        usage.end()

        writer.write_data(
            test_i + 1,
            lcs,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print("\n\nInput:\n" + alpha_dna + "\n" + beta_dna + "\n")

        print("\n\nOutput\n=====")

        print(lcs)

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 12
0
def main():
    """Check the mismatch-tolerant frequent k-mer search on expected outputs.

    Reads the Problem9 data through ``DataReader``. For every case with an
    expected output, the result of ``DNA.most_frequent_missmatched_k_mer``
    must equal that output when both are compared as sets.

    Raises:
        Exception: if a case's k-mers differ from the expected output.
    """
    dataset_path = os.path.join('Problems', 'Problem9')
    # Built but not used in the code shown here.
    results_path = os.path.join("Problems", "Problem9Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        case = test_cases[idx]
        expected = output[idx]
        genome = case[0]
        params = case[1]
        k = params[0]
        d = params[1]
        found = DNA(genome).most_frequent_missmatched_k_mer(int(k), int(d))

        if set(expected) != set(found):
            raise Exception(
                f"Output not matched!\nExpecting: {expected}"
                f"\nFound: {found}"
            )
Esempio n. 13
0
def main():
    """Run the Problem11 global alignment: verify training data, solve tests.

    String pairs and the BLOSUM62 data come from ``DataReader``. Pairs with
    an expected output are verified by exact comparison with the result of
    ``StringsAlgorithms.alignment(_type='global', ...)``. The last
    ``len(output)`` entries of ``test_cases`` are then aligned, passed to
    ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training pair's alignment differs from the expected
            output.
    """
    source_dir = os.path.join('Problems', 'Problem11')
    target_dir = os.path.join("Problems", "Problem11Solution")

    reader = DataReader(source_dir)
    test_cases, output = reader.get_data()
    scoring_matrix = reader.get_BLOSUM62_data()

    for idx in range(len(output)):
        alpha_dna, beta_dna = test_cases[idx]
        wanted = output[idx]
        align = StringsAlgorithms(alpha_dna, beta_dna).alignment(
            _type='global', scoring_matrix=scoring_matrix)

        if align != wanted:
            message = (
                f"Output not matched!\nExpecting: {wanted}"
                f"\nFound: {align}"
            )
            raise Exception(message)

    print("Passed training data..")

    writer = DataWriter(target_dir)
    usage = Usage()

    case_count = len(test_cases)
    for test_i in range(case_count - len(output), case_count):
        usage.start()
        alpha_dna, beta_dna = test_cases[test_i]
        align = StringsAlgorithms(alpha_dna, beta_dna).alignment(
            _type='global', scoring_matrix=scoring_matrix)
        usage.end()

        writer.write_data(
            test_i + 1,
            align,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print("\n\nInput:\n" + alpha_dna + "\n" + beta_dna + "\n")

        print("\n\nOutput\n=====")

        # align[0] is printed first, then the two entries of align[1].
        print(align[0])
        aligned_pair = align[1]
        print(aligned_pair[0])
        print(aligned_pair[1])

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 14
0
def main():
    """Validate De Bruijn graph construction from k-mers, then solve tests.

    Reads the Problem16 data through ``DataReader``. For each case with an
    expected output, the sorted items of the adjacency mapping returned by
    ``Graph.get_debruijn_graph(_type='k_mers', ...)`` must equal the sorted
    items of that output. The last ``len(output)`` entries of ``test_cases``
    are then solved, passed to ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training case's adjacency items differ from the
            expected ones.
    """
    dataset_path = os.path.join('Problems', 'Problem16')
    results_path = os.path.join("Problems", "Problem16Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        k_mers = test_cases[idx]
        adj_list = Graph().get_debruijn_graph(_type='k_mers', k_mers=k_mers)
        expected = output[idx]

        # Sorting the (key, value) pairs makes the comparison independent
        # of mapping order.
        found_items = sorted(adj_list.items())
        expected_items = sorted(expected.items())
        if found_items != expected_items:
            raise Exception(
                f"Output not matched!\nExpecting: {expected_items}"
                f"\nFound: {found_items}"
            )

    print("Passed training data..")

    writer = DataWriter(results_path)
    usage = Usage()

    total = len(test_cases)
    for test_i in range(total - len(output), total):
        usage.start()
        k_mers = test_cases[test_i]
        adj_list = Graph().get_debruijn_graph(_type='k_mers', k_mers=k_mers)
        usage.end()

        writer.write_data(
            test_i + 1,
            adj_list,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print(f"\n\nInput:\n{k_mers}\n")

        print("\n\nOutput\n=====")

        print(adj_list)

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 15
0
def main():
    """Validate ``DNA.get_k_mers`` on the training cases, then solve tests.

    Reads the Problem13 data through ``DataReader``; each case unpacks into
    ``(k, genome)``. For cases with an expected output, the sorted result of
    ``get_k_mers(int(k))`` must equal the sorted output. The last
    ``len(output)`` entries of ``test_cases`` are then solved, passed to
    ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training case's k-mers differ from the expected
            output.
    """
    dataset_path = os.path.join('Problems', 'Problem13')
    results_path = os.path.join("Problems", "Problem13Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        k, genome = test_cases[idx]
        expected = output[idx]
        k_mers = DNA(genome).get_k_mers(int(k))

        # Order-insensitive comparison that still respects duplicates.
        if sorted(expected) != sorted(k_mers):
            raise Exception(
                f"Output not matched!\nExpecting: {expected}"
                f"\nFound: {k_mers}"
            )

    print("Passed training data..")

    writer = DataWriter(results_path)
    usage = Usage()

    total = len(test_cases)
    for test_i in range(total - len(output), total):
        usage.start()
        k, genome = test_cases[test_i]
        k_mers = DNA(genome).get_k_mers(int(k))
        usage.end()

        writer.write_data(
            test_i + 1,
            k_mers,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        # k is concatenated as-is, exactly like the original code.
        print("\n\nInput:\n" + k + "\n" + genome + "\n")

        print("\n\nOutput\n=====")

        print(k_mers)

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 16
0
def main():
    """Validate ``DNA.get_most_probable_k_mer``, then solve the test cases.

    Reads the Problem17 data through ``DataReader``. Each case holds a DNA
    string in its first field and ``(k, score_matrix)`` in its second. For
    cases with an expected output, the returned k-mer must equal it. A
    trailing slice of ``test_cases`` is then solved, passed to
    ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training case's k-mer differs from the expected
            output.
    """
    dataset_path = os.path.join('Problems', 'Problem17')
    results_path = os.path.join("Problems", "Problem17Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        case = test_cases[idx]
        dna = case[0]
        settings = case[1]
        k = settings[0]
        score_matrix = settings[1]
        expected = output[idx]
        most_probable_k_mer = DNA(dna).get_most_probable_k_mer(
            int(k), score_matrix)

        if most_probable_k_mer != expected:
            raise Exception(
                f"Output not matched!\nExpecting: {expected}"
                f"\nFound: {most_probable_k_mer}"
            )

    print("Passed training data..")

    writer = DataWriter(results_path)
    usage = Usage()

    # NOTE(review): the start index carries a "- 1", so one entry more than
    # len(output) is processed; kept exactly as written - confirm this
    # matches the data layout.
    total = len(test_cases)
    for test_i in range(total - len(output) - 1, total):
        usage.start()
        case = test_cases[test_i]
        dna = case[0]
        settings = case[1]
        k = settings[0]
        score_matrix = settings[1]
        most_probable_k_mer = DNA(dna).get_most_probable_k_mer(
            int(k), score_matrix)
        usage.end()

        writer.write_data(
            test_i + 1,
            most_probable_k_mer,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print(f"\n\nInput:\n{dna}\n{k}\n{score_matrix}")

        print("\n\nOutput\n=====")

        print(most_probable_k_mer)

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 17
0
def main():
    """Run the Problem10 LCS routine: verify training data, solve tests.

    String pairs come from ``DataReader``. Pairs with an expected output are
    verified by comparing only the length of ``StringsAlgorithms.lcs('dp')``
    with the length of that output. The last ``len(output)`` entries of
    ``test_cases`` are then solved, passed to ``DataWriter.write_data`` and
    printed.

    Raises:
        Exception: if a training pair's result length differs from the
            expected output's length.
    """
    source_dir = os.path.join('Problems', 'Problem10')
    target_dir = os.path.join("Problems", "Problem10Solution")

    reader = DataReader(source_dir)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        alpha_dna, beta_dna = test_cases[idx]
        wanted = output[idx]
        solver = StringsAlgorithms(alpha_dna, beta_dna)
        lcs = solver.lcs('dp')
        print(len(lcs))

        # Only the lengths are compared, not the sequences themselves.
        if len(wanted) != len(lcs):
            message = (
                f"Output not matched!\nExpecting: {wanted}"
                f"\nFound: {lcs}"
            )
            raise Exception(message)

    print("Passed training data..")

    writer = DataWriter(target_dir)
    usage = Usage()

    case_count = len(test_cases)
    for test_i in range(case_count - len(output), case_count):
        usage.start()
        alpha_dna, beta_dna = test_cases[test_i]
        solver = StringsAlgorithms(alpha_dna, beta_dna)
        lcs = solver.lcs('dp')
        # This print sits between start() and end(), as in the original.
        print(len(lcs))
        usage.end()

        writer.write_data(
            test_i + 1,
            lcs,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print("\n\nInput:\n" + alpha_dna + "\n" + beta_dna + "\n")

        print("\n\nOutput\n=====")

        print(lcs)

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 18
0
def main():
    """Check ``DNA.get_clumps_patterns`` against the expected outputs.

    Reads the Problem6 data through ``DataReader``. For every case with an
    expected output, the returned patterns are compared with it, ignoring
    order.

    Raises:
        Exception: if a case does not yield the expected patterns.
    """
    problem_dataset_dir = os.path.join('Problems', 'Problem6')
    # Built but not used in the code shown here.
    solution_dir = os.path.join("Problems", "Problem6Solution")

    data_reader = DataReader(problem_dataset_dir)
    test_cases, output = data_reader.get_data()

    for train_i in range(0, len(output)):
        case = test_cases[train_i]
        case_output = output[train_i]
        genome = case[0]
        k = case[1][0]
        l = case[1][1][0]
        t = case[1][1][1]
        dna = DNA(genome)
        clumps_patterns = dna.get_clumps_patterns(int(k), int(t), int(l))

        # list.sort() sorts in place and returns None, so comparing the
        # results of two .sort() calls is always "None != None" and can never
        # flag a mismatch. Compare sorted copies instead.
        if sorted(clumps_patterns) != sorted(case_output):
            # str() is needed: concatenating a list to a str raises TypeError
            # and would hide the real mismatch.
            raise Exception("Output not matched!\nExpecting: " +
                            str(case_output) + "\nFound: " +
                            str(clumps_patterns))
Esempio n. 19
0
def main():
    """Validate De Bruijn graph construction from a string, then solve tests.

    Reads the Problem15 data through ``DataReader``; each case holds ``k``
    in its first field and a DNA string in its second. For cases with an
    expected output, the sorted items of the mapping returned by
    ``Graph.get_debruijn_graph(_type='string', ...)`` must equal the sorted
    items of that output. The last ``len(output)`` entries of ``test_cases``
    are then solved, passed to ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training case's adjacency items differ from the
            expected ones.
    """
    dataset_path = os.path.join('Problems', 'Problem15')
    results_path = os.path.join("Problems", "Problem15Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        case = test_cases[idx]
        k = case[0]
        dna = case[1]
        adj_list = Graph().get_debruijn_graph(
            _type='string', k=int(k), dna_string=dna)
        expected = output[idx]

        # Sorting the (key, value) pairs makes the comparison independent
        # of mapping order.
        found_items = sorted(adj_list.items())
        expected_items = sorted(expected.items())
        if found_items != expected_items:
            raise Exception(
                f"Output not matched!\nExpecting: {expected_items}"
                f"\nFound: {found_items}"
            )

    print("Passed training data..")

    writer = DataWriter(results_path)
    usage = Usage()

    total = len(test_cases)
    for test_i in range(total - len(output), total):
        usage.start()
        case = test_cases[test_i]
        k = case[0]
        dna = case[1]
        adj_list = Graph().get_debruijn_graph(
            _type='string', k=int(k), dna_string=dna)
        usage.end()

        writer.write_data(
            test_i + 1,
            adj_list,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print(f"\n\nInput:\n{k}\n" + dna + "\n")

        print("\n\nOutput\n=====")

        print(adj_list)

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 20
0
def main():
    """Validate ``DNA.get_min_skew`` on the training cases, then solve tests.

    Reads the Problem7 data through ``DataReader``. For cases with an
    expected output, the result for the stripped case string is compared
    with it using ``np.array_equal``. The last ``len(output)`` entries of
    ``test_cases`` are then solved, passed to ``DataWriter.write_data`` and
    printed.

    Raises:
        Exception: if a training case's result differs from the expected
            output.
    """
    dataset_path = os.path.join('Problems', 'Problem7')
    results_path = os.path.join("Problems", "Problem7Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        raw_genome = test_cases[idx]
        expected = np.array(output[idx])
        found = DNA(raw_genome.strip()).get_min_skew()

        matches = np.array_equal(expected, found)
        if not matches:
            raise Exception(
                f"Output not matched!\nExpecting: {expected}"
                f"\nFound: {found}"
            )

    print("Passed training data..")

    writer = DataWriter(results_path)
    usage = Usage()

    total = len(test_cases)
    for test_i in range(total - len(output), total):
        usage.start()
        case = test_cases[test_i]
        min_skew_indices = DNA(case.strip()).get_min_skew()
        usage.end()

        writer.write_data(
            test_i + 1,
            min_skew_indices,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        # The unstripped case string is echoed, as in the original.
        print("\n\nInput:\n" + case + "\n")

        print("\n\nOutput\n=====")

        print(list(min_skew_indices))

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 21
0
def main():
    """Validate ``DNA.reverse_complement`` on training cases, then solve tests.

    Reads the Problem4 data through ``DataReader``. For cases with an
    expected output, the reverse complement of the case must equal it. The
    last ``len(output)`` entries of ``test_cases`` are then solved, passed
    to ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training case's reverse complement differs from the
            expected output.
    """
    dataset_path = os.path.join('Problems', 'Problem4')
    results_path = os.path.join("Problems", "Problem4Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        sequence = test_cases[idx]
        expected = output[idx]
        found = DNA(sequence).reverse_complement()

        if found != expected:
            # "found" is concatenated as-is, exactly like the original code.
            raise Exception(
                f"Output not matched!\nExpecting: {expected}\nFound: " + found
            )

    print("Passed training data..")

    writer = DataWriter(results_path)
    usage = Usage()

    total = len(test_cases)
    for test_i in range(total - len(output), total):
        usage.start()
        case = test_cases[test_i]
        reverse_complement = DNA(case).reverse_complement()
        usage.end()

        writer.write_data(
            test_i + 1,
            reverse_complement,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print("\n\nInput:\n" + case)

        print("\n\nOutput\n=====")

        print(reverse_complement)

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 22
0
def main():
    """Run the Problem13 k-mer listing: verify training data, solve tests.

    Cases come from ``DataReader`` and unpack into ``(k, genome)``. Cases
    with an expected output are verified by comparing the sorted result of
    ``DNA.get_k_mers(int(k))`` with the sorted output. The last
    ``len(output)`` entries of ``test_cases`` are then solved, passed to
    ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training case's k-mers differ from the expected
            output.
    """
    source_dir = os.path.join('Problems', 'Problem13')
    target_dir = os.path.join("Problems", "Problem13Solution")

    reader = DataReader(source_dir)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        k, genome = test_cases[idx]
        wanted = output[idx]
        strand = DNA(genome)
        k_mers = strand.get_k_mers(int(k))

        # Order-insensitive comparison that still respects duplicates.
        if sorted(wanted) != sorted(k_mers):
            message = (
                f"Output not matched!\nExpecting: {wanted}"
                f"\nFound: {k_mers}"
            )
            raise Exception(message)

    print("Passed training data..")

    writer = DataWriter(target_dir)
    usage = Usage()

    case_count = len(test_cases)
    for test_i in range(case_count - len(output), case_count):
        usage.start()
        k, genome = test_cases[test_i]
        strand = DNA(genome)
        k_mers = strand.get_k_mers(int(k))
        usage.end()

        writer.write_data(
            test_i + 1,
            k_mers,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        # k is concatenated as-is, exactly like the original code.
        print("\n\nInput:\n" + k + "\n" + genome + "\n")

        print("\n\nOutput\n=====")

        print(k_mers)

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 23
0
def main():
    """Validate ``Graph.get_overlap_graph`` on training cases, then solve tests.

    Reads the Problem14 data through ``DataReader``; each case is passed to
    the ``Graph`` constructor. For cases with an expected output, the sorted
    items of the returned mapping must equal the sorted items of that
    output. The last ``len(output)`` entries of ``test_cases`` are then
    solved, passed to ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training case's adjacency items differ from the
            expected ones.
    """
    dataset_path = os.path.join('Problems', 'Problem14')
    results_path = os.path.join("Problems", "Problem14Solution")

    reader = DataReader(dataset_path)
    test_cases, output = reader.get_data()

    for idx in range(len(output)):
        k_mers = test_cases[idx]
        expected = output[idx]
        adj_list = Graph(k_mers).get_overlap_graph()

        # Sorted items are compared; the message shows the raw mappings.
        if sorted(adj_list.items()) != sorted(expected.items()):
            raise Exception(
                f"Output not matched!\nExpecting: {expected}"
                f"\nFound: {adj_list}"
            )

    print("Passed training data..")

    writer = DataWriter(results_path)
    usage = Usage()

    total = len(test_cases)
    for test_i in range(total - len(output), total):
        usage.start()
        k_mers = test_cases[test_i]
        adj_list = Graph(k_mers).get_overlap_graph()
        usage.end()

        writer.write_data(
            test_i + 1,
            adj_list,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )
        print(f"\n\nInput:\n{k_mers}\n")

        print("\n\nOutput\n=====")

        print(adj_list)

        print("\n\n======")
        print(f"Execution Time: {usage.get_execution_time()} s")
        print(f"Memory Used: {usage.get_memory_usage()} MB")
Esempio n. 24
0
def main():
    """Validate ``RNA.to_amino_acid`` on training samples, then solve tests.

    Loads the Problem1 samples and the RNA codon table through
    ``DataReader``. Each training sample's first field is wrapped in ``RNA``
    and the result of ``to_amino_acid`` must equal its second field. Each
    testing sample is then solved between ``Usage.start()`` and
    ``Usage.end()``, passed to ``DataWriter.write_data`` and printed.

    Raises:
        Exception: if a training sample's result differs from its expected
            output.
    """
    dataset_path = os.path.join('Problems', 'Problem1')
    results_path = os.path.join("Problems", "Problem1Solution")

    reader = DataReader(dataset_path)
    training_data, testing_data = reader.get_data()
    codons_table = reader.get_rna_codon_table()

    for sample in training_data:
        rna_string = sample[0]
        expected = sample[1]
        strand = RNA(rna_string)
        strand.set_codons_table(codons_table)
        amino_acid = strand.to_amino_acid()

        if amino_acid != expected:
            # Both values are concatenated as-is, like the original code.
            message = "Output not matched!\nExpecting: " + expected
            message = message + "\nFound: " + amino_acid
            raise Exception(message)

    print("Passed training data..\n\n")

    writer = DataWriter(results_path)
    usage = Usage()

    for sample in testing_data:
        usage.start()
        rna_string = sample[0]
        strand = RNA(rna_string)
        strand.set_codons_table(codons_table)
        amino_acid = strand.to_amino_acid()
        usage.end()

        writer.write_data(
            rna_string,
            amino_acid,
            usage.get_execution_time(),
            usage.get_memory_usage(),
        )

        print("RNA:\n" + rna_string)
        print("Protein:\n" + amino_acid)
        print(f"\n\nExecution Time: {usage.get_execution_time()} s")
        print(f"Memory Usage: {usage.get_memory_usage()} MB")
Esempio n. 25
0
from data_reader.reader import DataReader
from libs_utils.sklearn_util.sklearn_util import SklearnUtil
from libs_utils.nltk_util.nltk_util import NltkUtil
from preprocessing_util.preprocessing import Preprocessing
import pickle

# Preprocessing script: load the training question pairs and run them through
# a fixed sequence of Preprocessing steps, printing a marker after each step.

# Path handed to DataReader.
file_path = "./data/train.csv"

reader = DataReader(file_path)
# NOTE(review): called only for its side effect; presumably it loads the file
# so that get_train_data() has something to return - confirm in DataReader.
reader.get_all_train_data()
questions_pairs, labels = reader.get_train_data()

# Each cleaning step wraps the current pairs in a fresh Preprocessing object
# and rebinds questions_pairs to that step's result, so the steps are chained.
preprocessor = Preprocessing(questions_pairs)
questions_pairs = preprocessor.remove_extra_whitespaces()
print("End removing extra spaces")

preprocessor = Preprocessing(questions_pairs)
questions_pairs = preprocessor.remove_punctuation()
print("End removing punctuations")

preprocessor = Preprocessing(questions_pairs)
questions_pairs = preprocessor.normalize_text()
print("End normalization")

# The two tokenization results are stored under their own names and
# questions_pairs is not rebound, so both tokenizers receive the same
# normalized pairs.
preprocessor = Preprocessing(questions_pairs)
tokenized_questions_pairs = preprocessor.tokenize()
print("End tokenization")

preprocessor = Preprocessing(questions_pairs)
stemmed_questions_pairs = preprocessor.tokenize_with_stemming()
print("End stemmed tokenization")