def test_run_blossum(self):
    """Compare pairwise Gotoh scores (BLOSUM62, gap 11 / extend 1) to a lookup table.

    Every alignment result returned for the guideline FASTA file is looked up
    by its (seq1, seq2) pair and its score is compared with the tabulated
    value. The number of alignments per pair is printed.
    """
    seq_a = 'ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN'
    seq_b = 'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT'
    seq_c = 'ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA'
    seq_d = 'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD'

    # Expected score keyed by the ordered pair of raw sequences.
    pairs_to_result = {
        (seq_a, seq_b): 0,
        (seq_a, seq_c): 41,
        (seq_a, seq_d): 5,
        (seq_b, seq_d): -4,
        (seq_c, seq_d): 18,
        (seq_b, seq_c): -5,
    }

    sequences = parse_fasta_files(['../data/guideline_tests/needlemanwunsch.fa'])
    aligner = Gotoh(
        substitution_matrix=MatrixInfo.blosum62,
        gap_penalty=11,
        gap_extend=1,
        similarity=True,
        verbose=False,
        complete_traceback=True,
    )

    for outcome in aligner.pairwise_alignments(sequences):
        key = (str(outcome.seq1), str(outcome.seq2))
        wanted = pairs_to_result[key]
        self.assertEqual(outcome.score, wanted)
        print(len(outcome.alignments))
def test_guideline_blosum():
    """Check scores and alignments for the guideline sequences (04.02.2019).

    The expected values below are not the ones printed in the guideline.
    """
    gotoh = Gotoh()
    result, _info = gotoh.run("data/xpgma_guideline.fasta",
                              "data/xpgma_guideline.fasta",
                              "data/blosum62.txt",
                              False, 11, 1, True)

    # `result` is an upper-triangular n x n matrix, so only cells with
    # row < column are inspected. Each row below is:
    # (row, column, expected score, expected alignment count, first alignment)
    expectations = [
        (0, 1, 0, 1,
         ('ILDMDVVEGSAARFDCKVEG_YPDPEVMWFKDDNP___VKESRHFQIDYDEEGN',
          'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFV____SQTT')),
        (0, 2, 41, 3,
         ('ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN',
          'ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEV_____LA')),
        (0, 3, 5, 1,
         ('ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN',
          'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD____')),
        (1, 2, -5, 4,
         ('RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT',
          'ISDTEADIGSNLRWGC_AAAGKPRPMVRWLRNG____EPLASQNRVEVLA')),
        (1, 3, -4, 2,
         ('RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT',
          'RRLIPAARGGEISILCQPRA_APKATILW__SKGTEILGNSTRVTVT_SD')),
        (2, 3, 18, 1,
         ('ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA_',
          'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD')),
    ]

    for row, col, score, count, first_alignment in expectations:
        cell = result[row][col]
        assert cell[3] == score
        assert len(cell[2]) == count
        assert cell[2][0] == first_alignment
def test_example_invalid_characters_fail():
    """Negative test: ``run`` must exit with code 12 on invalid characters.

    The second input file contains non-amino-acid characters, so the call is
    expected to terminate through ``SystemExit`` carrying exit code 12.
    """
    gt = Gotoh()
    seq_fasta_1 = os.path.join('data', 'sequences', 'seq1.fasta')
    seq_fasta_2 = os.path.join('data', 'sequences', 'Invalid_characters.fasta')

    # Only the call that is expected to raise belongs inside the context
    # manager: nothing after it in the ``with`` body would ever execute.
    with pytest.raises(SystemExit) as exc_info:
        gt.run(seq_fasta_1, seq_fasta_2, 'pam250', -11, -1, False)

    assert exc_info.type is SystemExit
    # The exit status lives on the raised exception (``SystemExit.code``),
    # which pytest exposes as ``ExceptionInfo.value``; ``ExceptionInfo``
    # itself has no ``code`` attribute.
    assert exc_info.value.code == 12
def test_example_invalid_format_fail():
    """Negative test: ``run`` must exit with code 1 on a malformed FASTA file.

    The first file passed to ``run`` is invalid because its first line does
    not start with ``>``, so the call is expected to terminate through
    ``SystemExit`` carrying exit code 1.
    """
    gt = Gotoh()
    seq_fasta_1 = os.path.join('data', 'sequences', 'seq1.fasta')
    seq_fasta_2 = os.path.join('data', 'sequences', 'Invalid_format.fasta')

    # Only the call that is expected to raise belongs inside the context
    # manager: nothing after it in the ``with`` body would ever execute.
    # Note the argument order: the malformed file is passed first.
    with pytest.raises(SystemExit) as exc_info:
        gt.run(seq_fasta_2, seq_fasta_1, 'pam250', -11, -1, False)

    assert exc_info.type is SystemExit
    # The exit status lives on the raised exception (``SystemExit.code``),
    # which pytest exposes as ``ExceptionInfo.value``; ``ExceptionInfo``
    # itself has no ``code`` attribute.
    assert exc_info.value.code == 1
def test_example():
    """Smoke test: ``run`` can be called and yields the expected six fields.

    NOTE(review): SOURCE contains a second ``test_example`` definition; if
    both live in the same module the later one replaces this one and this
    test is never collected -- confirm and rename if so.
    """
    gt = Gotoh()
    outcome = gt.run("data/sequence1.fa", "data/sequence2.fa",
                     "data/blosum62.txt", 5, 20, False)
    first_id, first_seq, second_id, second_seq, total, found = outcome

    assert first_id == "idA"
    assert second_id == "idB"
    assert first_seq == "FancySequenceA"
    assert second_seq == "FancysequenceB"
    assert total == 1000
    assert found[0] == ("Fancy_SequenceA_", "Fancys_equence_B")
def test_example():
    """Smoke test: ``run`` can be called on the two example sequences.

    Inspects the first cell of the returned result matrix: record ids, raw
    sequences, score, number of alignments and the first alignment.

    NOTE(review): SOURCE contains another ``test_example`` definition; if
    both live in the same module only the later one is collected -- confirm.
    """
    gt = Gotoh()
    result, _info = gt.run("data/sequence1.fasta", "data/sequence2.fasta",
                           "data/test_scoring_similarity.txt",
                           False, 5, 1, True)

    # Cell layout as used below: [0] and [1] are the two sequence records,
    # [2] the list of alignments, [3] the score.
    cell = result[0][0]
    record_a, record_b = cell[0], cell[1]

    assert record_a.id == "idA"
    assert record_b.id == "idB"
    assert str(record_a.seq) == "TCCGA"
    assert str(record_b.seq) == "TACGCAGA"
    assert cell[3] == -4
    assert len(cell[2]) == 3
    assert cell[2][0] == ('T___CCGA', 'TACGCAGA')
def test_too_few_arguments():
    """Negative test: calling ``run`` with an argument missing raises ``TypeError``.

    The call below deliberately leaves out one positional argument, so
    Python itself rejects it with ``TypeError`` before any alignment work
    starts.
    """
    gt = Gotoh()
    seq_fasta_1 = os.path.join('data', 'sequences', 'seq1.fasta')
    seq_fasta_2 = os.path.join('data', 'sequences', 'seq2.fasta')

    # Expect the TypeError directly instead of tracking it with a flag; any
    # other outcome (no exception, or SystemExit) fails the test.
    with pytest.raises(TypeError):
        gt.run(seq_fasta_1, seq_fasta_2, 'pam250', -1, False)
def test_init(self):
    """Verify the freshly initialised D, P and Q matrices for "AAAC" vs "AAAC".

    D is expected to carry the gap costs -12..-15 along its first row and
    column; P and Q are expected to have -inf borders. All inner cells are 0.
    """
    aligner = Gotoh()
    aligner.init_scoring_matrices("AAAC", "AAAC")
    print(aligner.scoring_matrix_D)
    print(aligner.scoring_matrix_P)
    print(aligner.scoring_matrix_Q)

    # Expected D: zero everywhere except the first row/column.
    border_costs = [-12., -13., -14., -15.]
    expected_d = np.zeros((5, 5))
    expected_d[0, 1:] = border_costs
    expected_d[1:, 0] = border_costs

    # Expected P and Q share one shape: -inf border, zero interior.
    expected_gap = np.zeros((5, 5))
    expected_gap[0, :] = -math.inf
    expected_gap[:, 0] = -math.inf

    assert np.array_equal(aligner.scoring_matrix_D, expected_d)
    assert np.array_equal(aligner.scoring_matrix_P, expected_gap)
    assert np.array_equal(aligner.scoring_matrix_Q, expected_gap)
def test_example_success():
    """Positive test: ``Gotoh.run`` aligns seq1 and seq2 as expected.

    Runs the aligner with PAM250 and the gap arguments -11 / -1, then checks
    the returned ids, raw sequences, score, the two alignments and the
    alignment count.
    """
    gt = Gotoh()
    seq_fasta_1 = os.path.join('data', 'sequences', 'seq1.fasta')
    seq_fasta_2 = os.path.join('data', 'sequences', 'seq2.fasta')

    result = gt.run(seq_fasta_1, seq_fasta_2, 'pam250', -11, -1, True)
    (id_seq1, seq1, id_seq2, seq2, score, alignments, num_alignments) = result

    assert id_seq1 == "ID1"
    assert id_seq2 == "ID2"
    assert seq1 == "ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN"
    assert seq2 == "RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT"
    assert score == 33

    # The third entry of each alignment is a marker row with one character
    # per alignment column ('*' identical, ':' different, blank under a gap).
    # The blank runs are spelled out with explicit repetition so the row
    # keeps the full 54-column width of the aligned sequences; the earlier
    # literals had their blank runs collapsed and were only 49 characters.
    shared_prefix = ':::::::**::::::*::::' + ' ' + '**:::::*:::::*' + ' ' * 3
    markers_first = shared_prefix + '::::***:' + ' ' * 4 + '::::'
    markers_second = shared_prefix + '::::***' + ' ' * 4 + ':::::'

    assert alignments == [
        ['ILDMDVVEGSAARFDCKVEG-YPDPEVMWFKDDNP---VKESRHFQIDYDEEGN',
         'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFV----SQTT',
         markers_first],
        ['ILDMDVVEGSAARFDCKVEG-YPDPEVMWFKDDNP---VKESRHFQIDYDEEGN',
         'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHF----VSQTT',
         markers_second],
    ]
    assert num_alignments == 2
def test_scoring_blossum(self):
    """Check D-matrix scoring with BLOSUM62, gap penalty -11 and gap extend -1.

    Three sequence pairs are scored in turn; every run is echoed to stdout
    before its expectation is checked.
    """
    print("######### Testing calculation of scoring matrix. ###########")
    aligner = Gotoh(substitution_matrix=MatrixInfo.blosum62,
                    gap_penalty=-11, gap_extend=-1)

    def score_and_report(banner, first, second, pretty=False):
        # Print the case header, fill the matrix, then echo inputs and result.
        print(banner)
        print("############# START ##############")
        aligner.calculate_scoring_matrix(first, second)
        print("SEQ1: %s" % first)
        print("SEQ2: %s" % second)
        if pretty:
            print("RESULT:\n %s" % pprint.pformat(aligner.scoring_matrix_D))
        else:
            print("RESULT:\n %s" % aligner.scoring_matrix_D)

    score_and_report("############# Case 1 ##############", "A", "A")
    np.testing.assert_array_equal(
        aligner.scoring_matrix_D,
        np.array([[0., -12.0], [-12., 4.]]))
    print("############# FINISH ##############")

    score_and_report("############# Case 2 ##############", "A", "AT")
    np.testing.assert_array_equal(
        aligner.scoring_matrix_D,
        np.array([[0., -12., -13.], [-12., 4., -8.]]))
    print("############# FINISH ##############")

    # For the long pair only the final cell (the overall score) is checked.
    score_and_report(
        "############# Case 3 ##############",
        "RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT",
        "ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA",
        pretty=True)
    self.assertAlmostEqual(aligner.scoring_matrix_D[-1][-1], -5.0)
    print("############# FINISH ##############")
def test_scoring_pam(self):
    """Check D-matrix scoring with PAM250, gap penalty -11 and gap extend -1.

    Three sequence pairs are scored in turn; every run is echoed to stdout
    before its expectation is checked.
    """
    print("######### Testing calculation of scoring matrix. ###########")
    aligner = Gotoh(substitution_matrix=MatrixInfo.pam250,
                    gap_penalty=-11, gap_extend=-1)

    def score_and_report(banner, first, second, pretty=False):
        # Print the case header, fill the matrix, then echo inputs and result.
        print(banner)
        print("############# START ##############")
        aligner.calculate_scoring_matrix(first, second)
        print("SEQ1: %s" % first)
        print("SEQ2: %s" % second)
        if pretty:
            print("RESULT:\n %s" % pprint.pformat(aligner.scoring_matrix_D))
        else:
            print("RESULT:\n %s" % aligner.scoring_matrix_D)

    score_and_report("############# Case 1 ##############", "A", "A")
    np.testing.assert_array_equal(
        aligner.scoring_matrix_D,
        np.array([[0., -12.], [-12., 2.]]))
    print("############# FINISH ##############")

    score_and_report("############# Case 2 ##############", "A", "AT")
    np.testing.assert_array_equal(
        aligner.scoring_matrix_D,
        np.array([[0., -12., -13.], [-12., 2., -10.]]))
    print("############# FINISH ##############")

    # For the long pair only the final cell (the overall score) is checked.
    score_and_report(
        "############# Case 3 ##############",
        "ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN",
        "RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT",
        pretty=True)
    self.assertAlmostEqual(aligner.scoring_matrix_D[-1][-1], 33)
    print("############# FINISH ##############")
def let_compute(self, input):
    """
    Starts the algorithm with the right input parameters.

    Args:
        input: console input; ``input[0]`` selects the algorithm and the
            complete ``input`` is forwarded to that algorithm's ``compute``.

    Raises:
        ValueError: if ``input[0]`` matches none of the supported algorithms.
    """
    algorithm = input[0]
    if algorithm == available_algorithms.GOTOH:
        procedure = Gotoh()
    elif algorithm == available_algorithms.NEEDLEMAN_WUNSCH:
        procedure = NeedlemanWunsch()
    elif algorithm == available_algorithms.NEEDLEMAN_WUNSCH_3D:
        procedure = NeedlemanWunsch3()
    elif algorithm == available_algorithms.NUSSINOV:
        procedure = Nussinov()
    else:
        # Without this branch `procedure` would stay unbound and the call
        # below would fail with an uninformative UnboundLocalError.
        raise ValueError("Unknown algorithm: %r" % (algorithm,))
    procedure.compute(input)
def test_instance():
    """Gotoh must derive from GotohBase, both as a class and as an instance."""
    assert issubclass(Gotoh, GotohBase)
    instance = Gotoh()
    assert isinstance(instance, GotohBase)