Ejemplo n.º 1
0
    def test_recover_local_align_multiple_solutions(self):
        """Check the local alignments recovered for three sequence pairs.

        Each scenario runs ``local_align_multiple_solutions`` and feeds the
        resulting score and trace matrices, together with the same two
        sequences, to ``recover_local_align_multiple_solutions``.
        """
        # Scenario 1: the "classes" example (fourth argument -8, presumably
        # the gap penalty -- confirm against the aligner's signature).
        first, second = self.slides_seq2, self.slides_seq1
        score_mat, trace_mat, _ = local_align_multiple_solutions(
            first, second, self.sm, -8)
        recovered = recover_local_align_multiple_solutions(
            score_mat, trace_mat, first, second)
        expected_slides = [['HSW', 'HGW'], ['HSWG', 'HGWA']]
        self.assertEqual(recovered, expected_slides)

        # Scenario 2: seq1/seq2 scored with the DNA matrix.
        first, second = self.seq1, self.seq2
        score_mat, trace_mat, _ = local_align_multiple_solutions(
            first, second, self.sm_dna, -1)
        recovered = recover_local_align_multiple_solutions(
            score_mat, trace_mat, first, second)
        expected_dna = [['AT', 'AT'], ['CA', 'CA']]
        self.assertEqual(recovered, expected_dna)

        # Scenario 3: two proteins loaded from the FASTA fixture.
        proteins = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
        first = proteins['sp|C1F111']
        second = proteins['sp|B7JC18']
        score_mat, trace_mat, _ = local_align_multiple_solutions(
            first, second, self.sm, -3)
        recovered = recover_local_align_multiple_solutions(
            score_mat, trace_mat, first, second)

        # 4 local optimal alignments between sp|C1F111 & sp|B7JC18
        expected_count = 4
        self.assertEqual(len(recovered), expected_count)

        print('>> Passed test_recover_local_align_multiple_solutions()')
Ejemplo n.º 2
0
    def test_compare_pairwise_num_local_align(self):
        """Spot-check the pairwise table of local-alignment counts.

        Loads every sequence from the protein FASTA fixture, passes them to
        ``compare_pairwise_num_local_align`` with ``self.sm`` and -3
        (presumably the gap penalty -- confirm against the function's
        signature), and asserts a handful of entries of the returned table.
        """
        seqs = list(
            BioSeq.read_fasta_file(
                'tests/files/protein_sequences.fas').values())
        cla = compare_pairwise_num_local_align(seqs, self.sm, -3)

        # Some hand-picked entries; the table is indexed as cla[i][j]
        # (presumably by position of each sequence in the fixture -- verify).
        self.assertEqual(
            cla[4][3],
            4)  # Between sp|C1F111 & sp|B7JC18 - matches previous test
        self.assertEqual(cla[0][0], 1)
        self.assertEqual(cla[9][6], 144)
        self.assertEqual(cla[8][10], 1152)

        # The message names this test method exactly so the output can be
        # matched to the test that produced it.
        print('>> Passed test_compare_pairwise_num_local_align()')
Ejemplo n.º 3
0
    def test_recover_global_align_multiple_solutions(self):
        """Check the global alignments recovered for three sequence pairs.

        Each scenario builds a trace with ``global_align_multiple_solutions``
        and passes it, with the same two sequences, to
        ``recover_global_align_multiple_solutions``. Every recovered item is
        read as a pair: index 0 is the aligned first sequence and index 1 the
        aligned second sequence.
        """
        _, ga_trace = global_align_multiple_solutions(self.slides_seq1,
                                                      self.slides_seq2,
                                                      self.sm, -3)
        rga = recover_global_align_multiple_solutions(ga_trace,
                                                      self.slides_seq1,
                                                      self.slides_seq2)
        seq1_alignments = [align[0] for align in rga]
        seq2_alignments = [align[1] for align in rga]

        # Classes Example
        # assertIn reports the missing member and the container on failure,
        # unlike assertTrue(x in y), which only says "False is not true".
        self.assertIn('-HGWAG', seq1_alignments)
        self.assertIn('PHSW-G', seq2_alignments)

        # C2 example
        _, ga_trace = global_align_multiple_solutions(self.seq1, self.seq2,
                                                      self.sm_dna, -1)
        rga = recover_global_align_multiple_solutions(ga_trace, self.seq1,
                                                      self.seq2)
        seq1_alignments = [align[0] for align in rga]
        seq2_alignments = [align[1] for align in rga]

        # Exactly three alignments are expected; the three gap placements
        # below are all checked on the second sequence.
        self.assertEqual(len(seq1_alignments), 3)
        self.assertIn('G-ATTACA', seq1_alignments)
        self.assertIn('GCA-TGCT', seq2_alignments)
        self.assertIn('GCAT-GCT', seq2_alignments)
        self.assertIn('GCATG-CT', seq2_alignments)

        seqs = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
        _, ga_trace = global_align_multiple_solutions(seqs['sp|C1F111'],
                                                      seqs['sp|B7JC18'],
                                                      self.sm, -3)
        rga = recover_global_align_multiple_solutions(ga_trace,
                                                      seqs['sp|C1F111'],
                                                      seqs['sp|B7JC18'])

        # 5760 global optimal alignments between sp|C1F111 & sp|B7JC18
        self.assertEqual(len(rga), 5760)

        print('>> Passed test_recover_global_align_multiple_solutions()')
Ejemplo n.º 4
0
        print('Seq1: ' + str(align[0]) + '\nSeq2: ' + str(align[1]) + '\n')
    wait_input()

    print("\n-------------------------\n")

    print("Now shall we make it a little bit more complex?\n")
    wait_input()

    print(
        "So, lets load and use the proteins present in the 'tests/files/protein_sequences.fas' file!\n\
        We need to use the read fasta functionality from the bioseq library to load them.\n"
    )
    wait_input()

    print("The protein sequences are:\n")
    seqs = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
    for key, value in seqs.items():
        print("> " + str(key) + ": " + str(value) + "\n")
    wait_input()

    print("::: GLOBAL ALIGNMENT with multiple solutions :::\n")
    print(
        "\nLets make the global alignment with two of our protein sequences: sp|B0C882: & sp|A1TQI0, the 'sm_dna' and a gap of -3, shall we?\n"
    )
    p_seq1 = seqs["sp|B0C882"]
    p_seq2 = seqs["sp|A1TQI0"]
    wait_input()

    ga_score, ga_trace = global_align_multiple_solutions(
        p_seq1, p_seq2, sm_blosum, -3)
    print("Score matrix obtained:\n")