Example #1
0
    def test_insertion__greater_than_one_base(self):
        """Insert a multi-base sequence and check sequence plus liftover.

        A two-base insertion is applied at the boundary between the leading
        and trailing segments of the record. The test then verifies that:

        * the record's sequence contains the inserted bases at the boundary;
        * ``maker.runtime_liftover._interval_mapping`` holds two entries, the
          first leaving the leading segment's coordinates unchanged and the
          second shifting the trailing segment by the inserted length.
        """
        leading = 'AAAAAAAAAAAAA'
        trailing = 'CCCCCCCCCCCCCC'
        original = leading + trailing
        seq_record = SeqRecord(Seq(original, generic_dna))
        maker = VCFToGenbankMaker(seq_record, None, None)

        inserted = 'TT'
        maker.handle_insertion({
            'position': len(leading),
            'sequence': inserted
        })

        # The inserted bases must sit exactly between the two segments.
        self.assertEqual(leading + inserted + trailing, str(seq_record.seq))

        # Each expected entry is a pair of (start, end) intervals; judging by
        # the values, the first interval is in original coordinates and the
        # second in post-insertion coordinates, with inclusive endpoints.
        shift = len(inserted)
        leading_interval = (0, len(leading) - 1)
        trailing_interval = (len(leading), len(original) - 1)
        shifted_trailing_interval = (trailing_interval[0] + shift,
                                     trailing_interval[1] + shift)
        expected_mapping = [
            (leading_interval, leading_interval),
            (trailing_interval, shifted_trailing_interval),
        ]
        self.assertEqual(expected_mapping,
                         maker.runtime_liftover._interval_mapping)
Example #2
0
    def test_convert_target_position_to_source(self):
        """Check translation of target positions back to source positions.

        After a single-base insertion, three positions in the new (target)
        sequence are passed to
        ``runtime_liftover.convert_target_position_to_source``:

        * a position before the insertion, expected to map to itself;
        * the insertion position itself, expected to map to ``None``;
        * the position right after the insertion, expected to map to the
          insertion position in the original sequence.
        """
        leading = 'AAAAAAAAAAAAA'
        trailing = 'CCCCCCCCCCCCCC'
        seq_record = SeqRecord(Seq(leading + trailing, generic_dna))
        maker = VCFToGenbankMaker(seq_record, None, None)

        insert_at = len(leading)
        maker.handle_insertion({'position': insert_at, 'sequence': 'T'})

        def to_source(target_position):
            # Thin wrapper that keeps the three lookups below short.
            liftover = maker.runtime_liftover
            return liftover.convert_target_position_to_source(target_position)

        # Before the insertion point: coordinates are unaffected.
        self.assertEqual(3, to_source(3))
        # The inserted base has no counterpart in the source.
        self.assertIsNone(to_source(insert_at))
        # One past the inserted base maps back to the insertion position.
        self.assertEqual(insert_at, to_source(insert_at + 1))
Example #3
0
    def test_combo__reverse_operations(self):
        """Apply an insertion and then a deletion, and check the sequence.

        The insertion is handled first even though its position lies after
        the deleted interval. Both the insertion position and the deletion
        interval are computed from segment lengths of the original sequence.
        The final sequence must have the deleted bases removed and the
        inserted bases placed just before the last segment.
        """
        head = 'AAAAA'
        removed = 'TTT'
        middle = 'GGGGGGGGGGG'
        inserted = 'AAA'
        tail = 'TTTTT'
        original = head + removed + middle + tail
        seq_record = SeqRecord(Seq(original, generic_dna))
        maker = VCFToGenbankMaker(seq_record, None, None)

        # Coordinates below all refer to the original, unmodified sequence.
        deletion_start = len(head)
        deletion_end = deletion_start + len(removed)
        insertion_position = deletion_end + len(middle)

        maker.handle_insertion({
            'position': insertion_position,
            'sequence': inserted
        })
        maker.handle_deletion({'interval': (deletion_start, deletion_end)})

        expected = head + middle + inserted + tail
        self.assertEqual(len(expected), len(seq_record.seq))
        self.assertEqual(expected, str(seq_record.seq))
Example #4
0
    def test_insertion__simple(self):
        """Insert a single base and check sequence plus liftover mapping.

        The base is inserted at the boundary between the leading and
        trailing segments. The test verifies the record's resulting sequence
        and the two entries expected in
        ``maker.runtime_liftover._interval_mapping``: the leading segment
        unchanged and the trailing segment shifted by one position.
        """
        leading = 'AAAAAAAAAAAAA'
        trailing = 'CCCCCCCCCCCCCC'
        original = leading + trailing
        seq_record = SeqRecord(Seq(original, generic_dna))
        maker = VCFToGenbankMaker(seq_record, None, None)

        inserted_base = 'T'
        maker.handle_insertion({
            'position': len(leading),
            'sequence': inserted_base
        })

        self.assertEqual(leading + inserted_base + trailing,
                         str(seq_record.seq))

        # Each expected entry is a pair of (start, end) intervals; judging by
        # the values, the first interval is in original coordinates and the
        # second in post-insertion coordinates, with inclusive endpoints.
        leading_interval = (0, len(leading) - 1)
        trailing_interval = (len(leading), len(original) - 1)
        shifted_trailing_interval = (len(leading) + 1, len(original))
        expected_mapping = [
            (leading_interval, leading_interval),
            (trailing_interval, shifted_trailing_interval),
        ]
        self.assertEqual(expected_mapping,
                         maker.runtime_liftover._interval_mapping)
Example #5
0
    def test_insertion__multiple(self):
        """Apply two single-base insertions and check the final sequence.

        The original sequence is built from four segments. One base is
        inserted after the first segment and another after the third. Both
        positions are computed from segment lengths of the original
        sequence. Only the resulting sequence is asserted here; the liftover
        mapping is not inspected by this test.
        """
        segment_a = 'AAAAAAAAAAAAA'
        segment_b = 'CCCCCCCCCCCCCC'
        segment_c = 'AAAAAAAAAAAAAAAAAA'
        segment_d = 'CCCCCCCCCCCCCCCC'
        original = segment_a + segment_b + segment_c + segment_d
        seq_record = SeqRecord(Seq(original, generic_dna))
        maker = VCFToGenbankMaker(seq_record, None, None)

        first_base = 'T'
        first_position = len(segment_a)
        maker.handle_insertion({
            'position': first_position,
            'sequence': first_base
        })

        second_base = 'G'
        second_position = first_position + len(segment_b) + len(segment_c)
        maker.handle_insertion({
            'position': second_position,
            'sequence': second_base
        })

        expected = ''.join([
            segment_a, first_base, segment_b,
            segment_c, second_base, segment_d,
        ])
        self.assertEqual(expected, str(seq_record.seq))
Example #6
0
    def test_combo(self):
        """Apply a deletion, an insertion and a SNP, then check the sequence.

        The operations are handled in that order. Every position passed to
        the maker is computed from segment lengths of the original sequence,
        including the already-deleted bases. The final sequence must have
        the deleted bases removed, one ``G`` replaced by ``C`` in the middle
        segment, and the inserted bases placed just before the last segment.
        """
        head = 'AAAAA'
        removed = 'TTT'
        middle = 'GGGGGGGGGGG'
        middle_with_snp = 'GGGCGGGGGGG'
        inserted = 'AAA'
        tail = 'TTTTT'
        original = head + removed + middle + tail
        seq_record = SeqRecord(Seq(original, generic_dna))
        maker = VCFToGenbankMaker(seq_record, None, None)

        # Deletion of the bases between the head and middle segments.
        deletion_start = len(head)
        deletion_end = deletion_start + len(removed)
        maker.handle_deletion({'interval': (deletion_start, deletion_end)})

        # Insertion right after the middle segment.
        insertion_position = deletion_end + len(middle)
        maker.handle_insertion({
            'position': insertion_position,
            'sequence': inserted
        })

        # SNP at the offset where the altered middle segment carries its 'C'.
        snp_position = deletion_end + middle_with_snp.index('C')
        maker.handle_snp({'position': snp_position, 'ref': 'G', 'alt': 'C'})

        expected = head + middle_with_snp + inserted + tail
        self.assertEqual(len(expected), len(seq_record.seq))
        self.assertEqual(expected, str(seq_record.seq))