Beispiel #1
0
    def test_mismatch(self):
        """ Compare SEQ and CIGAR for a spliced transcript that contains a
            mismatch. """

        sam = "input_files/sams/mismatch.sam"
        genome = Fasta("input_files/hg38_chr1.fa")
        sjDict = set()

        with open(sam, 'r') as f:
            sam_line = f.readline().strip().split('\t')
            transcript = t2.Transcript(sam_line, genome, sjDict)

        assert t2.check_seq_and_cigar_length(transcript.SEQ,
                                             transcript.CIGAR) == True
Beispiel #2
0
    def test_perfect_match_with_introns(self):
        """ Compare SEQ and CIGAR for a transcript that is a perfect
            reference match containing introns. """

        sam = "input_files/sams/perfectReferenceMatch_twoIntrons.sam"
        genome = Fasta("input_files/hg38_chr1.fa")
        sjDict = set()

        with open(sam, 'r') as f:
            sam_line = f.readline().strip().split('\t')
            transcript = t2.Transcript(sam_line, genome, sjDict)

        assert t2.check_seq_and_cigar_length(transcript.SEQ,
                                             transcript.CIGAR) == True
Beispiel #3
0
    def test_perfect_match(self):
        """ Since this read is a perfect match to the reference, its CIGAR and
            sequence fields should definitely be the same length """

        sam = "input_files/sams/perfectReferenceMatch_noIntrons.sam"
        genome = Fasta("input_files/hg38_chr1.fa")
        sjDict = set()

        with open(sam, 'r') as f:
            sam_line = f.readline().strip().split('\t')
            transcript = t2.Transcript(sam_line, genome, sjDict)

        assert t2.check_seq_and_cigar_length(transcript.SEQ,
                                             transcript.CIGAR) == True
Beispiel #4
0
    def test_insertion_deletion_mismatch_ncsj(self):
        """ Compare SEQ and CIGAR for a transcript that contains an 
           insertion, deletion, mismatch, and noncanonical splice junction in 
           it. """

        sam = "input_files/sams/deletion_insertion_mismatch_nc.sam"
        genome = Fasta("input_files/hg38_chr1.fa")
        sjDict = set()

        with open(sam, 'r') as f:
            sam_line = f.readline().strip().split('\t')
            transcript = t2.Transcript(sam_line, genome, sjDict)

        assert t2.check_seq_and_cigar_length(transcript.SEQ,
                                             transcript.CIGAR) == True
Beispiel #5
0
    def test_pre_correction_dmel(self):
        """ This is a noisy Drosophila read, but prior to correction, the CIGAR
            and SEQ strings should definitely match """

        sam = "input_files/drosophila_example/input_read.sam"
        genome = Fasta("input_files/drosophila_example/chr3R.fa")
        sjDict = set()

        with open(sam, 'r') as f:
            for sam_line in f:
                if sam_line.startswith("@"):
                    continue
                else:
                    sam_line = sam_line.strip().split('\t')
                    transcript = t2.Transcript(sam_line, genome, sjDict)

        assert t2.check_seq_and_cigar_length(transcript.SEQ,
                                             transcript.CIGAR) == True
Beispiel #6
0
    def test_not_matching(self):
        """ This is the sam read as above but after correction with TCv2.0.1.
            The read got flagged by samtools after correction as having 
            inconsistent CIGAR and SEQ fields """

        sam = "input_files/drosophila_example/bad_correction.sam"
        genome = Fasta("input_files/drosophila_example/chr3R.fa")
        sjDict = set()

        with open(sam, 'r') as f:
            for sam_line in f:
                if sam_line.startswith("@"):
                    continue
                else:
                    sam_line = sam_line.strip().split('\t')
                    transcript = t2.Transcript(sam_line, genome, sjDict)

        assert t2.check_seq_and_cigar_length(transcript.SEQ,
                                             transcript.CIGAR) == False