Ejemplo n.º 1
0
    def testInsertionQualityJustEnough(self):
        # The second insert's lowest quality character is 'B'. With
        # q_cutoff=32, both inserts are expected in the merged result.
        first_inserts = {2: ('CCC', 'JJJ')}
        second_inserts = {10: ('GGG', 'JBJ')}

        merged = merge_inserts(first_inserts, second_inserts, q_cutoff=32)

        self.assertEqual({2: 'CCC', 10: 'GGG'}, merged)
Ejemplo n.º 2
0
    def testNone(self):
        # Passing None for both insert collections should merge to an
        # empty dictionary.
        merged = merge_inserts(None, None)

        self.assertEqual({}, merged)
Ejemplo n.º 3
0
    def testConflictingInsertionsTooCloseInQuality(self):
        # Both inserts are at position 2 and disagree on the middle base,
        # with quality characters 'J' and 'A'. With minimum_q_delta=20, the
        # disputed base is expected to be reported as 'N'.
        first_inserts = {2: ('CCC', 'JJJ')}
        second_inserts = {2: ('CTC', 'JAJ')}

        merged = merge_inserts(first_inserts,
                               second_inserts,
                               minimum_q_delta=20)

        self.assertEqual({2: 'CNC'}, merged)
Ejemplo n.º 4
0
    def testInsertionQualityTooLowReverse(self):
        # The second insert's lowest quality character is 'A'. With
        # q_cutoff=32, only the first insert is expected to survive.
        first_inserts = {2: ('CCC', 'JJJ')}
        second_inserts = {10: ('GGG', 'JAJ')}

        merged = merge_inserts(first_inserts, second_inserts, q_cutoff=32)

        self.assertEqual({2: 'CCC'}, merged)
Ejemplo n.º 5
0
    def testConflictingInsertions(self):
        # Both inserts are at position 2 and disagree on the middle base,
        # with quality characters 'J' and 'A'. With the default settings,
        # the first insert's sequence is the expected result.
        first_inserts = {2: ('CCC', 'JJJ')}
        second_inserts = {2: ('CTC', 'JAJ')}

        merged = merge_inserts(first_inserts, second_inserts)

        self.assertEqual({2: 'CCC'}, merged)
Ejemplo n.º 6
0
    def testIdenticalInsertions(self):
        # The same insert reported by both inputs should appear once in the
        # merged result.
        first_inserts = {2: ('CCC', 'JJJ')}
        second_inserts = {2: ('CCC', 'JJJ')}

        merged = merge_inserts(first_inserts, second_inserts)

        self.assertEqual({2: 'CCC'}, merged)
Ejemplo n.º 7
0
    def testSeparateInsertions(self):
        # Inserts at different positions (2 and 10) should both be kept,
        # each reduced to its sequence.
        first_inserts = {2: ('CCC', 'JJJ')}
        second_inserts = {10: ('GGG', 'JJJ')}

        merged = merge_inserts(first_inserts, second_inserts)

        self.assertEqual({2: 'CCC', 10: 'GGG'}, merged)
Ejemplo n.º 8
0
def merge_reads(quality_cutoff, read_pair):
    """ Combine a pair of reads into a single merged sequence.

    Some pairs are skipped instead of merged: those whose two reads name
    different regions, and those left with no reads after dropping the ones
    that is_unmapped_read() flags.
    @param quality_cutoff: quality threshold, handed on to merge_pairs() as
    q_cutoff and to merge_inserts()
    @param read_pair: a sequence of two reads, each a sequence of fields from
    a SAM file record; the second read may be empty or None
    @return: (rname, mseq, merged_inserts, qual1, qual2) or None to skip the
    pair
    """
    first_read, second_read = read_pair
    if second_read and first_read[2] != second_read[2]:
        # The two reads name different regions (field 2), so skip the pair.
        return None

    # rname ends up holding the region name of the last non-empty read.
    rname = None
    mapped_reads = []
    for fields in read_pair:
        if not fields:
            continue
        # Only the first eleven fields are used; optional ones are ignored.
        (_qname, flag, rname, position_text, _mapq, cigar,
         _rnext, _pnext, _tlen, seq, qual) = fields[:11]
        if not is_unmapped_read(flag):
            # The position is shifted down by one before apply_cigar() sees
            # it (presumably 1-based to 0-based; confirm in apply_cigar).
            mapped_reads.append((cigar, seq, qual, int(position_text) - 1))
    if not mapped_reads:
        return None

    seq1, qual1, ins1 = apply_cigar(*mapped_reads[0])
    if len(mapped_reads) > 1:
        seq2, qual2, ins2 = apply_cigar(*mapped_reads[1])
    else:
        # Only one usable read, so the second side of the merge is empty.
        seq2 = qual2 = ''
        ins2 = None
    mseq = merge_pairs(seq1, seq2, qual1, qual2, q_cutoff=quality_cutoff)
    merged_inserts = merge_inserts(ins1, ins2, quality_cutoff)
    return rname, mseq, merged_inserts, qual1, qual2
Ejemplo n.º 9
0
def merge_reads(quality_cutoff, read_pair):
    """ Merge the two reads of a pair, or decide to skip the pair.

    A pair is skipped when its reads name different regions, or when
    is_unmapped_read() rejects every read that is present.
    @param quality_cutoff: quality threshold, forwarded to merge_pairs() as
    q_cutoff and to merge_inserts()
    @param read_pair: a sequence of two sequences, each with fields from a
    SAM file record; the second one may be empty or None
    @return: (rname, mseq, merged_inserts, qual1, qual2) or None to skip the
    pair
    """
    read1, read2 = read_pair
    regions_differ = bool(read2) and read1[2] != read2[2]
    if regions_differ:
        # Field 2 (the region name) disagrees between the two reads.
        return None

    cigar_args = []
    for sam_fields in read_pair:
        if sam_fields:
            # Take the first eleven fields and ignore any optional ones.
            (_qname, flag, rname, refpos_str, _mapq, cigar,
             _rnext, _pnext, _tlen, seq, qual) = sam_fields[:11]
            if is_unmapped_read(flag):
                continue
            # apply_cigar() is given the reported position minus one.
            start = int(refpos_str) - 1
            cigar_args.append((cigar, seq, qual, start))
    if len(cigar_args) == 0:
        return None

    seq1, qual1, ins1 = apply_cigar(*cigar_args[0])
    # Defaults for a pair with a single usable read.
    seq2, qual2, ins2 = '', '', None
    if len(cigar_args) != 1:
        seq2, qual2, ins2 = apply_cigar(*cigar_args[1])
    # rname is the region name unpacked from the last non-empty read.
    return (rname,
            merge_pairs(seq1, seq2, qual1, qual2, q_cutoff=quality_cutoff),
            merge_inserts(ins1, ins2, quality_cutoff),
            qual1,
            qual2)