Esempio n. 1
0
    def call_variants(self, ksize, mindist=6, logstream=sys.stderr):
        """Attempt to call variants from this contig alignment.

        The caller is chosen from `self.vartype`. For 'snv', SNVs are called
        across the whole query/target match. For 'indel', the indel call is
        yielded first, followed by any SNV calls from the left and then the
        right flank of the indel. For any other value a single "no call"
        carrying the `InscrutableCigar` filter is yielded.

        If an SNV call is within `mindist` base pairs of the end of the
        alignment it is ignored. Set to `None` to disable this behavior.

        Variant calls for which `self.is_passenger` returns true (calls with
        no spanning interesting k-mers) are designated "passenger calls":
        they are flagged with the `PassengerVariant` filter but are still
        yielded.

        `ksize` is forwarded to the indel and SNV callers, and `logstream` is
        forwarded to every SNV caller, including the flank callers used for
        indel alignments.
        """
        # When the target is the shorter sequence, coordinates start at 0
        # rather than at the stored alignment offset.
        offset = 0 if self.targetshort else self.offset
        if self.vartype == 'snv':
            caller = self.call_snv(self.match.query,
                                   self.match.target,
                                   offset,
                                   ksize,
                                   mindist,
                                   logstream=logstream)
            for call in caller:
                if self.is_passenger(call):
                    call.filter(vf.PassengerVariant)
                yield call
        elif self.vartype == 'indel':
            # Only the first item produced by the indel caller is consumed.
            indelcaller = self.call_indel(ksize)
            indel = next(indelcaller)
            if self.is_passenger(indel):
                indel.filter(vf.PassengerVariant)
            yield indel

            # Flank SNV callers never emit a "no call" of their own
            # (donocall=False): the indel above is already the call for this
            # alignment.
            leftflankcaller = self.call_snv(self.leftflank.query,
                                            self.leftflank.target,
                                            offset,
                                            ksize,
                                            mindist,
                                            donocall=False,
                                            logstream=logstream)
            # The right flank starts after the left flank; for a deletion
            # ('D') the offset is additionally advanced by the indel length.
            offset += self.leftflank.length
            if self.indeltype == 'D':
                offset += self.indel.length
            rightflankcaller = self.call_snv(self.rightflank.query,
                                             self.rightflank.target,
                                             offset,
                                             ksize,
                                             mindist,
                                             donocall=False,
                                             logstream=logstream)
            for call in chain(leftflankcaller, rightflankcaller):
                if self.is_passenger(call):
                    call.filter(vf.PassengerVariant)
                yield call
        else:
            # Unrecognized alignment pattern: report a filtered "no call"
            # that still records the contig, CIGAR and alignment score.
            nocall = Variant(self.seqid,
                             self.pos,
                             '.',
                             '.',
                             CONTIG=self.varseq,
                             CIGAR=self.cigar,
                             KSW2=str(self.score))
            nocall.filter(vf.InscrutableCigar)
            yield nocall
Esempio n. 2
0
    def call_snv(self,
                 qseq,
                 tseq,
                 offset,
                 ksize,
                 mindist=6,
                 donocall=True,
                 logstream=sys.stderr):
        """Yield an SNV call for each mismatch between two aligned sequences.

        The `qseq` (query) and `tseq` (target) strings must have identical
        length. Mismatch positions are found by comparing the two strings
        column by column; when `mindist` is truthy the positions are first
        passed through `trim_terminal_snvs` along with `logstream`. `offset`
        is added to each position before it is converted to a global
        coordinate with `self.cutout.local_to_global`. `ksize` determines the
        window around each mismatch (up to `ksize - 1` bases on either side,
        clipped to the alignment) reported as the reference allele window
        from `tseq` and the alternate allele window from `qseq`.

        If no mismatch remains, a single "no call" carrying the
        `PerfectMatch` filter is yielded when `donocall` is true; otherwise
        nothing is yielded.
        """
        alnlength = len(qseq)
        assert len(tseq) == alnlength
        mismatches = [
            idx for idx, (tbase, qbase) in enumerate(zip(tseq, qseq))
            if tbase != qbase
        ]
        if mindist:
            mismatches = trim_terminal_snvs(mismatches, alnlength, mindist,
                                            logstream)

        if len(mismatches) == 0:
            if not donocall:
                return
            # Nothing to report: emit one filtered placeholder record.
            perfect = Variant(self.seqid,
                              self.cutout.local_to_global(offset),
                              '.',
                              '.',
                              CONTIG=qseq,
                              CIGAR=self.cigar,
                              KSW2=str(self.score),
                              IKMERS=str(len(self.contig.annotations)))
            perfect.filter(vf.PerfectMatch)
            yield perfect
            return

        for idx in mismatches:
            # Window covering every k-mer that overlaps the mismatch,
            # clipped to the bounds of the alignment.
            window = slice(max(idx - ksize + 1, 0),
                           min(idx + ksize, alnlength))
            altwindow = qseq[window]
            refrwindow = tseq[window]

            globalcoord = self.cutout.local_to_global(idx + offset)
            nikmers = n_ikmers_present(self.contig, altwindow)
            yield Variant(self.seqid,
                          globalcoord,
                          tseq[idx].upper(),
                          qseq[idx].upper(),
                          CONTIG=qseq,
                          CIGAR=self.cigar,
                          KSW2=str(self.score),
                          IKMERS=str(nikmers),
                          ALTWINDOW=altwindow,
                          REFRWINDOW=refrwindow)
Esempio n. 3
0
def test_filter_field():
    """Check how `Variant.filterstr` reflects the filters applied to a call.

    Covers no-call records (reference allele '.'), whose unfiltered filter
    string is '.', ordinary calls, whose unfiltered filter string is 'PASS',
    the ';'-joined rendering of multiple filters, and the requirement that
    objects which are not filters are ignored.
    """
    v = Variant('scaffold1', 12345, '.', '.')
    assert v.filterstr == '.'
    v.filter(vf.InscrutableCigar)
    assert v.filterstr == 'InscrutableCigar'

    v = Variant('chr1', 55555, '.', '.')
    v.filter(vf.PerfectMatch)
    assert v.filterstr == 'PerfectMatch'

    v = Variant('1', 809768, 'C', 'CAT')
    assert v.filterstr == 'PASS'
    v.filter(vf.PassengerVariant)
    assert v.filterstr == 'PassengerVariant'
    v.filter(vf.Homopolymer)
    assert v.filterstr == 'Homopolymer;PassengerVariant'

    v = Variant('one', 112358, 'T', 'A')
    v.filter('SNPyMcSNPface')
    v.filter(6.022e23)
    v.filter(dict(chicken='waffles', biscuits='gravy'))
    # These "filters" shouldn't actually do anything. The comparison must be
    # asserted; as a bare expression its result is discarded and the check
    # never runs.
    assert v.filterstr == 'PASS'