Ejemplo n.º 1
0
    def __init__(self,
                 contig,
                 cutout,
                 score=None,
                 cigar=None,
                 strand=1,
                 match=1,
                 mismatch=2,
                 gapopen=5,
                 gapextend=0):
        """Store an alignment of a contig to a cutout and classify it.

        When `score` is None the alignment is computed here by calling
        `align_both_strands`, and the resulting score, CIGAR and strand
        replace the `score`, `cigar` and `strand` arguments. Otherwise the
        supplied values are used unchanged.

        The CIGAR is passed through `AlignmentTokenizer`, and the
        tokenizer's `_cigar` becomes `self.cigar`. `self.vartype` is then
        set to 'snv', 'indel' or None depending on which CIGAR pattern (if
        any) matches.

        Args:
            contig: stored as `self.contig`; aligned when `score` is None.
            cutout: stored as `self.cutout`; aligned when `score` is None.
            score: precomputed alignment score, or None to align here.
            cigar: precomputed CIGAR string (ignored when `score` is None).
            strand: precomputed strand (ignored when `score` is None).
            match, mismatch, gapopen, gapextend: forwarded to
                `align_both_strands`; only used when `score` is None.
        """
        if score is None:
            aligned = align_both_strands(cutout, contig, match, mismatch,
                                         gapopen, gapextend)
            score, cigar, strand = aligned

        self.contig, self.cutout = contig, cutout
        self.score, self.strand = score, strand
        self.matedist = None

        # `varseq` and `refrseq` are not defined in this method; they are
        # expected to be provided elsewhere on the class, so the attributes
        # above are assigned before they are read.
        self.tok = AlignmentTokenizer(self.varseq, self.refrseq, cigar)
        self.cigar = self.tok._cigar

        # One match block, optionally flanked by a leading/trailing D or I.
        snv_regex = r'^((\d+)([DI]))?(\d+)M((\d+)[DI])?$'
        # Two match blocks separated by exactly one I or D block, with the
        # same optional flanking D/I blocks.
        indel_regex = r'^((\d+)([DI]))?(\d+)M(\d+)([ID])(\d+)M((\d+)[DI])?$'

        self.vartype = None
        for label, regex in (('snv', snv_regex), ('indel', indel_regex)):
            if re.search(regex, self.cigar):
                self.vartype = label
                break
Ejemplo n.º 2
0
def test_nomargin():
    """Check that the no-margin alignment's CIGAR is left unchanged.

    Aligns the first record of the contig file against the first record of
    the gDNA file, tokenizes the alignment, and asserts that the
    tokenizer's `_cigar` equals its `_origcigar`.
    """
    # Scope each handle in a `with` block so it is closed as soon as the
    # first record has been read, instead of being left open.
    # NOTE(review): relies on kevlar.open returning a file-like context
    # manager -- confirm.
    contigfile = data_file('nomargin-r-indel-contigs.augfasta')
    with kevlar.open(contigfile, 'r') as qfile:
        query = next(kevlar.parse_augmented_fastx(qfile))
    with kevlar.open(data_file('nomargin-r-gdna.fa'), 'r') as tfile:
        target = next(kevlar.parse_augmented_fastx(tfile))

    cigar, _score = kevlar.align(target.sequence, query.sequence)
    tok = AlignmentTokenizer(query.sequence, target.sequence, cigar)
    assert tok._cigar == tok._origcigar
Ejemplo n.º 3
0
def test_gap_center_aligned(contig, gdna, newcigar, origcigar, nblocks):
    """Check the tokenizer's block count and CIGAR strings for one case.

    Args:
        contig: name of the query file inside the 'cigar/' data directory.
        gdna: name of the target file inside the 'cigar/' data directory.
        newcigar: expected value of the tokenizer's `_cigar`.
        origcigar: expected value of the tokenizer's `_origcigar`.
        nblocks: expected number of entries in the tokenizer's `blocks`.

    The arguments are presumably supplied by a pytest parametrize
    decorator that is not visible here -- verify against the full file.
    """
    # Scope each handle in a `with` block so it is closed as soon as the
    # first record has been read, instead of being left open.
    # NOTE(review): relies on kevlar.open returning a file-like context
    # manager -- confirm.
    with kevlar.open(data_file('cigar/' + contig), 'r') as qfile:
        query = next(kevlar.parse_augmented_fastx(qfile))
    with kevlar.open(data_file('cigar/' + gdna), 'r') as tfile:
        target = next(kevlar.parse_augmented_fastx(tfile))

    cigar, _score = kevlar.align(target.sequence, query.sequence)
    tok = AlignmentTokenizer(query.sequence, target.sequence, cigar)
    assert len(tok.blocks) == nblocks
    assert tok._cigar == newcigar
    assert tok._origcigar == origcigar
Ejemplo n.º 4
0
def test_gap_center_aligned():
    """Check the tokenizer's output for the 'cigar/b' contig/gDNA pair.

    Aligns the first record of 'cigar/b.contig.fa' against the first
    record of 'cigar/b.gdna.fa' and asserts that the tokenizer reports
    three blocks, a `_cigar` of '41D150M50D', and an `_origcigar` of
    '41D144M50D6M'.
    """
    # Scope each handle in a `with` block so it is closed as soon as the
    # first record has been read; the original created the handles inline
    # and never closed them.
    # NOTE(review): relies on kevlar.open returning a file-like context
    # manager -- confirm.
    with kevlar.open(data_file('cigar/b.contig.fa'), 'r') as qfile:
        query = next(kevlar.parse_augmented_fastx(qfile))
    with kevlar.open(data_file('cigar/b.gdna.fa'), 'r') as tfile:
        target = next(kevlar.parse_augmented_fastx(tfile))

    cigar, _score = kevlar.align(target.sequence, query.sequence)
    tok = AlignmentTokenizer(query.sequence, target.sequence, cigar)
    assert len(tok.blocks) == 3
    assert tok._cigar == '41D150M50D'
    assert tok._origcigar == '41D144M50D6M'
Ejemplo n.º 5
0
def test_blocks(contig, gdna):
    """Check per-block invariants of a tokenized alignment.

    Args:
        contig: path of the query file, passed directly to `kevlar.open`.
        gdna: path of the target file, passed directly to `kevlar.open`.

    For every block the type must be 'M', 'D' or 'I'. 'M' and 'D' blocks
    must carry a target sequence whose length equals the block length,
    and 'I' blocks must have no target. 'M' and 'I' blocks must carry a
    query sequence whose length equals the block length, and 'D' blocks
    must have no query.
    """
    # Scope each handle in a `with` block so it is closed as soon as the
    # first record has been read; the original created the handles inline
    # and never closed them.
    # NOTE(review): relies on kevlar.open returning a file-like context
    # manager -- confirm.
    with kevlar.open(contig, 'r') as qfile:
        query = next(kevlar.parse_augmented_fastx(qfile))
    with kevlar.open(gdna, 'r') as tfile:
        target = next(kevlar.parse_augmented_fastx(tfile))

    cigar, _score = kevlar.align(target.sequence, query.sequence)
    tok = AlignmentTokenizer(query.sequence, target.sequence, cigar)
    for block in tok.blocks:
        assert block.type in ('M', 'D', 'I')
        if block.type in ('M', 'D'):
            assert len(block.target) == block.length
        else:
            assert block.target is None
        if block.type in ('M', 'I'):
            assert len(block.query) == block.length
        else:
            assert block.query is None
Ejemplo n.º 6
0
    def __init__(self,
                 contig,
                 cutout,
                 score=None,
                 cigar=None,
                 strand=1,
                 match=1,
                 mismatch=2,
                 gapopen=5,
                 gapextend=0,
                 homopolyfilt=True,
                 nocall=False):
        """Store an alignment of a contig to a cutout and classify it.

        When `score` is None and `nocall` is false, the alignment is
        computed here by calling `align_both_strands`, and the resulting
        score, CIGAR and strand replace the corresponding arguments.

        When `nocall` is true, only `contig`, `cutout`, `nocall`,
        `vartype` (None) and `score` (0) are set. The constructor returns
        before `strand`, `do_homopolymer_filter`, `trimmed`, `tok` or
        `cigar` are assigned.

        Otherwise the CIGAR is passed through `AlignmentTokenizer`, the
        tokenizer's `_cigar` becomes `self.cigar`, and `self.vartype` is
        set to 'snv' or 'indel' if the matching CIGAR pattern applies
        (it stays None when neither does).

        Args:
            contig: stored as `self.contig`; aligned when needed.
            cutout: stored as `self.cutout`; aligned when needed.
            score: precomputed alignment score, or None to align here.
            cigar: precomputed CIGAR string.
            strand: precomputed strand.
            match, mismatch, gapopen, gapextend: forwarded to
                `align_both_strands`; only used when aligning here.
            homopolyfilt: stored as `self.do_homopolymer_filter`.
            nocall: if true, skip alignment and tokenizing entirely.
        """
        must_align = score is None and not nocall
        if must_align:
            aligned = align_both_strands(cutout, contig, match, mismatch,
                                         gapopen, gapextend)
            score, cigar, strand = aligned

        self.contig, self.cutout = contig, cutout
        self.nocall = nocall
        self.vartype = None
        if nocall:
            # No-call objects carry a zero score and nothing else.
            self.score = 0
            return

        self.score, self.strand = score, strand
        self.do_homopolymer_filter = homopolyfilt
        self.trimmed = 0

        # `varseq` and `refrseq` are not defined in this method; they are
        # expected to be provided elsewhere on the class, so the attributes
        # above are assigned before they are read.
        self.tok = AlignmentTokenizer(self.varseq, self.refrseq, cigar)
        self.cigar = self.tok._cigar

        # One match block, optionally flanked by a leading/trailing D or I.
        snv_regex = r'^((\d+)([DI]))?(\d+)M((\d+)[DI])?$'
        # Two match blocks separated by exactly one I or D block, with the
        # same optional flanking D/I blocks.
        indel_regex = r'^((\d+)([DI]))?(\d+)M(\d+)([ID])(\d+)M((\d+)[DI])?$'

        for label, regex in (('snv', snv_regex), ('indel', indel_regex)):
            if re.search(regex, self.cigar):
                self.vartype = label
                break