def check_align(self, kread, mer, skmers, type='setup'):
    """Decide whether ``kread`` aligns to the consensus and merge it if so.

    The consensus (``self.aseq.seq``) and the read (``kread.seq``) are
    aligned in both argument orders with ``olc.nw``.  From the way the
    results are indexed here, element 6 is used as the alignment score,
    elements 0 and 1 as gapped alignment strings, and elements 2/3 and 4/5
    as coordinate pairs -- presumably end/start offsets in the first and
    second sequence; confirm against ``olc.nw``.

    Args:
        kread: read object exposing a ``seq`` attribute.
        mer: k-mer string searched for in the ungapped alignment strings
            to break ties between equally scoring alignments.
        skmers: forwarded to ``set_kmers`` and the overlap handlers.
        type: forwarded to the overlap handlers; the value ``'grow'``
            additionally triggers ``set_kmers`` when the consensus is
            replaced by the read.

    Returns:
        True when the read was accepted (identical, merged, or used to
        extend the consensus), False otherwise.
    """
    # NOTE(review): block nesting was reconstructed from a
    # whitespace-collapsed source; confirm the placement of the k-mer
    # tie-break branch against the project history.
    contig_aln = olc.nw(self.aseq.seq, kread.seq)
    read_aln = olc.nw(kread.seq, self.aseq.seq)

    contig_len = len(self.aseq.seq)
    read_len = len(kread.seq)
    contig_score = contig_aln[6]
    read_score = read_aln[6]

    # An alignment is only trusted when it scores at least a quarter of the
    # shorter sequence and its score/span ratio reaches 0.9.
    # NOTE(review): raises ZeroDivisionError if elements 2 and 3 are equal.
    min_score = float(min(contig_len, read_len)) / 4.0
    contig_ident = float(contig_score) / float(contig_aln[2] - contig_aln[3])
    read_ident = float(read_score) / float(read_aln[2] - read_aln[3])

    contig_aln_weak = contig_score < min_score or contig_ident < 0.9
    read_aln_weak = read_score < min_score or read_ident < 0.9
    if contig_aln_weak and read_aln_weak:
        return False

    # Unequal scores: the better-scoring orientation decides the handler.
    if contig_score != read_score:
        if contig_score > read_score:
            self.contig_overlap_read(contig_aln, kread, skmers, type)
        else:
            # Read hangs left
            self.read_overlap_contig(read_aln, kread, skmers, type)
        return True

    # From here on both orientations scored the same.
    if contig_aln[3] == 0 and contig_aln[5] == 0:
        # Consensus and read sequence are treated as the same.
        return True

    if contig_len < read_len or (
            contig_aln[2] == contig_len and contig_aln[3] == 0):
        # Consensus sequence is a subseq of kread: the read becomes the
        # new consensus and the old span is recorded on it.
        self.aseq = assembly_seq(kread.seq)
        self.aseq.add_subseq(contig_aln[5], contig_aln[4])
        if type == 'grow':
            self.set_kmers(skmers)
        return True

    if read_len < contig_len or (
            read_aln[2] == read_len and read_aln[3] == 0):
        # Consensus contains the read sequence.
        self.aseq.add_subseq(read_aln[5], read_aln[4])
        return True

    # Tie with neither sequence containing the other: compare where the
    # k-mer sits in each ungapped alignment string to pick an orientation.
    contig_mer_a = contig_aln[0].replace('-', '').find(mer)
    contig_mer_b = contig_aln[1].replace('-', '').find(mer)
    read_mer_a = read_aln[0].replace('-', '').find(mer)
    read_mer_b = read_aln[1].replace('-', '').find(mer)
    contig_offset = abs(contig_mer_a - contig_mer_b)
    read_offset = abs(read_mer_a - read_mer_b)

    if contig_mer_a > -1 and contig_mer_b > -1:
        if (read_mer_a == -1 and read_mer_b == -1) or (
                read_offset > contig_offset):
            self.contig_overlap_read(contig_aln, kread, skmers, type)
            return True
    elif read_mer_a > -1 and read_mer_b > -1:
        if (contig_mer_a == -1 and contig_mer_b == -1) or (
                read_offset < contig_offset):
            self.read_overlap_contig(read_aln, kread, skmers, type)
            return True
    return False
def check_align(self, kread, mer, nreads, skmers, type='setup'):
    """Decide whether ``kread`` aligns to the consensus and merge it if so.

    The consensus (``self.aseq.seq``) and the read (``kread.seq``) are
    aligned in both argument orders with ``olc.nw``.  From the way the
    results are indexed here, element 6 is used as the alignment score,
    elements 0 and 1 as gapped alignment strings, and elements 2/3 and 4/5
    as coordinate pairs -- presumably end/start offsets in the first and
    second sequence; confirm against ``olc.nw``.

    Args:
        kread: read object exposing ``seq`` and ``indel_only``.
        mer: k-mer string searched for in the ungapped alignment strings
            to break ties between equally scoring alignments.
        nreads: forwarded to ``set_superseq``, ``add_subseq`` and the
            overlap handlers.
        skmers: forwarded to ``set_kmers`` and the overlap handlers.
        type: forwarded to the overlap handlers; the value ``'grow'``
            additionally triggers ``set_kmers`` when the read supersedes
            the consensus.

    Returns:
        True when the read was accepted (identical, merged, or used to
        extend the consensus), False otherwise.
    """
    # NOTE(review): block nesting was reconstructed from a
    # whitespace-collapsed source; confirm the placement of the k-mer
    # tie-break branch against the project history.
    contig_aln = olc.nw(self.aseq.seq, kread.seq)
    read_aln = olc.nw(kread.seq, self.aseq.seq)

    contig_len = len(self.aseq.seq)
    read_len = len(kread.seq)
    contig_score = contig_aln[6]
    read_score = read_aln[6]

    # An alignment is only trusted when it scores at least a quarter of the
    # shorter sequence and its score/span ratio, rounded to two decimals,
    # reaches 0.90.
    # NOTE(review): raises ZeroDivisionError if elements 2 and 3 are equal.
    min_score = float(min(contig_len, read_len)) / 4.0
    contig_ident = round(
        float(contig_score) / float(contig_aln[2] - contig_aln[3]), 2)
    read_ident = round(
        float(read_score) / float(read_aln[2] - read_aln[3]), 2)

    contig_aln_weak = contig_score < min_score or contig_ident < 0.90
    read_aln_weak = read_score < min_score or read_ident < 0.90
    if contig_aln_weak and read_aln_weak:
        return False

    # Unequal scores: the better-scoring orientation decides the handler.
    if contig_score != read_score:
        if contig_score > read_score:
            self.contig_overlap_read(contig_aln, kread, nreads, skmers, type)
        else:
            # Read hangs left
            self.read_overlap_contig(read_aln, kread, nreads, skmers, type)
        return True

    # From here on both orientations scored the same.
    if contig_aln[3] == 0 and contig_aln[5] == 0 and contig_len == read_len:
        # Consensus and read sequence are treated as the same.
        return True

    if contig_len < read_len or (
            contig_aln[2] == contig_len and contig_aln[3] == 0):
        # Consensus sequence is a subseq of kread: let the read supersede
        # the consensus while recording the old span.
        self.aseq.set_superseq(kread, nreads, contig_aln[5], contig_aln[4])
        if type == 'grow':
            self.set_kmers(skmers)
        return True

    if read_len < contig_len or (
            read_aln[2] == read_len and read_aln[3] == 0):
        # Consensus contains the read sequence.
        self.aseq.add_subseq(
            read_aln[5], read_aln[4], nreads, kread.indel_only)
        return True

    # Tie with neither sequence containing the other: compare where the
    # k-mer sits in each ungapped alignment string to pick an orientation.
    contig_mer_a = contig_aln[0].replace('-', '').find(mer)
    contig_mer_b = contig_aln[1].replace('-', '').find(mer)
    read_mer_a = read_aln[0].replace('-', '').find(mer)
    read_mer_b = read_aln[1].replace('-', '').find(mer)
    contig_offset = abs(contig_mer_a - contig_mer_b)
    read_offset = abs(read_mer_a - read_mer_b)

    if contig_mer_a > -1 and contig_mer_b > -1:
        if (read_mer_a == -1 and read_mer_b == -1) or (
                read_offset > contig_offset):
            self.contig_overlap_read(contig_aln, kread, nreads, skmers, type)
            return True
    elif read_mer_a > -1 and read_mer_b > -1:
        if (contig_mer_a == -1 and contig_mer_b == -1) or (
                read_offset < contig_offset):
            self.read_overlap_contig(read_aln, kread, nreads, skmers, type)
            return True
    return False
def subseq(seq1, seq2):
    """Report whether ``seq2`` aligns end-to-end against ``seq1``.

    ``seq2`` is aligned to ``seq1`` with ``olc.nw(seq2, seq1)``.  The
    alignment counts as a sub-sequence hit when element 2 equals
    ``len(seq2)``, element 3 is 0 and element 6 (used as the score) is at
    least 90% of ``len(seq2)`` -- presumably meaning the alignment spans
    all of ``seq2``; confirm against ``olc.nw``.

    Returns:
        A ``(is_subseq, score)`` tuple:
        ``(True, None)`` when it is a hit and ``seq2`` is shorter than
        ``seq1``; ``(True, score)`` when it is a hit and ``seq2`` is not
        shorter; ``(False, score)`` otherwise.
    """
    alignment = olc.nw(seq2, seq1)
    score = alignment[6]
    query_len = len(seq2)

    covers_query = alignment[2] == query_len and alignment[3] == 0
    if not (covers_query and score >= (0.90 * query_len)):
        return (False, score)
    if query_len < len(seq1):
        return (True, None)
    return (True, score)
def subseq(seq1, seq2):
    """Report whether ``seq2`` aligns end-to-end against ``seq1``.

    ``seq2`` is aligned to ``seq1`` with ``olc.nw(seq2, seq1)``.  The
    alignment counts as a sub-sequence hit when element 2 equals
    ``len(seq2)``, element 3 is 0 and element 6 (used as the score) is at
    least 90% of ``len(seq2)`` -- presumably meaning the alignment spans
    all of ``seq2``; confirm against ``olc.nw``.

    Returns:
        A ``(is_subseq, score)`` tuple:
        ``(True, None)`` when it is a hit and ``seq2`` is shorter than
        ``seq1``; ``(True, score)`` when it is a hit and ``seq2`` is not
        shorter; ``(False, score)`` otherwise.
    """
    alignment = olc.nw(seq2, seq1)
    score = alignment[6]
    query_len = len(seq2)

    covers_query = alignment[2] == query_len and alignment[3] == 0
    if not (covers_query and score >= (0.90 * query_len)):
        return (False, score)
    if query_len < len(seq1):
        return (True, None)
    return (True, score)
def same_reads(seq1, seq2):
    """Return True when ``seq1`` and ``seq2`` align as the same read.

    The two sequences are aligned with ``olc.nw(seq1, seq2)``.  They are
    considered the same when elements 3 and 5 of the result are both 0
    (presumably the alignment start offsets in each sequence; confirm
    against ``olc.nw``) and element 6, used as the score, exceeds 95% of
    ``len(seq1)``.
    """
    alignment = olc.nw(seq1, seq2)
    if alignment[3] != 0 or alignment[5] != 0:
        return False
    return bool(alignment[6] > 0.95 * (len(seq1)))
def same_reads(seq1, seq2):
    """Return True when ``seq1`` and ``seq2`` align as the same read.

    The two sequences are aligned with ``olc.nw(seq1, seq2)``.  They are
    considered the same when elements 3 and 5 of the result are both 0
    (presumably the alignment start offsets in each sequence; confirm
    against ``olc.nw``) and element 6, used as the score, exceeds 95% of
    ``len(seq1)``.
    """
    alignment = olc.nw(seq1, seq2)
    if alignment[3] != 0 or alignment[5] != 0:
        return False
    return bool(alignment[6] > 0.95 * (len(seq1)))