Ejemplo n.º 1
0
    def check_align(self, kread, mer, skmers, type='setup'):
        """Align a read against the consensus and merge it when it fits.

        The consensus (``self.aseq.seq``) and the read (``kread.seq``) are
        aligned with ``olc.nw`` twice, once per argument order.  Depending
        on which alignment wins, the consensus is replaced, annotated with
        a sub-sequence, or extended through ``contig_overlap_read`` /
        ``read_overlap_contig``.

        NOTE(review): the layout of the ``olc.nw`` result is not visible
        here.  Index 6 is used as the alignment score; 0 and 1 are strings
        that may contain '-' gap characters; 2/3 and 4/5 look like the
        end/start offsets of the overlap on the first and second argument
        respectively -- confirm against ``olc.nw``.

        Args:
            kread: read object exposing a ``seq`` attribute.
            mer: string searched for in the ungapped aligned sequences to
                break ties between the two alignments.
            skmers: forwarded unchanged to ``set_kmers`` and the overlap
                helpers.
            type: forwarded to the overlap helpers; the value ``'grow'``
                additionally triggers ``set_kmers`` when the read replaces
                the consensus.

        Returns:
            ``True`` when the read was accepted (or is considered identical
            to the consensus), ``False`` when both alignments are too weak
            or the tie could not be resolved.
        """
        contig_seq = self.aseq.seq
        read_seq = kread.seq
        fwd = olc.nw(contig_seq, read_seq)
        rev = olc.nw(read_seq, contig_seq)

        contig_len = len(contig_seq)
        read_len = len(read_seq)
        # An alignment must score at least a quarter of the shorter sequence.
        min_score = float(min(contig_len, read_len)) / 4.0

        # Score divided by (index 2 - index 3) of each alignment.
        # NOTE(review): raises ZeroDivisionError when those two indexes are
        # equal -- behaviour kept as in the original.
        fwd_score = fwd[6]
        fwd_ident = float(fwd_score) / float(fwd[2] - fwd[3])
        rev_score = rev[6]
        rev_ident = float(rev_score) / float(rev[2] - rev[3])

        fwd_weak = fwd_score < min_score or fwd_ident < 0.9
        rev_weak = rev_score < min_score or rev_ident < 0.9
        if fwd_weak and rev_weak:
            return False

        # Clear winner: extend in the direction of the better alignment.
        if fwd_score != rev_score:
            if fwd_score > rev_score:
                self.contig_overlap_read(fwd, kread, skmers, type)
            else:
                # Read hangs left.
                self.read_overlap_contig(rev, kread, skmers, type)
            return True

        # From here on both alignments have the same score.
        if fwd[3] == 0 and fwd[5] == 0:
            # Consensus and read are treated as the same sequence.
            return True

        if contig_len < read_len or (fwd[2] == contig_len and fwd[3] == 0):
            # Consensus is a sub-sequence of the read: the read becomes the
            # new consensus and the old span is recorded on it.
            self.aseq = assembly_seq(read_seq)
            self.aseq.add_subseq(fwd[5], fwd[4])
            if type == 'grow':
                self.set_kmers(skmers)
            return True

        if read_len < contig_len or (rev[2] == read_len and rev[3] == 0):
            # Consensus contains the read.
            self.aseq.add_subseq(rev[5], rev[4])
            return True

        # Still tied: compare where ``mer`` sits in each ungapped aligned
        # string and prefer the alignment whose two offsets are closer.
        def mer_offset(aligned):
            return aligned.replace('-', '').find(mer)

        fwd_a = mer_offset(fwd[0])
        fwd_b = mer_offset(fwd[1])
        rev_a = mer_offset(rev[0])
        rev_b = mer_offset(rev[1])

        if fwd_a > -1 and fwd_b > -1:
            rev_missing = rev_a == -1 and rev_b == -1
            if rev_missing or abs(rev_a - rev_b) > abs(fwd_a - fwd_b):
                self.contig_overlap_read(fwd, kread, skmers, type)
                return True
            return False

        if rev_a > -1 and rev_b > -1:
            fwd_missing = fwd_a == -1 and fwd_b == -1
            if fwd_missing or abs(rev_a - rev_b) < abs(fwd_a - fwd_b):
                self.read_overlap_contig(rev, kread, skmers, type)
                return True

        return False
Ejemplo n.º 2
0
  def check_align(self, kread, mer, nreads, skmers, type='setup') :
    """Align a read against the consensus and merge it when it fits.

    ``self.aseq.seq`` (consensus) and ``kread.seq`` (read) are aligned with
    ``olc.nw`` in both argument orders.  The better alignment decides
    whether the consensus is replaced by a super-sequence, annotated with a
    sub-sequence, or extended via ``contig_overlap_read`` /
    ``read_overlap_contig``.

    NOTE(review): the layout of the ``olc.nw`` result is not visible here.
    Index 6 is used as the score; 0 and 1 are strings that may contain '-'
    gap characters; 2/3 and 4/5 look like end/start offsets of the overlap
    on the first and second argument -- confirm against ``olc.nw``.

    Args:
      kread: read object; ``seq`` and ``indel_only`` are read from it.
      mer: string searched for in the ungapped aligned sequences to break
        ties between the two alignments.
      nreads: forwarded unchanged to ``set_superseq``, ``add_subseq`` and
        the overlap helpers.
      skmers: forwarded unchanged to ``set_kmers`` and the overlap helpers.
      type: forwarded to the overlap helpers; ``'grow'`` additionally
        triggers ``set_kmers`` after the consensus got a super-sequence.

    Returns:
      ``True`` when the read was accepted (or equals the consensus),
      ``False`` when both alignments are too weak or the tie could not be
      resolved.
    """
    contig_seq = self.aseq.seq
    read_seq = kread.seq
    fwd = olc.nw(contig_seq, read_seq)
    rev = olc.nw(read_seq, contig_seq)

    contig_len = len(contig_seq)
    read_len = len(read_seq)
    # An alignment must score at least a quarter of the shorter sequence.
    min_score = float(min(contig_len, read_len)) / 4.0

    # Identity is rounded to two decimals before the 0.90 comparison.
    # NOTE(review): divides by (index 2 - index 3); raises
    # ZeroDivisionError when they are equal -- kept as in the original.
    fwd_score = fwd[6]
    fwd_ident = round(float(fwd_score) / float(fwd[2] - fwd[3]), 2)
    rev_score = rev[6]
    rev_ident = round(float(rev_score) / float(rev[2] - rev[3]), 2)

    fwd_weak = fwd_score < min_score or fwd_ident < 0.90
    rev_weak = rev_score < min_score or rev_ident < 0.90
    if fwd_weak and rev_weak :
      return False

    # Clear winner: extend in the direction of the better alignment.
    if fwd_score != rev_score :
      if fwd_score > rev_score :
        self.contig_overlap_read(fwd, kread, nreads, skmers, type)
      else :
        # Read hangs left.
        self.read_overlap_contig(rev, kread, nreads, skmers, type)
      return True

    # From here on both alignments have the same score.
    if fwd[3] == 0 and fwd[5] == 0 and contig_len == read_len :
      # Consensus and read are treated as the same sequence.
      return True

    if contig_len < read_len or (fwd[2] == contig_len and fwd[3] == 0) :
      # Consensus is a sub-sequence of the read.
      self.aseq.set_superseq(kread, nreads, fwd[5], fwd[4])
      if type == 'grow' :
        self.set_kmers(skmers)
      return True

    if read_len < contig_len or (rev[2] == read_len and rev[3] == 0) :
      # Consensus contains the read.
      self.aseq.add_subseq(rev[5], rev[4], nreads, kread.indel_only)
      return True

    # Still tied: locate ``mer`` in each ungapped aligned string and prefer
    # the alignment whose two offsets are closer together.
    fwd_a, fwd_b, rev_a, rev_b = [
      aln[side].replace('-','').find(mer)
      for aln in (fwd, rev) for side in (0, 1)
    ]
    fwd_gap = abs(fwd_a - fwd_b)
    rev_gap = abs(rev_a - rev_b)

    if fwd_a > -1 and fwd_b > -1 :
      if (rev_a == -1 and rev_b == -1) or rev_gap > fwd_gap :
        self.contig_overlap_read(fwd, kread, nreads, skmers, type)
        return True
      return False

    if rev_a > -1 and rev_b > -1 :
      if (fwd_a == -1 and fwd_b == -1) or rev_gap < fwd_gap :
        self.read_overlap_contig(rev, kread, nreads, skmers, type)
        return True

    return False
Ejemplo n.º 3
0
def subseq(seq1, seq2) :
  """Report whether ``seq2`` aligns as a sub-sequence of ``seq1``.

  ``seq2`` is aligned against ``seq1`` with ``olc.nw``.  It counts as a
  sub-sequence when index 2 of the result equals ``len(seq2)``, index 3 is
  zero and the score (index 6) is at least 90% of ``len(seq2)``.
  NOTE(review): indexes 2/3 presumably are the end/start of the aligned
  span on ``seq2`` -- confirm against ``olc.nw``.

  Returns:
    ``(True, None)`` when it is a sub-sequence and shorter than ``seq1``,
    ``(True, score)`` when it is a sub-sequence but not shorter,
    ``(False, score)`` otherwise.
  """
  aln = olc.nw(seq2, seq1)
  seq2_len = len(seq2)
  fully_covered = aln[2] == seq2_len and aln[3] == 0
  score = aln[6]

  if not (fully_covered and score >= (0.90*seq2_len)) :
    return (False,score)
  if seq2_len < len(seq1) :
    return (True,None)
  return (True,score)
Ejemplo n.º 4
0
def subseq(seq1, seq2):
    """Report whether ``seq2`` aligns as a sub-sequence of ``seq1``.

    The alignment comes from ``olc.nw(seq2, seq1)``.  ``seq2`` qualifies
    when result index 2 equals ``len(seq2)``, index 3 is zero and the score
    at index 6 reaches 90% of ``len(seq2)``.
    NOTE(review): indexes 2/3 presumably are the end/start of the aligned
    span on ``seq2`` -- confirm against ``olc.nw``.

    Returns:
        A ``(is_subseq, score)`` tuple.  The score slot is ``None`` only
        when ``seq2`` qualifies and is strictly shorter than ``seq1``;
        in every other case it carries the alignment score.
    """
    alignment = olc.nw(seq2, seq1)
    query_len = len(seq2)
    is_subseq = (
        alignment[2] == query_len
        and alignment[3] == 0
        and alignment[6] >= (0.90 * query_len)
    )
    if is_subseq:
        reported = None if query_len < len(seq1) else alignment[6]
        return (True, reported)
    return (False, alignment[6])
Ejemplo n.º 5
0
def same_reads(seq1, seq2) :
  """Return True when ``seq1`` and ``seq2`` align as the same read.

  The sequences are aligned with ``olc.nw``; they count as the same when
  result indexes 3 and 5 are both zero and the score (index 6) exceeds 95%
  of ``len(seq1)``.
  NOTE(review): indexes 3 and 5 presumably are the alignment start offsets
  on each sequence -- confirm against ``olc.nw``.
  """
  aln = olc.nw(seq1, seq2)
  starts_at_zero = aln[3] == 0 and aln[5] == 0
  if starts_at_zero and aln[6] > 0.95*(len(seq1)) :
    return True
  return False
Ejemplo n.º 6
0
def same_reads(seq1, seq2):
    """Return True when ``seq1`` and ``seq2`` align as the same read.

    Uses ``olc.nw(seq1, seq2)``.  The reads match when result indexes 3 and
    5 are both zero and the score at index 6 is strictly greater than 95%
    of ``len(seq1)``.
    NOTE(review): indexes 3 and 5 presumably are the alignment start
    offsets on each sequence -- confirm against ``olc.nw``.
    """
    alignment = olc.nw(seq1, seq2)
    score_threshold = 0.95 * (len(seq1))
    return bool(
        alignment[3] == 0
        and alignment[5] == 0
        and alignment[6] > score_threshold
    )