def iterator_filter_overlapping_query(psls, options):
    '''remove alignments that overlap on query.

    The input is grouped with :func:`Blat.iterator_query_overlap`,
    using ``options.threshold_merge_distance``. A group holding a
    single alignment is yielded unchanged. A group holding several
    alignments is discarded as a whole.

    Selecting the alignment with the highest number of matching
    nucleotides out of each overlapping group is *not* performed at
    present: that base-level filtering was disabled for being very
    slow (see the note in the loop below).

    Only full ranges are considered; overlap between individual
    alignment blocks is not checked.

    Arguments
    ---------
    psls : iterable
        Alignments, handed on to ``Blat.iterator_query_overlap``.
    options : object
        Must provide the attribute ``threshold_merge_distance``.

    Yields
    ------
    The sole alignment of every group of size one.

    A summary of the counts (input, output, discarded) is logged via
    ``E.info`` once the input has been exhausted.
    '''
    # NOTE(review): an identical definition of this function appears
    # again later in this file; at module level the later one replaces
    # this one. One of the two should be removed.

    ninput, noutput, ndiscarded = 0, 0, 0

    for block in Blat.iterator_query_overlap(
            psls, options.threshold_merge_distance):

        group_size = len(block)
        ninput += group_size

        if group_size > 1:
            # Every member of a multi-alignment group is dropped.
            # The disabled base-level variant split the group into
            # connected components (Blat.getComponents with
            # by_query=True) and yielded, per component, the alignment
            # with the largest mNMatches.
            ndiscarded += group_size
            continue

        yield block[0]
        noutput += 1

    E.info("iterator_filter_overlapping_query: ninput=%i, "
           "noutput=%i, ndiscarded=%i" % (ninput, noutput, ndiscarded))
def iterator_filter_overlapping_query(psls, options):
    '''remove alignments that overlap on query.

    Groups of alignments are obtained from
    :func:`Blat.iterator_query_overlap`, called with
    ``options.threshold_merge_distance``. Only groups of exactly one
    alignment pass the filter; when a group contains more than one
    alignment, all of its members are discarded.

    Keeping the alignment with the highest number of matching
    nucleotides from each overlapping group is currently *not*
    implemented here: the base-level filtering that did so was
    switched off because it was very slow.

    Only full ranges are taken into account; individual blocks of the
    alignments are not checked for overlap.

    Arguments
    ---------
    psls : iterable
        Alignments, passed through to ``Blat.iterator_query_overlap``.
    options : object
        Must provide the attribute ``threshold_merge_distance``.

    Yields
    ------
    The single alignment of each group that has exactly one member.

    After the input is exhausted, the numbers of input, output and
    discarded alignments are reported through ``E.info``.
    '''
    # NOTE(review): this is a redefinition - the same function is
    # already defined earlier in this file with an identical body.
    # One of the two definitions should be removed.

    ninput, noutput, ndiscarded = 0, 0, 0

    for block in Blat.iterator_query_overlap(
            psls, options.threshold_merge_distance):

        nmembers = len(block)
        ninput += nmembers

        if nmembers > 1:
            # The whole group is rejected. The disabled alternative
            # computed connected components of the group
            # (Blat.getComponents, by_query=True), sorted each
            # component by descending mNMatches and yielded its first
            # entry.
            ndiscarded += nmembers
        else:
            yield block[0]
            noutput += 1

    E.info("iterator_filter_overlapping_query: ninput=%i, "
           "noutput=%i, ndiscarded=%i" % (ninput, noutput, ndiscarded))