def process(matches):
    """Chain the compatible matches of one group and write the merged entry.

    ``matches`` is sorted in place by ``mQueryFrom``.  A directed graph is
    built with one node per match plus an artificial source and sink:

    - an edge ``x -> y`` (``x`` before ``y`` in query order) is added when
      the two query ranges do not overlap by a positive amount and the
      target coordinates are in order for the strand of the first match;
      its weight is the query distance between the two matches,
    - the source is linked to every match with weight ``mQueryFrom`` and
      every match is linked to the sink with weight
      ``mQueryLength - mQueryTo``.

    The shortest source-to-sink path therefore selects the chain of matches
    with the smallest summed edge weight.  The query-to-target maps of the
    selected matches are merged into a copy of the first match (copied
    before sorting) and the result is written to ``options.stdout``.  A
    warning is emitted if matches had to be dropped.

    Returns 1.
    """
    new = matches[0].copy()

    map_query2target = alignlib_lite.py_makeAlignmentBlocks()

    graph = networkx.DiGraph()
    # one node per match, plus source and target
    graph.add_nodes_from(range(len(matches) + 2))

    matches.sort(key=lambda x: x.mQueryFrom)

    # predicate: is target segment of `a` located before that of `b`
    # when walking along the strand?
    if Genomics.IsPositiveStrand(matches[0].strand):
        def in_order(a, b):
            return a.mSbjctTo < b.mSbjctFrom
    else:
        def in_order(a, b):
            return a.mSbjctFrom > b.mSbjctTo

    for x, xx in enumerate(matches):
        if options.loglevel >= 6:
            options.stdlog.write("# graph: %2i %s\n" % (x, str(xx)))
        for y in range(x + 1, len(matches)):
            yy = matches[y]
            # d > 0: query ranges overlap; d <= 0: -d is the query gap
            d = min(xx.mQueryTo, yy.mQueryTo) - \
                max(xx.mQueryFrom, yy.mQueryFrom)
            if d > 0 or not in_order(xx, yy):
                continue
            # attributes are passed as keywords: the positional attribute
            # dictionary is only accepted by networkx 1.x.
            graph.add_edge(x, y, weight=-d)

    source = len(matches)
    target = len(matches) + 1
    for x, xx in enumerate(matches):
        graph.add_edge(source, x, weight=xx.mQueryFrom)
        graph.add_edge(x, target, weight=xx.mQueryLength - xx.mQueryTo)

    if options.loglevel >= 6:
        networkx.write_edgelist(graph, options.stdlog)

    path = networkx.dijkstra_path(graph, source, target)

    if options.loglevel >= 6:
        options.stdlog.write("# path: %s\n" % (str(path)))

    # strip source and target from the path
    new_matches = [matches[x] for x in path[1:-1]]

    if len(matches) != len(new_matches):
        E.warn(("query=%s, target=%s, strand=%s: "
                "removed overlapping/out-of-order segments: "
                "before=%i, after=%i") %
               (matches[0].mQueryId,
                matches[0].mSbjctId,
                matches[0].strand,
                len(matches),
                len(new_matches)))

    matches = new_matches

    for match in matches:
        m = match.getMapQuery2Target()
        alignlib_lite.py_addAlignment2Alignment(map_query2target, m)

    new.fromMap(map_query2target, use_strand=True)

    options.stdout.write(str(new) + "\n")
    options.stdout.flush()
    return 1
def Add(self, const_other,
        combine_contig=False,
        allow_overlap=False,
        contig_size=0,
        combine_queries=False,
        as_intron=False):
    """Add one entry to another.

    This procedure allows to add

    - predictions on different contigs if combine_contig = True
    - overlapping predictions on the same query if allow_overlap = True
    - results from different queries if combine_queries = True
    - if as_intron is set to true, the new fragment is added as an intron.

    The merge is carried out on working copies obtained through
    ``getCopy()`` and ``Expand()``; the merged result is then written
    into ``self``.

    ``contig_size`` is used as offset for the coordinates of
    ``const_other`` when joining across different contigs.

    Raises ValueError if

    - the queries overlap and allow_overlap is not set,
    - the queries differ and combine_queries is not set,
    - sbjct token or strand differ and combine_contig is not set,
    - the sbjct ranges overlap and combine_contig is not set.
    """

    # create working copies of each prediction
    other = const_other.getCopy()
    this = self.getCopy()

    other.Expand()
    this.Expand()

    code = "I" if as_intron else "P"

    # No overlap is computed for different queries, but the join on
    # different contigs below reads this value - hence the default.
    query_overlap = 0

    # check for query overlaps
    if this.mQueryToken == other.mQueryToken:

        query_overlap = max(
            0,
            min(this.mQueryTo, other.mQueryTo) -
            max(this.mQueryFrom, other.mQueryFrom) + 1)

        if query_overlap > 0:

            if not allow_overlap:
                raise ValueError(
                    "refusing to add overlapping entries: overlap = %i, queries:\n%s\n%s\n, set allow_overlap = True " % (
                        query_overlap, str(this), str(other)))

            overlap = query_overlap
            # if queries overlap, truncate this before adding the other
            this.mMapPeptide2Translation.removeRowRegion(
                this.mQueryTo - overlap + 1, this.mQueryTo)
            other.mMapPeptide2Translation.moveAlignment(0, -overlap)
            this.mQueryTo -= overlap
            this.mTranslation = this.mTranslation[:-overlap]

            # remove aligned residues from the back: drop whole segments
            # as long as they fit into the overlap ...
            for x in range(len(this.mMapPeptide2Genome) - 1, 0, -1):
                if this.mMapPeptide2Genome[x][1] <= overlap:
                    overlap -= this.mMapPeptide2Genome[x][1]
                    del this.mMapPeptide2Genome[x]
                else:
                    break
            # ... and shorten the last remaining segment by the rest
            # (three nucleotides per residue).
            last = this.mMapPeptide2Genome[-1]
            this.mMapPeptide2Genome[-1] = (
                last[0],
                last[1] - overlap,
                last[2] - overlap * 3)
    elif not combine_queries:
        raise ValueError(
            "refusing to add different queries - set combine_queries = True.")

    if this.mSbjctToken != other.mSbjctToken or \
            this.mSbjctStrand != other.mSbjctStrand:
        if combine_contig:
            # the concatenated token no longer equals other's token,
            # which routes execution into the different-contig join below
            this.mSbjctToken += "-" + other.mSbjctToken
            this.mSbjctStrand += other.mSbjctStrand
        else:
            raise ValueError("can not add different sbjct.")

    sbjct_overlap = max(
        0,
        min(this.mSbjctGenomeTo, other.mSbjctGenomeTo) -
        max(this.mSbjctGenomeFrom, other.mSbjctGenomeFrom))

    if sbjct_overlap > 0:
        if not combine_contig:
            raise ValueError(
                "refusing to add overlapping entries: overlap = %i, sbjct:\n%s\n%s\n" % (
                    sbjct_overlap, str(this), str(other)))

    if this.mSbjctToken == other.mSbjctToken:

        # set precedence
        if this.mSbjctGenomeFrom < other.mSbjctGenomeFrom:
            first = this
            second = other
        else:
            first = other
            second = this

        # get length of gap
        d_na = second.mSbjctGenomeFrom - first.mSbjctGenomeTo

        if this.mQueryToken != other.mQueryToken:
            d_aa = first.mQueryLength - first.mQueryTo
            # create a new virtual query by concatenating
            # the two queries
            this.mQueryToken += "-" + other.mQueryToken
            # sort out the alignment
            second.mMapPeptide2Translation.moveAlignment(
                first.mQueryLength, 0)
            this.mQueryLength = first.mQueryLength + second.mQueryLength
        else:
            d_aa = second.mQueryFrom - first.mQueryTo - 1

        this.mSbjctGenomeFrom = min(this.mSbjctGenomeFrom,
                                    other.mSbjctGenomeFrom)
        this.mSbjctGenomeTo = max(this.mSbjctGenomeTo,
                                  other.mSbjctGenomeTo)

        this.mMapPeptide2Genome = first.mMapPeptide2Genome + \
            [(code, d_aa, d_na)] + second.mMapPeptide2Genome
        this.mTranslation = first.mTranslation + second.mTranslation

        second.mMapPeptide2Translation.moveAlignment(0, first.mSbjctTo - 1)

    else:
        # join on different contigs: other is appended to this
        d_na = contig_size - this.mSbjctGenomeTo + \
            other.mSbjctGenomeFrom + query_overlap * 3
        d_aa = other.mQueryFrom - this.mQueryTo - 1
        this.mMapPeptide2Genome += [(code, d_aa, d_na), ] + \
            other.mMapPeptide2Genome
        this.mTranslation += other.mTranslation
        other.mMapPeptide2Translation.moveAlignment(0, this.mSbjctTo - 1)

        # start coordinate of this is unchanged
        this.mSbjctGenomeTo = contig_size + other.mSbjctGenomeTo

        # this always precedes other here; the section below reads `first`
        first = this

    # now fill self from first and this
    self.mQueryToken = first.mQueryToken
    self.mQueryLength = this.mQueryLength

    nthis = this.mMapPeptide2Translation.getLength() - \
        this.mMapPeptide2Translation.getNumGaps()
    nother = other.mMapPeptide2Translation.getLength() - \
        other.mMapPeptide2Translation.getNumGaps()

    # NOTE(review): when `first` is `other`, this is other's own map and
    # not the concatenated this.mMapPeptide2Genome - confirm intended.
    self.mMapPeptide2Genome = first.mMapPeptide2Genome
    self.mSbjctGenomeFrom = this.mSbjctGenomeFrom
    self.mSbjctGenomeTo = this.mSbjctGenomeTo

    # there might be some reference counting issues, thus
    # do it the explicit way.
    alignlib_lite.py_addAlignment2Alignment(this.mMapPeptide2Translation,
                                            other.mMapPeptide2Translation)
    self.mMapPeptide2Translation = alignlib_lite.py_makeAlignmentVector()
    alignlib_lite.py_addAlignment2Alignment(self.mMapPeptide2Translation,
                                            this.mMapPeptide2Translation)

    self.mTranslation = this.mTranslation

    self.mQueryFrom = self.mMapPeptide2Translation.getRowFrom()
    self.mQueryTo = self.mMapPeptide2Translation.getRowTo()
    self.mSbjctFrom = self.mMapPeptide2Translation.getColFrom()
    self.mSbjctTo = self.mMapPeptide2Translation.getColTo()

    self.mQueryCoverage = 100.0 * \
        (self.mQueryTo - self.mQueryFrom + 1) / float(self.mQueryLength)

    # fields within a segment and segments themselves are space separated
    self.mAlignmentString = " ".join(
        " ".join(map(str, x)) for x in self.mMapPeptide2Genome)

    # NOTE(review): the name is spelled "Emssions" here - confirm that it
    # matches the alignlib_lite API.
    f = alignlib_lite.py_AlignmentFormatEmssions(
        self.mMapPeptide2Translation)

    self.mQueryAli, self.mSbjctAli = f.mRowAlignment, f.mColAlignment

    # summary parameters
    self.mRank = max(this.mRank, other.mRank)
    self.score += other.score
    self.mNGaps += other.mNGaps
    self.mNFrameShifts += other.mNFrameShifts
    self.mNIntrons += other.mNIntrons + 1
    self.mNStopCodons += other.mNStopCodons

    nnew = self.mMapPeptide2Translation.getLength() - \
        self.mMapPeptide2Translation.getNumGaps()

    # average of both entries weighted by their aligned (non-gap)
    # lengths, capped at 100
    self.mPercentIdentity = min(
        100.0,
        (self.mPercentIdentity * nthis +
         other.mPercentIdentity * nother) / nnew)
    self.mPercentSimilarity = min(
        100.0,
        (self.mPercentSimilarity * nthis +
         other.mPercentSimilarity * nother) / nnew)

    self.mNAssembled += 1 + other.mNAssembled
def process(matches):
    """Chain the compatible matches of one group and write the merged entry.

    ``matches`` is sorted in place by ``mQueryFrom``.  A directed graph is
    built with one node per match plus an artificial source and sink:

    - an edge ``x -> y`` (``x`` before ``y`` in query order) is added when
      the two query ranges do not overlap by a positive amount and the
      target coordinates are in order for the strand of the first match;
      its weight is the query distance between the two matches,
    - the source is linked to every match with weight ``mQueryFrom`` and
      every match is linked to the sink with weight
      ``mQueryLength - mQueryTo``.

    The shortest source-to-sink path therefore selects the chain of matches
    with the smallest summed edge weight.  The query-to-target maps of the
    selected matches are merged into a copy of the first match (copied
    before sorting) and the result is written to ``options.stdout``.  A
    warning is emitted if matches had to be dropped.

    Returns 1.
    """
    new = matches[0].copy()

    map_query2target = alignlib_lite.py_makeAlignmentBlocks()

    n_matches = len(matches)

    graph = networkx.DiGraph()
    # one node per match, plus source and target; `range` instead of the
    # Python-2-only `xrange`
    graph.add_nodes_from(range(n_matches + 2))

    matches.sort(key=lambda x: x.mQueryFrom)

    # predicate: is target segment of `a` located before that of `b`
    # when walking along the strand?
    if Genomics.IsPositiveStrand(matches[0].strand):
        def in_order(a, b):
            return a.mSbjctTo < b.mSbjctFrom
    else:
        def in_order(a, b):
            return a.mSbjctFrom > b.mSbjctTo

    for x, xx in enumerate(matches):
        if options.loglevel >= 6:
            options.stdlog.write("# graph: %2i %s\n" % (x, str(xx)))
        for y in range(x + 1, n_matches):
            yy = matches[y]
            # d > 0: query ranges overlap; d <= 0: -d is the query gap
            d = min(xx.mQueryTo, yy.mQueryTo) - \
                max(xx.mQueryFrom, yy.mQueryFrom)
            if d > 0 or not in_order(xx, yy):
                continue
            # attributes are passed as keywords: the positional attribute
            # dictionary is only accepted by networkx 1.x.
            graph.add_edge(x, y, weight=-d)

    source = n_matches
    target = n_matches + 1
    for x, xx in enumerate(matches):
        graph.add_edge(source, x, weight=xx.mQueryFrom)
        graph.add_edge(x, target, weight=xx.mQueryLength - xx.mQueryTo)

    if options.loglevel >= 6:
        networkx.write_edgelist(graph, options.stdlog)

    path = networkx.dijkstra_path(graph, source, target)

    if options.loglevel >= 6:
        options.stdlog.write("# path: %s\n" % (str(path)))

    # strip source and target from the path
    new_matches = [matches[x] for x in path[1:-1]]

    if n_matches != len(new_matches):
        E.warn(("query=%s, target=%s, strand=%s: "
                "removed overlapping/out-of-order segments: "
                "before=%i, after=%i") %
               (matches[0].mQueryId,
                matches[0].mSbjctId,
                matches[0].strand,
                n_matches,
                len(new_matches)))

    matches = new_matches

    for match in matches:
        m = match.getMapQuery2Target()
        alignlib_lite.py_addAlignment2Alignment(map_query2target, m)

    new.fromMap(map_query2target, use_strand=True)

    options.stdout.write(str(new) + "\n")
    options.stdout.flush()
    return 1
def Add(self, const_other,
        combine_contig=False,
        allow_overlap=False,
        contig_size=0,
        combine_queries=False,
        as_intron=False):
    """Add one entry to another.

    This procedure allows to add

    - predictions on different contigs if combine_contig = True
    - overlapping predictions on the same query if allow_overlap = True
    - results from different queries if combine_queries = True
    - if as_intron is set to true, the new fragment is added as an intron.

    The merge is carried out on working copies obtained through
    ``getCopy()`` and ``Expand()``; the merged result is then written
    into ``self``.

    ``contig_size`` is used as offset for the coordinates of
    ``const_other`` when joining across different contigs.

    Raises ValueError if

    - the queries overlap and allow_overlap is not set,
    - the queries differ and combine_queries is not set,
    - sbjct token or strand differ and combine_contig is not set,
    - the sbjct ranges overlap and combine_contig is not set.
    """

    ## create working copies of each prediction
    other = const_other.getCopy()
    this = self.getCopy()

    other.Expand()
    this.Expand()

    code = "I" if as_intron else "P"

    ## No overlap is computed for different queries, but the join on
    ## different contigs below reads this value - hence the default.
    query_overlap = 0

    ## check for query overlaps
    if this.mQueryToken == other.mQueryToken:

        query_overlap = max(
            0,
            min(this.mQueryTo, other.mQueryTo) -
            max(this.mQueryFrom, other.mQueryFrom) + 1)

        if query_overlap > 0:

            if not allow_overlap:
                raise ValueError(
                    "refusing to add overlapping entries: overlap = %i, queries:\n%s\n%s\n, set allow_overlap = True " % (
                        query_overlap, str(this), str(other)))

            overlap = query_overlap
            ## if queries overlap, truncate this before adding the other
            this.mMapPeptide2Translation.removeRowRegion(
                this.mQueryTo - overlap + 1, this.mQueryTo)
            other.mMapPeptide2Translation.moveAlignment(0, -overlap)
            this.mQueryTo -= overlap
            this.mTranslation = this.mTranslation[:-overlap]

            ## remove aligned residues from the back: drop whole segments
            ## as long as they fit into the overlap ...
            for x in range(len(this.mMapPeptide2Genome) - 1, 0, -1):
                if this.mMapPeptide2Genome[x][1] <= overlap:
                    overlap -= this.mMapPeptide2Genome[x][1]
                    del this.mMapPeptide2Genome[x]
                else:
                    break
            ## ... and shorten the last remaining segment by the rest
            ## (three nucleotides per residue).
            last = this.mMapPeptide2Genome[-1]
            this.mMapPeptide2Genome[-1] = (
                last[0],
                last[1] - overlap,
                last[2] - overlap * 3)
    elif not combine_queries:
        raise ValueError(
            "refusing to add different queries - set combine_queries = True.")

    if this.mSbjctToken != other.mSbjctToken or \
            this.mSbjctStrand != other.mSbjctStrand:
        if combine_contig:
            ## the concatenated token no longer equals other's token,
            ## which routes execution into the different-contig join below
            this.mSbjctToken += "-" + other.mSbjctToken
            this.mSbjctStrand += other.mSbjctStrand
        else:
            raise ValueError("can not add different sbjct.")

    sbjct_overlap = max(
        0,
        min(this.mSbjctGenomeTo, other.mSbjctGenomeTo) -
        max(this.mSbjctGenomeFrom, other.mSbjctGenomeFrom))

    if sbjct_overlap > 0:
        if not combine_contig:
            raise ValueError(
                "refusing to add overlapping entries: overlap = %i, sbjct:\n%s\n%s\n" % (
                    sbjct_overlap, str(this), str(other)))

    if this.mSbjctToken == other.mSbjctToken:

        ## set precedence
        if this.mSbjctGenomeFrom < other.mSbjctGenomeFrom:
            first = this
            second = other
        else:
            first = other
            second = this

        ## get length of gap
        d_na = second.mSbjctGenomeFrom - first.mSbjctGenomeTo

        if this.mQueryToken != other.mQueryToken:
            d_aa = first.mQueryLength - first.mQueryTo
            # create a new virtual query by concatenating
            # the two queries
            this.mQueryToken += "-" + other.mQueryToken
            # sort out the alignment
            second.mMapPeptide2Translation.moveAlignment(
                first.mQueryLength, 0)
            this.mQueryLength = first.mQueryLength + second.mQueryLength
        else:
            d_aa = second.mQueryFrom - first.mQueryTo - 1

        this.mSbjctGenomeFrom = min(this.mSbjctGenomeFrom,
                                    other.mSbjctGenomeFrom)
        this.mSbjctGenomeTo = max(this.mSbjctGenomeTo,
                                  other.mSbjctGenomeTo)

        this.mMapPeptide2Genome = first.mMapPeptide2Genome + \
            [(code, d_aa, d_na)] + second.mMapPeptide2Genome
        this.mTranslation = first.mTranslation + second.mTranslation

        second.mMapPeptide2Translation.moveAlignment(0, first.mSbjctTo - 1)

    else:
        ## join on different contigs: other is appended to this
        d_na = contig_size - this.mSbjctGenomeTo + \
            other.mSbjctGenomeFrom + query_overlap * 3
        d_aa = other.mQueryFrom - this.mQueryTo - 1
        this.mMapPeptide2Genome += [(code, d_aa, d_na), ] + \
            other.mMapPeptide2Genome
        this.mTranslation += other.mTranslation
        other.mMapPeptide2Translation.moveAlignment(0, this.mSbjctTo - 1)

        ## start coordinate of this is unchanged
        this.mSbjctGenomeTo = contig_size + other.mSbjctGenomeTo

        ## this always precedes other here; the section below reads `first`
        first = this

    ## now fill self from first and this
    self.mQueryToken = first.mQueryToken
    self.mQueryLength = this.mQueryLength

    nthis = this.mMapPeptide2Translation.getLength() - \
        this.mMapPeptide2Translation.getNumGaps()
    nother = other.mMapPeptide2Translation.getLength() - \
        other.mMapPeptide2Translation.getNumGaps()

    ## NOTE(review): when `first` is `other`, this is other's own map and
    ## not the concatenated this.mMapPeptide2Genome - confirm intended.
    self.mMapPeptide2Genome = first.mMapPeptide2Genome
    self.mSbjctGenomeFrom = this.mSbjctGenomeFrom
    self.mSbjctGenomeTo = this.mSbjctGenomeTo

    ## there might be some reference counting issues, thus
    ## do it the explicit way.
    alignlib_lite.py_addAlignment2Alignment(this.mMapPeptide2Translation,
                                            other.mMapPeptide2Translation)
    self.mMapPeptide2Translation = alignlib_lite.py_makeAlignmentVector()
    alignlib_lite.py_addAlignment2Alignment(self.mMapPeptide2Translation,
                                            this.mMapPeptide2Translation)

    self.mTranslation = this.mTranslation

    self.mQueryFrom = self.mMapPeptide2Translation.getRowFrom()
    self.mQueryTo = self.mMapPeptide2Translation.getRowTo()
    self.mSbjctFrom = self.mMapPeptide2Translation.getColFrom()
    self.mSbjctTo = self.mMapPeptide2Translation.getColTo()

    self.mQueryCoverage = 100.0 * \
        (self.mQueryTo - self.mQueryFrom + 1) / float(self.mQueryLength)

    ## fields within a segment and segments themselves are space separated
    self.mAlignmentString = " ".join(
        " ".join(map(str, x)) for x in self.mMapPeptide2Genome)

    ## NOTE(review): the name is spelled "Emssions" here - confirm that it
    ## matches the alignlib_lite API.
    f = alignlib_lite.py_AlignmentFormatEmssions(
        self.mMapPeptide2Translation)

    self.mQueryAli, self.mSbjctAli = f.mRowAlignment, f.mColAlignment

    ## summary parameters
    self.mRank = max(this.mRank, other.mRank)
    self.score += other.score
    self.mNGaps += other.mNGaps
    self.mNFrameShifts += other.mNFrameShifts
    self.mNIntrons += other.mNIntrons + 1
    self.mNStopCodons += other.mNStopCodons

    nnew = self.mMapPeptide2Translation.getLength() - \
        self.mMapPeptide2Translation.getNumGaps()

    ## average of both entries weighted by their aligned (non-gap)
    ## lengths, capped at 100
    self.mPercentIdentity = min(
        100.0,
        (self.mPercentIdentity * nthis +
         other.mPercentIdentity * nother) / nnew)
    self.mPercentSimilarity = min(
        100.0,
        (self.mPercentSimilarity * nthis +
         other.mPercentSimilarity * nother) / nnew)

    self.mNAssembled += 1 + other.mNAssembled