def getBioseqFromSetList( self, lSets ):
    """
    Assemble one Bioseq from the sub-sequences designated by a list of sets.

    The header starts with "<name>::<id> <seqname> " taken from lSets[0],
    followed by a comma-separated list of "start..end" for every set
    visited. The sequence is the concatenation, in the same order, of
    self.getSubSequence( seqname, start, end ) for each set.

    Sets are visited in the order returned by
    SetUtils.getSetListSortedByIncreasingMinThenMax; that order is reversed
    when lSets[0] is not on the direct strand.

    @param lSets: non-empty list of set objects exposing name, id, seqname,
                  getStart(), getEnd() and isOnDirectStrand()
    @return: Bioseq built from the header and the concatenated sequence
    @raise IndexError: if lSets is an empty list (lSets[0] is accessed)
    """
    headerPieces = [ "%s::%i %s " % ( lSets[0].name, lSets[0].id, lSets[0].seqname ) ]
    sequencePieces = []

    orderedSets = SetUtils.getSetListSortedByIncreasingMinThenMax( lSets )
    # The strand is read from lSets[0] after the sort call, as in the
    # original statement order.
    if not lSets[0].isOnDirectStrand():
        orderedSets.reverse()

    for currentSet in orderedSets:
        headerPieces.append( "%i..%i," % ( currentSet.getStart(), currentSet.getEnd() ) )
        sequencePieces.append( self.getSubSequence( currentSet.seqname,
                                                    currentSet.getStart(),
                                                    currentSet.getEnd() ) )

    # Drop the last character of the header (the trailing comma left by
    # the final "start..end," piece).
    fullHeader = "".join( headerPieces )[:-1]
    return Bioseq( fullHeader, "".join( sequencePieces ) )
def getIdentityFromPathList( lPaths, checkSubjects=True ):
    """
    Compute the identity of a list of paths, weighted by length on the query.

    The paths are first merged with
    PathUtils.mergePathsInListUsingQueryCoordsOnly. The result is
        sum( identity * getLengthOnQuery() ) over the merged paths
    divided by the cumulative length (SetUtils.getCumulLength) of the merged
    query sets of those paths.

    @param lPaths: list of path objects exposing identity and
                   getLengthOnQuery()
    @param checkSubjects: when True, also refuse lists whose paths refer to
                          more than one distinct subject name
    @return: weighted identity as a float, within [0, 100]
    @raise Exception: if the paths have more than one distinct query name,
                      if checkSubjects is set and they have more than one
                      distinct subject name, or if the computed value is
                      below 0 or above 100. The message is written to
                      stderr and also carried by the exception.
    @raise ZeroDivisionError: if the cumulative length on the query is 0
                              (e.g. nothing to merge)
    """
    def _fail( msg ):
        # Report on stderr (as before) and attach the same text to the
        # exception so that callers catching it can see the reason.
        sys.stderr.write( "%s\n" % msg )
        sys.stderr.flush()
        raise Exception( msg )

    if len( PathUtils.getListOfDistinctQueryNames( lPaths ) ) > 1:
        _fail( "ERROR: try to compute identity from Paths with different queries" )
    if checkSubjects and len( PathUtils.getListOfDistinctSubjectNames( lPaths ) ) > 1:
        _fail( "ERROR: try to compute identity from Paths with different subjects" )

    lMergedPaths = PathUtils.mergePathsInListUsingQueryCoordsOnly( lPaths )
    lQuerySets = PathUtils.getSetListFromQueries( lMergedPaths )
    lMergedQuerySets = SetUtils.mergeSetsInList( lQuerySets )
    totalLengthOnQry = SetUtils.getCumulLength( lMergedQuerySets )

    identity = sum( iPath.identity * iPath.getLengthOnQuery() for iPath in lMergedPaths )
    # float() forces true division even when both operands are integers.
    weightedIdentity = identity / float(totalLengthOnQry)

    if weightedIdentity < 0 or weightedIdentity > 100:
        _fail( "ERROR: weighted identity '%.2f' outside range" % ( weightedIdentity ) )
    return weightedIdentity
lSetIdToRemovePaths = [] count = 0 # for each path ID for id in lPathId: string = "processing path '%i'..." % ( id ) if qtype == "path": lPaths = qtablePathAdaptator.getPathListFromId( id ) lQuerySets = PathUtils.getSetListFromQueries( lPaths ) elif qtype == "set": lQuerySets = qtableSetAdaptator.getSetListFromId( id ) lQuerySets.sort() qmin, qmax = SetUtils.getListBoundaries( lQuerySets ) qmin = qmin - 1 qmax = qmax + 1 if stype == "path": lPaths = stablePathAdaptator.getPathListOverlappingQueryCoord( lQuerySets[0].seqname.split()[0], qmin, qmax ) lSubjectSets = PathUtils.getSetListFromQueries( lPaths ) elif stype == "set": lSubjectSets = stableSetAdaptator.getSetListFromQueryCoord( lQuerySets[0].seqname.split()[0], qmin, qmax ) if verbose > 1: print "----------------------------------------" if len(lSubjectSets) > 0: if verbose > 1: print "annot:"
def getLengthOnQueryFromPathList( lPaths ):
    """
    Return the cumulative length on the query of a list of paths.

    The query sets of the paths (PathUtils.getSetListFromQueries) are merged
    with SetUtils.mergeSetsInList, and the cumulative length of the merged
    sets (SetUtils.getCumulLength) is returned.
    NOTE(review): the merge presumably makes overlapping query ranges count
    only once -- confirm against SetUtils.mergeSetsInList.

    @param lPaths: list of path objects
    @return: value returned by SetUtils.getCumulLength for the merged sets
    """
    mergedQuerySets = SetUtils.mergeSetsInList( PathUtils.getSetListFromQueries( lPaths ) )
    return SetUtils.getCumulLength( mergedQuerySets )