def Split(self, max_gap_length):
    """Split each alignment at gaps longer than max_gap_length.

    Structural domains can be discontinuous, so a single alignment may
    have to be broken up into several fragments.  All rows of the table
    ``self.name`` are read, every alignment is split with
    ``alignlib.splitAlignata`` and each resulting fragment is written to
    a temporary load file.  Afterwards the table is dropped, re-created
    and re-loaded from that file.

    max_gap_length -- threshold handed to ``alignlib.splitAlignata``.
    """
    statement = (
        " SELECT nid, start, end, rep_ali,"
        " domain_id, domain_from, domain_to, domain_ali, family %s FROM %s"
    ) % (self.GetAdditionalInfo(), self.name)

    # Run the query before creating the load file, so that a failing
    # query leaves neither an open handle nor an empty file behind.
    domains = self.Execute(statement).fetchall()

    # NOTE(review): os.tempnam is race-prone, but it is kept because the
    # requirements of self.Load() on the file are not visible here.
    load_filename = os.tempnam(Pairsdb.PATH_LOAD, "scmp")
    outfile = open(load_filename, "w")
    try:
        for domain in domains:
            # The first nine columns are fixed; anything after them is
            # passed through to WriteLine unchanged.
            (nid, start, _end, rep_ali, domain_id,
             domain_from, _domain_to, domain_ali, family) = domain[:9]

            map_rep2domains = alignlib.makeAlignataVector()
            alignlib.fillAlignataCompressed(
                map_rep2domains, start, rep_ali, domain_from, domain_ali)

            val = alignlib.splitAlignata(map_rep2domains, max_gap_length)
            fragments = [alignlib.AlignataPtr(x) for x in val]

            # now write each fragment to the output
            for map_rep2domain in fragments:
                # so that the object gets deleted, once it goes out of scope
                map_rep2domain.thisown = 1
                self.WriteLine(outfile, nid, map_rep2domain,
                               domain_id, family, domain[9:])
    finally:
        # Close the handle even if splitting or writing fails.
        outfile.close()

    self.Drop()
    self.Create()
    self.Load(load_filename)
def GetLinks(self, query_nid, query_from, query_to, query_ali,
             sbjct_nid, sbjct_from, sbjct_to, sbjct_ali):
    """Print the link restricted to every pair of overlapping domains.

    The compressed alignment between query and sbjct is expanded once.
    For every domain of the query and every domain of the sbjct (taken
    from ``self.mDomains``) that overlaps the aligned region sufficiently,
    the alignment is cut down to the two domain ranges and, if anything
    is left, written to stdout as one tab-separated line.

    Nothing is returned; the results are printed.
    """
    if self.mLogLevel >= 2:
        print(" ".join(map(str, ("# processing", query_nid, sbjct_nid,
                                 query_from, query_to,
                                 sbjct_from, sbjct_to))))
        sys.stdout.flush()

    full_map = alignlib.makeAlignataVector()
    alignlib.fillAlignataCompressed(full_map,
                                    query_from, query_ali,
                                    sbjct_from, sbjct_ali)

    for q_start, q_end, _q_family in self.mDomains[query_nid]:
        # residues shared by the aligned query range and this domain
        q_shared = min(query_to, q_end) - max(query_from, q_start) + 1
        # NOTE(review): the query test uses "<=" while the sbjct test
        # below uses "<"; kept as found, confirm which one is intended.
        if q_shared <= self.mMinOverlapResidues:
            continue

        for s_start, s_end, _s_family in self.mDomains[sbjct_nid]:
            s_shared = min(sbjct_to, s_end) - max(sbjct_from, s_start) + 1
            if s_shared < self.mMinOverlapResidues:
                continue

            domain_map = alignlib.makeAlignataVector()
            alignlib.copyAlignata(domain_map, full_map,
                                  q_start, q_end,
                                  s_start, s_end)

            if not domain_map.getLength() > 0:
                continue

            row_ali, col_ali = alignlib.writeAlignataCompressed(domain_map)
            fields = (
                "%s_%s_%s" % (query_nid, q_start, q_end),
                "%s_%s_%s" % (sbjct_nid, s_start, s_end),
                "0",
                str(domain_map.getRowFrom()),
                str(domain_map.getRowTo()),
                row_ali,
                str(domain_map.getColFrom()),
                str(domain_map.getColTo()),
                col_ali,
            )
            print("\t".join(fields))
def GetLinks(self,
             query_nid, query_from, query_to, query_ali,
             sbjct_nid, sbjct_from, sbjct_to, sbjct_ali):
    """Write all domain-to-domain links contained in one alignment.

    The alignment between query and sbjct is rebuilt from its compressed
    form.  Each combination of a query domain and a sbjct domain from
    ``self.mDomains`` that passes the overlap checks is used to clip the
    alignment; non-empty clipped alignments are printed to stdout as
    tab-separated records.  The method itself returns nothing.
    """
    if self.mLogLevel >= 2:
        log_items = ("# processing", query_nid, sbjct_nid,
                     query_from, query_to, sbjct_from, sbjct_to)
        print(" ".join([str(item) for item in log_items]))
        sys.stdout.flush()

    alignment = alignlib.makeAlignataVector()
    alignlib.fillAlignataCompressed(
        alignment, query_from, query_ali, sbjct_from, sbjct_ali)

    for qdom_from, qdom_to, _qdom_family in self.mDomains[query_nid]:
        # overlap between the aligned query segment and the query domain
        covered = min(query_to, qdom_to) - max(query_from, qdom_from) + 1
        # NOTE(review): "<=" here versus "<" for the sbjct below is
        # reproduced from the original; verify the intended threshold.
        if covered > self.mMinOverlapResidues:
            for sdom_from, sdom_to, _sdom_family in self.mDomains[sbjct_nid]:
                covered = min(sbjct_to, sdom_to) - max(sbjct_from, sdom_from) + 1
                if covered >= self.mMinOverlapResidues:
                    clipped = alignlib.makeAlignataVector()
                    alignlib.copyAlignata(
                        clipped, alignment,
                        qdom_from, qdom_to, sdom_from, sdom_to)

                    if clipped.getLength() > 0:
                        row_ali, col_ali = alignlib.writeAlignataCompressed(clipped)
                        query_label = "%s_%s_%s" % (query_nid, qdom_from, qdom_to)
                        sbjct_label = "%s_%s_%s" % (sbjct_nid, sdom_from, sdom_to)
                        print("\t".join((
                            query_label,
                            sbjct_label,
                            "0",
                            str(clipped.getRowFrom()),
                            str(clipped.getRowTo()),
                            row_ali,
                            str(clipped.getColFrom()),
                            str(clipped.getColTo()),
                            col_ali)))
def MapAndAddDomains(self, domains):
    """Map domains using an alignment and load the result into the table.

    Information is mapped from a member onto its representative.  Every
    entry of ``domains`` contains the following fields:

        nid,                                         # nid of new rep
        info_mem_from, info_mem_to, info_mem_ali,    # information to be mapped on mem
        info_from, info_to, info_ali,                # information to be mapped on other quantity
        start, end, rep_ali,                         # map between mem and new rep, rep-part
        mem_from, mem_to, mem_ali,                   # map between mem and new rep, mem-part
        domain_id, family,
        ...info-fields

    The two alignments are combined with ``alignlib.combineAlignata``; a
    non-empty result is written to a temporary file which is loaded with
    ``self.Load`` once all entries have been processed.

    Returns the number of entries for which the combined alignment was
    empty.
    """
    temp_filename = os.tempnam(Pairsdb.PATH_LOAD, "clup")
    failed = 0

    outfile = open(temp_filename, "w")
    try:
        for domain in domains:
            # Only the start coordinates and the compressed alignment
            # strings are needed to rebuild the alignments; the end
            # coordinates are unpacked but unused.
            (nid,
             info_mem_from, _info_mem_to, info_mem_ali,
             info_from, _info_to, info_ali,
             start, _end, rep_ali,
             mem_from, _mem_to, mem_ali,
             domain_id, family) = domain[:15]

            # set does not work (for example 1b77 for ddd, obscure error,
            # probably due to destructors?
            map_info_mem2info = alignlib.makeAlignataVector()
            alignlib.fillAlignataCompressed(
                map_info_mem2info,
                info_mem_from, info_mem_ali.tostring(),
                info_from, info_ali.tostring())

            map_rep2mem = alignlib.makeAlignataVector()
            alignlib.fillAlignataCompressed(
                map_rep2mem,
                start, rep_ali.tostring(),
                mem_from, mem_ali.tostring())

            map_rep2info = alignlib.makeAlignataVector()
            alignlib.combineAlignata(
                map_rep2info, map_rep2mem, map_info_mem2info, alignlib.CR)

            if map_rep2info.getLength() == 0:
                if self.mLogLevel >= 2:
                    print(" ".join(map(str, ("----> mapping failed for", domain))))
                    sys.stdout.flush()
                failed += 1
            else:
                self.WriteLine(outfile, nid, map_rep2info,
                               domain_id, family, domain[15:])
    finally:
        # Release the handle even if mapping or writing raises.
        outfile.close()

    self.Load(temp_filename)

    if self.mLogLevel >= 1:
        print("--> mapping failed for %i pairings." % failed)
        sys.stdout.flush()

    return failed
# Read links from stdin, rebuild each pairwise alignment and print it.
# `link`, `sequences`, `options` and the counters are set up before this
# fragment.
ali = alignlib.makeAlignataVector()

for line in sys.stdin:
    # skip comment lines
    if line[0] == "#":
        continue

    link.Read(line)
    ninput += 1

    # both sequences are needed to render the alignment
    if link.mQueryToken not in sequences or link.mSbjctToken not in sequences:
        nskipped += 1
        continue

    ali.Clear()
    alignlib.fillAlignataCompressed(ali,
                                    link.mQueryFrom, link.mQueryAli,
                                    link.mSbjctFrom, link.mSbjctAli)

    result = alignlib.writePairAlignment(sequences[link.mQueryToken],
                                         sequences[link.mSbjctToken],
                                         ali).split("\n")

    # The output must split into exactly three pieces.  Anything else is
    # counted as a failure and skipped, because indexing result[0] and
    # result[1] below would raise IndexError or print garbage.
    if len(result) != 3:
        nfailed += 1
        continue

    if options.format == "fasta":
        print(">%s %i-%i\n%s\n>%s %i-%i\n%s\n" %
              (link.mQueryToken, link.mQueryFrom, link.mQueryTo,
               result[0].split("\t")[1],
               link.mSbjctToken, link.mSbjctFrom, link.mSbjctTo,
               result[1].split("\t")[1]))

    noutput += 1

print("# ninput=%i, noutput=%i, nskipped=%i, nfailed=%i" %
      (ninput, noutput, nskipped, nfailed))
identifiers.append( sbjct_nid ) else: sbjct_sequence, sbjct_nid = GetSequence( tbl_nrdb, sbjct_nid) (identifier, description ) = tbl_nrdb.GetAnnotationFromNid( sbjct_nid ) sbjct_alignatum = pairsdblib.makeAlignatumNeighbour( sbjct_sequence, identifier, description, sbjct_nid, string.atof(0), string.atof(0), string.atof(score)) ## build alignment between mali and new sequence alignlib.fillAlignataCompressed( map_query2sbjct, string.atoi(query_from), query_ali, string.atoi(sbjct_from), sbjct_ali) if len(sbjct_sequence) < map_query2sbjct.getColTo(): print "entry %i skipped, because sequence length (%i) less than last residue aligned (%i)!!!" %\ (sbjct_nid, len(sbjct_sequence), map_query2sbjct.getColTo()) continue mali.addAlignatum( sbjct_alignatum, map_query2sbjct, 1, 0, 1, 1, 0 ) sbjct_alignatum.thisown = 0 consensus = mali.getConsensusString() if not param_plain: renderer = alignlib.makeRendererMView( consensus ) mali.registerRenderer(renderer)
def BuildBLASTMatrix(dbhandle,
                     query_nid,
                     resolution=1.0,
                     table_name=None,
                     combine_repeats=None,
                     max_evalue=None,
                     min_evalue=None,
                     residue_level=None,
                     parser=None,
                     add_self=None):
    """Build a matrix based on BLAST alignments to query_nid.

    The matrix has size N*M:
        N: number of neighbours (plus one row for the query itself if
           add_self is set)
        M: length of query (scaled with resolution)

    Cells covered by an alignment are set to 1.

    If add_self is set, row 0 stands for the query and is filled with
    ones; neighbours then start at row 1.

    If combine_repeats is set, multiple alignments between the query and
    a sbjct will be entered into the same row.

    If residue_level is set, entries are added on the residue level and
    the resolution parameter is ignored.  The optional parser is called
    with each sbjct-to-query alignment before it is used.

    Returns the numpy integer matrix.
    """
    if residue_level:
        query_length = Table_nrdb(dbhandle).GetLength(query_nid)
    else:
        query_length = int(math.floor(
            float(Table_nrdb(dbhandle).GetLength(query_nid)) / float(resolution)))

    tbl_pairsdb_90x90 = TablePairsdbNeighbours(dbhandle)
    if table_name:
        tbl_pairsdb_90x90.SetName(table_name)

    neighbours = tbl_pairsdb_90x90.GetNeighbours(query_nid,
                                                 sort_order=3,
                                                 skip_query=add_self,
                                                 min_evalue=min_evalue,
                                                 max_evalue=max_evalue)

    # Row 0 is reserved for the query when add_self is set, so neighbour
    # rows must start behind it.
    if add_self:
        first_row = 1
    else:
        first_row = 0

    # sbjct_nid -> matrix row; only used with combine_repeats.
    nindex = {}
    if combine_repeats:
        for neighbour in neighbours:
            sbjct_nid = neighbour[3]
            if sbjct_nid not in nindex:
                # Offset by first_row so that no neighbour is written into
                # the query's own row.
                nindex[sbjct_nid] = first_row + len(nindex)
        nrows = first_row + len(nindex)
    else:
        nrows = first_row + len(neighbours)

    # The builtin int replaces the alias numpy.int, which newer NumPy
    # releases no longer provide.
    matrix = numpy.zeros((nrows, query_length), int)

    if add_self:
        matrix[0, 0:query_length] = 1

    row = first_row
    for neighbour in neighbours:
        (query_from, query_to, query_ali,
         sbjct_nid, sbjct_from, sbjct_to, sbjct_ali,
         score, pide, evalue) = neighbour

        if combine_repeats:
            use_row = nindex[sbjct_nid]
        else:
            use_row = row
            row += 1

        if residue_level:
            map_sbjct2query = alignlib.makeAlignataVector()
            alignlib.fillAlignataCompressed(map_sbjct2query,
                                            sbjct_from, sbjct_ali,
                                            query_from, query_ali)
            if parser:
                parser(map_sbjct2query)

            for x in range(sbjct_from, sbjct_to + 1):
                y = map_sbjct2query.mapRowToCol(x)
                # a false value means there is no column for this residue
                if y:
                    try:
                        matrix[use_row, y - 1] = 1
                    except IndexError:
                        print(" ".join(map(str, ("IndexError in ", query_nid,
                                                 sbjct_nid, x, y - 1,
                                                 query_length))))
        else:
            yfrom = int(math.floor(query_from / resolution))
            yto = int(math.floor(query_to / resolution))
            matrix[use_row, yfrom:yto] = 1

    return matrix
def BuildBLASTMatrix(
    dbhandle,
    query_nid,
    resolution=1.0,
    table_name=None,
    combine_repeats=None,
    max_evalue=None,
    min_evalue=None,
    residue_level=None,
    parser=None,
    add_self=None,
):
    """Build a matrix based on BLAST alignments to query_nid.

    The matrix has size N*M:
        N: number of neighbours (plus one row for the query itself if
           add_self is set)
        M: length of query (scaled with resolution)

    Cells covered by an alignment are set to 1.

    If add_self is set, row 0 stands for the query and is filled with
    ones; neighbours then start at row 1.

    If combine_repeats is set, multiple alignments between the query and
    a sbjct will be entered into the same row.

    If residue_level is set, entries are added on the residue level and
    the resolution parameter is ignored.  The optional parser is called
    with each sbjct-to-query alignment before it is used.

    Returns the numpy integer matrix.
    """
    if residue_level:
        query_length = Table_nrdb(dbhandle).GetLength(query_nid)
    else:
        query_length = int(
            math.floor(float(Table_nrdb(dbhandle).GetLength(query_nid)) / float(resolution))
        )

    tbl_pairsdb_90x90 = TablePairsdbNeighbours(dbhandle)
    if table_name:
        tbl_pairsdb_90x90.SetName(table_name)

    neighbours = tbl_pairsdb_90x90.GetNeighbours(
        query_nid,
        sort_order=3,
        skip_query=add_self,
        min_evalue=min_evalue,
        max_evalue=max_evalue,
    )

    # Row 0 is reserved for the query when add_self is set, so neighbour
    # rows must start behind it.
    if add_self:
        first_row = 1
    else:
        first_row = 0

    # sbjct_nid -> matrix row; only used with combine_repeats.
    nindex = {}
    if combine_repeats:
        for neighbour in neighbours:
            sbjct_nid = neighbour[3]
            if sbjct_nid not in nindex:
                # Offset by first_row so that no neighbour is written into
                # the query's own row.
                nindex[sbjct_nid] = first_row + len(nindex)
        nrows = first_row + len(nindex)
    else:
        nrows = first_row + len(neighbours)

    # The builtin int replaces the alias numpy.int, which newer NumPy
    # releases no longer provide.
    matrix = numpy.zeros((nrows, query_length), int)

    if add_self:
        matrix[0, 0:query_length] = 1

    row = first_row
    for neighbour in neighbours:
        (
            query_from,
            query_to,
            query_ali,
            sbjct_nid,
            sbjct_from,
            sbjct_to,
            sbjct_ali,
            score,
            pide,
            evalue,
        ) = neighbour

        if combine_repeats:
            use_row = nindex[sbjct_nid]
        else:
            use_row = row
            row += 1

        if residue_level:
            map_sbjct2query = alignlib.makeAlignataVector()
            alignlib.fillAlignataCompressed(
                map_sbjct2query, sbjct_from, sbjct_ali, query_from, query_ali
            )
            if parser:
                parser(map_sbjct2query)

            for x in range(sbjct_from, sbjct_to + 1):
                y = map_sbjct2query.mapRowToCol(x)
                # a false value means there is no column for this residue
                if y:
                    try:
                        matrix[use_row, y - 1] = 1
                    except IndexError:
                        print(
                            " ".join(
                                map(
                                    str,
                                    ("IndexError in ", query_nid, sbjct_nid, x, y - 1, query_length),
                                )
                            )
                        )
        else:
            yfrom = int(math.floor(query_from / resolution))
            yto = int(math.floor(query_to / resolution))
            matrix[use_row, yfrom:yto] = 1

    return matrix