Beispiel #1
0
    def GetLinks(self, query_nid, query_from, query_to, query_ali, sbjct_nid,
                 sbjct_from, sbjct_to, sbjct_ali):
        """returns all possible links between link split into domains.
        """

        if self.mLogLevel >= 2:
            print "# processing", query_nid, sbjct_nid, query_from, query_to, sbjct_from, sbjct_to
            sys.stdout.flush()

        map_query2sbjct = alignlib.makeAlignataVector()

        alignlib.fillAlignataCompressed(map_query2sbjct, query_from, query_ali,
                                        sbjct_from, sbjct_ali)

        # iterate over query
        for query_domain_from, query_domain_to, query_family in self.mDomains[
                query_nid]:

            # check if overlap
            overlap = min(query_to, query_domain_to) - max(
                query_from, query_domain_from) + 1
            if overlap <= self.mMinOverlapResidues: continue

            # check for overlap with domains in sbjct
            for sbjct_domain_from, sbjct_domain_to, sbjct_family in self.mDomains[
                    sbjct_nid]:

                overlap = min(sbjct_to, sbjct_domain_to) - max(
                    sbjct_from, sbjct_domain_from) + 1
                if overlap < self.mMinOverlapResidues: continue

                map_new_query2sbjct = alignlib.makeAlignataVector()
                alignlib.copyAlignata(map_new_query2sbjct, map_query2sbjct,
                                      query_domain_from, query_domain_to,
                                      sbjct_domain_from, sbjct_domain_to)

                if map_new_query2sbjct.getLength() > 0:

                    row_ali, col_ali = alignlib.writeAlignataCompressed(
                        map_new_query2sbjct)

                    print string.join(
                        ("%s_%s_%s" %
                         (query_nid, query_domain_from, query_domain_to),
                         "%s_%s_%s" %
                         (sbjct_nid, sbjct_domain_from, sbjct_domain_to), "0",
                         str(map_new_query2sbjct.getRowFrom()),
                         str(map_new_query2sbjct.getRowTo()), row_ali,
                         str(map_new_query2sbjct.getColFrom()),
                         str(map_new_query2sbjct.getColTo()), col_ali), "\t")
    def CheckLink(self, query_nid, query_from, query_to, sbjct_nid, sbjct_from,
                  sbjct_to):
        """check, whether link is faithfull.
        """

        query_profile = self.GetProfile(query_nid)
        query_profile.useSegment(query_from, query_to)

        sbjct_profile = self.GetProfile(sbjct_nid)
        sbjct_profile.useSegment(sbjct_from, sbjct_to)

        alignator = alignlib.makeFullDP(self.mGop, self.mGep)
        result = alignlib.makeAlignataVector()

        alignator.Align(query_profile, sbjct_profile, result)

        if self.mLogLevel >= 3:
            print "# --> %i vs %i: score=%5.2f, length=%i, numgaps=%i, row_from=%i, row_to=%i, col_from=%i, col_to=%i" %\
                  (query_nid, sbjct_nid,
                   result.getScore(),
                   result.getLength(),
                   result.getNumGaps(),
                   result.getRowFrom(), result.getRowTo(),
                   result.getColFrom(), result.getColTo())
            sys.stdout.flush()

        query_profile.useFullLength()
        sbjct_profile.useFullLength()

        if result.getScore() > self.mMinAlignmentScore:
            return 1, result
        else:
            return 0, result
Beispiel #3
0
def PrintAlignedSequences(sequence1, sequence2, chain=None, format="modeller"):

    ## align sequences by identity

    seq_row = alignlib.makeSequence(sequence1)
    seq_col = alignlib.makeSequence(sequence2)
    alignator = alignlib.makeAlignatorFullDP(-0.0, -0.0)
    map_row2col = alignlib.makeAlignataVector()
    alignator.Align(seq_row, seq_col, map_row2col)

    lines = string.split(
        alignlib.writePairAlignment(seq_row, seq_col, map_row2col), "\n")

    if format == "modeller":

        first_res, sequence, last_res = string.split(lines[0], "\t")

        print ">P1;structure"
        print "structureX: %s : %s : %s : %s : %s : : : : " % (
            "structure", first_res, "", last_res, "")
        print "%s*" % sequence

        first_res, sequence, last_res = string.split(lines[1], "\t")

        print ">P1;sequence"
        print "sequence:%s : %s : %s : %s : %s : : : : " % (
            "sequence", first_res, "", last_res, "")
        print "%s*" % sequence
    else:
        print lines
Beispiel #4
0
def PrintAlignedSequences( sequence1, sequence2, chain = None, format="modeller" ):

    ## align sequences by identity

    seq_row = alignlib.makeSequence( sequence1 )
    seq_col = alignlib.makeSequence( sequence2 )
    alignator = alignlib.makeAlignatorFullDP( -0.0, -0.0 )
    map_row2col = alignlib.makeAlignataVector()
    alignator.Align( seq_row, seq_col, map_row2col )

    lines = string.split(alignlib.writePairAlignment( seq_row, seq_col, map_row2col ), "\n")

    if format == "modeller":
        
        first_res, sequence, last_res = string.split( lines[0], "\t" )
        
        print ">P1;structure"  
        print "structureX: %s : %s : %s : %s : %s : : : : " % ("structure", first_res, "" , last_res, "" )
        print "%s*" % sequence

        first_res, sequence, last_res = string.split( lines[1], "\t" )
        
        print ">P1;sequence"
        print "sequence:%s : %s : %s : %s : %s : : : : " % ("sequence" , first_res, "", last_res, "")
        print "%s*" % sequence
    else:
        print lines
Beispiel #5
0
def GetAlignmentBetweenCorrespondingAtoms( coordinates1, coordinates2, cutoff ):
    """Align two coordinate lists by pairing atoms that lie close together.

    Every atom of coordinates1 is compared with every atom of coordinates2;
    pairs whose Euclidean distance is at most cutoff are recorded as dots
    (1-based positions). The dots are then handed to a dot-based aligner.

    Returns the resulting alignment, or None if three or fewer pairs were
    within the cutoff.
    """
    dots = alignlib.makeAlignataMatrixRow()

    for pos1, (x1, y1, z1) in enumerate(coordinates1):
        for pos2, (x2, y2, z2) in enumerate(coordinates2):
            dx = x1 - x2
            dy = y1 - y2
            dz = z1 - z2
            if math.sqrt(dx * dx + dy * dy + dz * dz) <= cutoff:
                dots.addPairExplicit(pos1 + 1, pos2 + 1, 1)

    # placeholder sequences of matching length; every residue is "A"
    seq1 = alignlib.makeSequence("A" * len(coordinates1))
    seq2 = alignlib.makeSequence("A" * len(coordinates2))

    if dots.getLength() <= 3:
        return None

    alignator = alignlib.makeAlignatorDotsSquared(
        0, 0, alignlib.makeAlignatorDummy(dots))
    map_a2b = alignlib.makeAlignataVector()
    alignator.Align(seq1, seq2, map_a2b)

    return map_a2b
Beispiel #6
0
    def Split( self, max_gap_length ):
        """Split every alignment in the table at long gaps and reload the table.

        Each row is read, its alignment is rebuilt from the compressed
        strings and handed to alignlib.splitAlignata together with
        max_gap_length. Every fragment that comes back is written as a row of
        its own to a temporary load file. Afterwards the table is dropped,
        re-created and loaded from that file. This is necessary, as
        structural domains can be discontinuous.

        max_gap_length: gap length threshold passed to alignlib.splitAlignata.
        """

        statement = """
        SELECT nid, start, end, rep_ali,
        domain_id, domain_from, domain_to, domain_ali,
        family
        %s
        FROM %s""" % (self.GetAdditionalInfo(), self.name )

        # NOTE(review): os.tempnam is documented as open to symlink races.
        # It is kept here because a replacement such as tempfile.mkstemp
        # creates the file with different permissions, which may matter to
        # whatever reads the file in self.Load -- confirm before changing.
        load_filename = os.tempnam(Pairsdb.PATH_LOAD, "scmp")

        outfile = open( load_filename, "w" )
        try:
            for domain in self.Execute(statement).fetchall():
                (nid, start, end, rep_ali,
                 domain_id, domain_from, domain_to, domain_ali,
                 family) = domain[:9]

                map_rep2domains = alignlib.makeAlignataVector()
                alignlib.fillAlignataCompressed( map_rep2domains,
                                                 start, rep_ali,
                                                 domain_from, domain_ali )

                fragments = [ alignlib.AlignataPtr(x) for x in
                              alignlib.splitAlignata( map_rep2domains, max_gap_length ) ]

                ## now write each fragment to the output
                for map_rep2domain in fragments:
                    ## so that the object gets deleted, once it goes out of scope
                    map_rep2domain.thisown = 1

                    # columns beyond the first nine are passed through unchanged
                    self.WriteLine( outfile,
                                    nid,
                                    map_rep2domain,
                                    domain_id,
                                    family,
                                    domain[9:])
        finally:
            # close the load file even if a query, alignment or write fails
            outfile.close()

        self.Drop()
        self.Create()
        self.Load( load_filename )
Beispiel #7
0
    def Split( self, max_gap_length ):
        """Split every alignment in the table at long gaps and reload the table.

        Each row is read, its alignment is rebuilt from the compressed
        strings and handed to alignlib.splitAlignata together with
        max_gap_length. Every fragment that comes back is written as a row of
        its own to a temporary load file. Afterwards the table is dropped,
        re-created and loaded from that file. This is necessary, as
        structural domains can be discontinuous.

        max_gap_length: gap length threshold passed to alignlib.splitAlignata.
        """

        statement = """
        SELECT nid, start, end, rep_ali,
        domain_id, domain_from, domain_to, domain_ali,
        family
        %s
        FROM %s""" % (self.GetAdditionalInfo(), self.name )

        # NOTE(review): os.tempnam is documented as open to symlink races.
        # It is kept here because a replacement such as tempfile.mkstemp
        # creates the file with different permissions, which may matter to
        # whatever reads the file in self.Load -- confirm before changing.
        load_filename = os.tempnam(Pairsdb.PATH_LOAD, "scmp")

        outfile = open( load_filename, "w" )
        try:
            for domain in self.Execute(statement).fetchall():
                (nid, start, end, rep_ali,
                 domain_id, domain_from, domain_to, domain_ali,
                 family) = domain[:9]

                map_rep2domains = alignlib.makeAlignataVector()
                alignlib.fillAlignataCompressed( map_rep2domains,
                                                 start, rep_ali,
                                                 domain_from, domain_ali )

                fragments = [ alignlib.AlignataPtr(x) for x in
                              alignlib.splitAlignata( map_rep2domains, max_gap_length ) ]

                ## now write each fragment to the output
                for map_rep2domain in fragments:
                    ## so that the object gets deleted, once it goes out of scope
                    map_rep2domain.thisown = 1

                    # columns beyond the first nine are passed through unchanged
                    self.WriteLine( outfile,
                                    nid,
                                    map_rep2domain,
                                    domain_id,
                                    family,
                                    domain[9:])
        finally:
            # close the load file even if a query, alignment or write fails
            outfile.close()

        self.Drop()
        self.Create()
        self.Load( load_filename )
    def GetLinks( self, query_nid, query_from, query_to, query_ali, sbjct_nid, sbjct_from, sbjct_to, sbjct_ali):
        """returns all possible links between link split into domains.
        """

        if self.mLogLevel >= 2:
            print "# processing", query_nid, sbjct_nid, query_from, query_to, sbjct_from, sbjct_to
            sys.stdout.flush()
            
        map_query2sbjct = alignlib.makeAlignataVector()

        alignlib.fillAlignataCompressed( map_query2sbjct, query_from, query_ali, sbjct_from, sbjct_ali )

        # iterate over query
        for query_domain_from, query_domain_to, query_family in self.mDomains[query_nid]:

            # check if overlap
            overlap = min(query_to, query_domain_to)-max(query_from, query_domain_from) + 1
            if overlap <= self.mMinOverlapResidues: continue

            # check for overlap with domains in sbjct
            for sbjct_domain_from, sbjct_domain_to, sbjct_family in self.mDomains[sbjct_nid]:
                
                overlap = min(sbjct_to, sbjct_domain_to)-max(sbjct_from, sbjct_domain_from) + 1
                if overlap < self.mMinOverlapResidues: continue

                map_new_query2sbjct = alignlib.makeAlignataVector()
                alignlib.copyAlignata( map_new_query2sbjct, map_query2sbjct,
                                       query_domain_from, query_domain_to,
                                       sbjct_domain_from, sbjct_domain_to)

                if map_new_query2sbjct.getLength() > 0:

                    row_ali, col_ali = alignlib.writeAlignataCompressed(  map_new_query2sbjct )
                    
                    print string.join( ("%s_%s_%s" % (query_nid, query_domain_from, query_domain_to),
                                        "%s_%s_%s" % (sbjct_nid, sbjct_domain_from, sbjct_domain_to),
                                        "0",
                                        str(map_new_query2sbjct.getRowFrom()),
                                        str(map_new_query2sbjct.getRowTo()),
                                        row_ali,
                                        str(map_new_query2sbjct.getColFrom()),
                                        str(map_new_query2sbjct.getColTo()),
                                        col_ali), "\t")
Beispiel #9
0
    def CalculateMatches(self):
        """calculate all-vs-all alignments.
        """

        if not self.mAlignanda:
            self.GetAlignanda()

        if self.mLogLevel >= 1:
            print "# --> calculating alignments for %i entries" % len(
                self.mAlignanda)
            print "# --> starting at:", Tools.GetTimeStamp()

        nalignanda = len(self.mAlignanda)

        for a1 in range(self.startAt, nalignanda - 1):
            if self.mLogLevel >= 1:
                print "# %5i/%5i at %s" % (a1, nalignanda,
                                           Tools.GetTimeStamp())
                sys.stdout.flush()

            for a2 in range(a1 + 1, nalignanda):

                if self.mLogLevel >= 3:
                    print "#    aligning to %i" % (a2), self.mInformation[a2]
                    sys.stdout.flush()

                result = alignlib.makeAlignataVector()

                self.mAlignator.Align(self.mAlignanda[a1], self.mAlignanda[a2],
                                      result)

                info = self.mAlignator.CheckResult(result,
                                                   self.mInformation[a1],
                                                   self.mInformation[a2])

                if info:
                    r = tuple(self.mInformation[a1]) + tuple(
                        self.mInformation[a2]) + tuple(info)
                    print string.join(r, "\t")

                sys.stdout.flush()

            self.mAlignanda[a1].Release()
            self.mAlignanda[a1] = None

        if self.mLogLevel >= 1:
            print "# --> finished at:", Tools.GetTimeStamp()
Beispiel #10
0
def getMapFromMali( seq1, seq2, gap_char = "-" ):
    """Build an alignment map from two rows of a multiple alignment.

    Both rows are walked column by column while counting the non-gap
    characters seen so far in each row. A column adds a pair of 1-based
    residue positions to the map only if neither character is gap_char and
    both are contained in string.uppercase. seq2 is indexed with the column
    positions of seq1.

    Returns the filled alignment map.
    """
    map_a2b = alignlib.makeAlignataVector()

    residues1 = 0
    residues2 = 0
    for column, char1 in enumerate(seq1):
        char2 = seq2[column]
        present1 = char1 != gap_char
        present2 = char2 != gap_char

        if present1 and present2 and \
           char1 in string.uppercase and \
           char2 in string.uppercase:
            map_a2b.addPairExplicit( residues1 + 1, residues2 + 1, 0 )

        # advance the residue counters past every non-gap character
        if present1:
            residues1 += 1
        if present2:
            residues2 += 1

    return map_a2b
Beispiel #11
0
    def CalculateMatches( self ):
        """calculate all-vs-all alignments.
        """

        if not self.mAlignanda:
            self.GetAlignanda()
            
        if self.mLogLevel >= 1:
            print "# --> calculating alignments for %i entries" % len(self.mAlignanda)
            print "# --> starting at:", Tools.GetTimeStamp()

        nalignanda = len(self.mAlignanda)
        
        for a1 in range(self.startAt, nalignanda-1):
            if self.mLogLevel >= 1:
                print "# %5i/%5i at %s" % (a1, nalignanda, Tools.GetTimeStamp())
                sys.stdout.flush()
                
            for a2 in range(a1+1,nalignanda):
                
                if self.mLogLevel >= 3:
                    print "#    aligning to %i" % (a2), self.mInformation[a2]
                    sys.stdout.flush()

                result = alignlib.makeAlignataVector()

                self.mAlignator.Align( self.mAlignanda[a1], self.mAlignanda[a2], result )

                info = self.mAlignator.CheckResult( result, self.mInformation[a1], self.mInformation[a2] )
                
                if info:
                    r = tuple(self.mInformation[a1]) + tuple(self.mInformation[a2]) + tuple(info)
                    print string.join(r, "\t" )
                
                sys.stdout.flush()
                
            self.mAlignanda[a1].Release()
            self.mAlignanda[a1] = None
            
        if self.mLogLevel >= 1:
            print "# --> finished at:", Tools.GetTimeStamp()
Beispiel #12
0
def BuildBLASTMatrix(
    dbhandle,
    query_nid,
    resolution=1.0,
    table_name=None,
    combine_repeats=None,
    max_evalue=None,
    min_evalue=None,
    residue_level=None,
    parser=None,
    add_self=None,
):
    """build matrix based on BLAST alignments to query_nid.

    matrix of size N*M
    N: number of neighbours
    M: length of query (scaled with resolution)

    alignments are truncated.

    the query is included in the matrix.
    
    if combine_repeats is set, multiple alignments between the query and a sbjct will
    be entered into the same row.

    if residue_level is set, entries are added on the residue level. The resolution parameter
    is ignored.
    """

    if residue_level:
        query_length = Table_nrdb(dbhandle).GetLength(query_nid)
    else:
        query_length = int(math.floor(float(Table_nrdb(dbhandle).GetLength(query_nid)) / float(resolution)))

    tbl_pairsdb_90x90 = TablePairsdbNeighbours(dbhandle)
    if table_name:
        tbl_pairsdb_90x90.SetName(table_name)

    neighbours = tbl_pairsdb_90x90.GetNeighbours(
        query_nid, sort_order=3, skip_query=add_self, min_evalue=min_evalue, max_evalue=max_evalue
    )
    nindex = {}

    nneighbours = 0
    if combine_repeats:
        for neighbour in neighbours:
            (
                query_from,
                query_to,
                query_ali,
                sbjct_nid,
                sbjct_from,
                sbjct_to,
                sbjct_ali,
                score,
                pide,
                evalue,
            ) = neighbour
            if not nindex.has_key(sbjct_nid):
                nindex[sbjct_nid] = nneighbours
                nneighbours += 1
    else:
        nneighbours = len(neighbours)

    if add_self:
        nneighbours += 1

    matrix = numpy.zeros((nneighbours, query_length), numpy.int)

    if add_self:
        matrix[0, 0:query_length] = 1
        row = 1
    else:
        row = 0

    for neighbour in neighbours:

        (query_from, query_to, query_ali, sbjct_nid, sbjct_from, sbjct_to, sbjct_ali, score, pide, evalue) = neighbour

        if combine_repeats:
            use_row = nindex[sbjct_nid]
        else:
            use_row = row
            row += 1

        if residue_level:
            map_sbjct2query = alignlib.makeAlignataVector()
            alignlib.fillAlignataCompressed(map_sbjct2query, sbjct_from, sbjct_ali, query_from, query_ali)
            if parser:
                parser(map_sbjct2query)

            for x in range(sbjct_from, sbjct_to + 1):
                y = map_sbjct2query.mapRowToCol(x)
                if y:
                    try:
                        matrix[use_row, y - 1] = 1
                    except IndexError:
                        print "IndexError in ", query_nid, sbjct_nid, x, y - 1, query_length
        else:
            yfrom = int(math.floor(query_from / resolution))
            yto = int(math.floor(query_to / resolution))
            matrix[use_row, yfrom:yto] = 1

    return matrix
Beispiel #13
0
    def MapAndAddDomains( self, domains):
        """Map a domain using an alignment and write to outputfile for 
        loading into table.

        map info from member to rep

        domains contains the following information:
        nid,                        # nid of new rep
        info_mem_from, info_mem_to, info_mem_ali,      # information to be mapped on mem
        info_from, info_to, info_ali,   # information to be mapped on other quantity
        start, end, rep_ali,      # map between mem and new rep, rep-part
        mem_from, mem_to, mem_ali,       # map between mem and new rep, mem-part
        ...info-fields
        """

        temp_filename = os.tempnam( Pairsdb.PATH_LOAD, "clup" )
        
        failed = 0
        
        outfile = open(temp_filename, "w")

        for domain in domains:
            
            ( nid,
              info_mem_from, info_mem_to, info_mem_ali,
              info_from, info_to, info_ali,
              start, end, rep_ali,
              mem_from, mem_to, mem_ali,
              domain_id, family) = domain[:15]
            
            # set does not work (for example 1b77 for ddd, obscure error, probably
            # due to destructors?
            map_info_mem2info = alignlib.makeAlignataVector()
            
            alignlib.fillAlignataCompressed( map_info_mem2info,
                                             info_mem_from, info_mem_ali.tostring(),
                                             info_from, info_ali.tostring() )
            
            map_rep2mem = alignlib.makeAlignataVector()
            alignlib.fillAlignataCompressed( map_rep2mem,
                                             start, rep_ali.tostring(),
                                             mem_from, mem_ali.tostring() )
            
            map_rep2info = alignlib.makeAlignataVector()
            alignlib.combineAlignata( map_rep2info, map_rep2mem, map_info_mem2info, alignlib.CR) 

            if map_rep2info.getLength() == 0:
                if self.mLogLevel >= 2:
                    print "----> mapping failed for", domain
                    sys.stdout.flush()
                failed += 1
                
            else:
                self.WriteLine( outfile, nid, map_rep2info, domain_id, family,
                                domain[15:])
        
        outfile.close()
        
        self.Load( temp_filename )
        
        if self.mLogLevel >= 1:
            print "--> mapping failed for %i pairings." % failed
            sys.stdout.flush()

        return failed
Beispiel #14
0
def BuildBLASTMatrix( dbhandle,
                      query_nid,
                      resolution = 1.0,
                      table_name = None,
                      combine_repeats = None,
                      max_evalue = None,
                      min_evalue = None,
                      residue_level = None,
                      parser = None,
                      add_self = None):
    """build matrix based on BLAST alignments to query_nid.

    matrix of size N*M
    N: number of neighbours
    M: length of query (scaled with resolution)

    alignments are truncated.

    the query is included in the matrix.
    
    if combine_repeats is set, multiple alignments between the query and a sbjct will
    be entered into the same row.

    if residue_level is set, entries are added on the residue level. The resolution parameter
    is ignored.
    """

    if residue_level:
        query_length = Table_nrdb(dbhandle).GetLength( query_nid )
    else:
        query_length = int( math.floor( float(Table_nrdb(dbhandle).GetLength( query_nid )) / float(resolution)))

    tbl_pairsdb_90x90 = TablePairsdbNeighbours( dbhandle )
    if table_name:
        tbl_pairsdb_90x90.SetName( table_name )

    neighbours = tbl_pairsdb_90x90.GetNeighbours( query_nid,
                                                  sort_order = 3,
                                                  skip_query = add_self,
                                                  min_evalue = min_evalue,
                                                  max_evalue = max_evalue)
    nindex = {}
    
    nneighbours = 0
    if combine_repeats:
        for neighbour in neighbours:
            (query_from, query_to, query_ali,
             sbjct_nid, sbjct_from, sbjct_to, sbjct_ali, score, pide, evalue) = neighbour
            if not nindex.has_key(sbjct_nid):
                nindex[sbjct_nid] = nneighbours
                nneighbours += 1
    else:
        nneighbours = len(neighbours)

    if add_self:
        nneighbours += 1

    matrix = numpy.zeros( (nneighbours, query_length), numpy.int)    

    if add_self:
        matrix[0, 0:query_length] = 1
        row = 1
    else:
        row = 0
        
    for neighbour in neighbours:

        (query_from, query_to, query_ali,
         sbjct_nid, sbjct_from, sbjct_to, sbjct_ali, score, pide, evalue) = neighbour

        if combine_repeats:
            use_row = nindex[sbjct_nid]
        else:
            use_row = row
            row += 1

        if residue_level:
            map_sbjct2query = alignlib.makeAlignataVector()
            alignlib.fillAlignataCompressed( map_sbjct2query, sbjct_from, sbjct_ali, query_from, query_ali )
            if parser:
                parser( map_sbjct2query )
                
            for x in range(sbjct_from, sbjct_to + 1):
                y = map_sbjct2query.mapRowToCol(x)
                if y:
                    try:
                        matrix[use_row, y-1] = 1
                    except IndexError:
                        print "IndexError in ", query_nid, sbjct_nid, x, y-1, query_length
        else:
            yfrom = int(math.floor(query_from/resolution))
            yto   = int(math.floor(query_to/resolution)) 
            matrix[use_row, yfrom:yto] = 1
            
    return matrix
Beispiel #15
0
            # keep, for every token, the longest sequence seen so far: an
            # entry is only replaced by a strictly longer sequence
            for token, sequence in lines:
                if param_sequences.has_key(token):
                    if len(param_sequences[token]) >= len(sequence):
                        continue
                param_sequences[token] = sequence
                
    tbl_nrdb = Table_nrdb( dbhandle )

    query_sequence = None

    # the kind of multiple alignment depends on param_unaligned
    if param_unaligned:
        mali = alignlib.makeMultipleAlignmentDots(param_compression)
    else:
        mali = alignlib.makeMultipleAlignment()
        
    map_query2sbjct = alignlib.makeAlignataVector()

    # one record per stdin line that does not start with "#": the line
    # without its last character, split on tabs, first nine fields kept
    lines = map( lambda x: string.split( x[:-1], "\t")[:9], filter( lambda x: x[0] != "#", sys.stdin.readlines()))

    if param_sort_order:
        # decorate every record with the rank of its second field (sbjct_nid)
        # in param_sort_order; ids without a rank get len(param_sort_order)
        data = []
        for line in lines:
            sbjct_nid = line[1]
            if param_sort_order.has_key( sbjct_nid ):
                o = param_sort_order[sbjct_nid]
            else:
                o = len(param_sort_order)
            data.append( (o, line) )

        # sort by rank (ties fall back to comparing the records), then
        # strip the rank again
        data.sort()
        lines = map( lambda x: x[1], data)
    def CheckLink( self,
                   query_nid, query_from, query_to,
                   sbjct_nid, sbjct_from, sbjct_to):
        """check, whether link is faithfull.
        """


        query_profile = self.GetProfile( query_nid )
        query_profile.useSegment( query_from, query_to)

        sbjct_profile = self.GetProfile( sbjct_nid )
        sbjct_profile.useSegment( sbjct_from, sbjct_to)        

        alignator = alignlib.makeFullDP( self.mGop, self.mGep )
        result = alignlib.makeAlignataVector()
        
        alignator.Align( query_profile, sbjct_profile, result)
        
        if result.getLength() == 0:
            query_profile.useFullLength()
            sbjct_profile.useFullLength()
            return 0, result
        
        if self.mLogLevel >= 3:
            print "# --> %i vs %i: score=%5.2f, length=%i, numgaps=%i, row_from=%i, row_to=%i, col_from=%i, col_to=%i" %\
                  (query_nid, sbjct_nid,
                   result.getScore(),
                   result.getLength(),
                   result.getNumGaps(),
                   result.getRowFrom(), result.getRowTo(),
                   result.getColFrom(), result.getColTo())
            sys.stdout.flush()

        if result.getScore() < self.mMinAlignmentScore:
            query_profile.useFullLength()
            sbjct_profile.useFullLength()
            return 0, result

        if result.getScore() > self.mSafetyThreshold * self.mMinAlignmentScore:
            query_profile.useFullLength()
            sbjct_profile.useFullLength()
            return 1,result
        
        z_params = alignlib.makeNormalDistributionParameters()
        alignlib.calculateZScoreParameters( z_params,
                                            query_profile,
                                            sbjct_profile,
                                            alignator,
                                            self.mNumIterationsZScore)
        mean   = z_params.getMean()
        stddev = z_params.getStandardDeviation()
        if stddev == 0: stddev = 1
        
        zscore = (result.getScore() - mean) / stddev
        
        if self.mLogLevel >= 3:
            print "# --> mean=%f, stdev=%f, zscore=%f" % (mean, stddev, zscore)
            sys.stdout.flush()
            
        query_profile.useFullLength()
        sbjct_profile.useFullLength()
        
        if zscore > self.mMinZScore:
            return 1,result
        else:
            return 0,result
Beispiel #17
0
    # read the peptide sequences from options.filename_sequences
    # NOTE(review): the file handle opened here is not closed in the visible code
    sequences = Genomics.ReadPeptideSequences( open(options.filename_sequences, "r") )

    if options.loglevel >= 1:
        print "# read %i sequences" % len(sequences)
        
    # replace every entry by the object alignlib.makeSequence builds from it
    for k in sequences.keys():
        sequences[k] = alignlib.makeSequence( sequences[k] )

    if options.loglevel >= 2:
        print "# converted %i sequences" % len(sequences)
    
    # counters; only ninput and nskipped are updated in the visible part
    ninput, noutput, nskipped, nfailed = 0, 0, 0, 0
    link = BlastAlignments.Link()

    # one alignment object, cleared and refilled for every link
    ali = alignlib.makeAlignataVector()
    
    for line in sys.stdin:
        
        # lines starting with "#" are skipped
        if line[0] == "#": continue

        link.Read( line )
        ninput += 1

        # both tokens of the link must be present in sequences
        if link.mQueryToken not in sequences or link.mSbjctToken not in sequences:
            nskipped += 1
            continue
        
        ali.Clear()
        alignlib.fillAlignataCompressed( ali, link.mQueryFrom, link.mQueryAli, link.mSbjctFrom, link.mSbjctAli )
Beispiel #18
0
        # drop the first entry of options.parameters
        del options.parameters[0]

        old_length = mali.getLength()
        
        new_mali = convertMali2Mali( mali )

        # "sw" selects makeAlignatorFullDP, any other value
        # makeAlignatorFullDPGlobal; both get options.gop and options.gep
        if options.alignment_method == "sw":
            alignator = alignlib.makeAlignatorFullDP( options.gop, options.gep )
        else:
            alignator = alignlib.makeAlignatorFullDPGlobal( options.gop, options.gep )            
        
        # process records until iterator.next() returns None
        while 1:
            cur_record = iterator.next()
            if cur_record is None: break

            map_mali2seq = alignlib.makeAlignataVector()

            sequence = alignlib.makeSequence( cur_record.sequence )
            # the profile is rebuilt from new_mali for every record
            profile = alignlib.makeProfileFromMali( new_mali )

            if options.loglevel >= 4:
                options.stdlog.write(profile.Write())

            alignator.Align( profile, sequence, map_mali2seq )

            if options.loglevel >= 3:
                options.stdlog.write( map_mali2seq.Write() )

            ## add sequence to mali
            a = alignlib.makeAlignatumFromString( cur_record.sequence )
            # NOTE(review): thisown = 0 presumably hands ownership of the
            # object to the wrapped library -- confirm
            a.thisown = 0
Beispiel #19
0
        # read from standard input in this branch
        infile = sys.stdin

    parser = FastaIterator.FastaIterator( infile )

    # collect (title, sequence object) pairs until next() returns None
    # NOTE(review): the iterator above is bound to `parser`, but the loop
    # reads from `iterator`, which is not assigned in the visible code --
    # confirm it is defined earlier, otherwise this raises a NameError.
    sequences = []
    while 1:
        cur_record = iterator.next()
        
        if cur_record is None: break
        # spaces are removed from the sequence before it is converted
        sequences.append( (cur_record.title, alignlib.makeSequence(re.sub( " ", "", cur_record.sequence)) ) )
    
    # the input is only closed if options.filename_sequences is set
    if options.filename_sequences:
        infile.close()

    alignator = alignlib.makeAlignatorFullDP( options.gop, options.gep )
    map_a2b = alignlib.makeAlignataVector()
    nsequences = len(sequences)
    
    # align every pair x < y; map_a2b is passed to each Align call
    for x in range(0,nsequences-1):
        for y in range(x+1, nsequences):
            alignator.Align( sequences[x][1], sequences[y][1], map_a2b)

            row_ali, col_ali = alignlib.writeAlignataCompressed( map_a2b )
            
            # one tab-separated output line per pair
            options.stdout.write( "%s\t%s\t%i\t%i\t%i\t%s\t%i\t%i\t%s\t%i\t%i\t%i\t%i\n" % (\
                sequences[x][0], sequences[y][0],
                map_a2b.getScore(),
                map_a2b.getRowFrom(),
                map_a2b.getRowTo(),
                row_ali,
                map_a2b.getColFrom(),
Beispiel #20
0
    def FillAlignments( self ):
        """Align all pairs of sources and load the results into the alignments table.

        The sources come from self.mTableSources.GetSources() and are turned
        into a sequence of (id, alignandum) pairs by
        self.CreateAlignandumObjects(). Every unordered pair (query position
        strictly before sbjct position) is aligned with self.mAlignator and
        written as one "@"-separated line to self.mTempFilename with the
        fields

            query_id, row_from, row_to, query_ali,
            sbjct_id, col_from, col_to, sbjct_ali,
            score, numgaps, length, 0

        Afterwards self.mTableAlignments is dropped, re-created and loaded
        from that file.

        Progress messages are printed to stdout for self.mLogLevel >= 1
        (summary) and >= 2 (per-query timing).

        The temporary file is closed even if an alignment or a write fails;
        in that case the exception propagates and the table is left untouched.
        """
        sources = self.mTableSources.GetSources()
        nsources = len(sources)

        if self.mLogLevel >= 1:
            # n * (n - 1) / 2 unordered pairs; "//" keeps this an integer.
            print("--> calculating %i alignments for %i sequences" %
                  ((nsources * (nsources - 1)) // 2, nsources))

        # calculate the alignanda objects
        if self.mLogLevel >= 1:
            print("--> retrieving alignanda objects")
            sys.stdout.flush()

        alignanda = self.CreateAlignandumObjects( sources )

        # A single result container is handed to every Align() call.
        map_query2sbjct = alignlib.makeAlignataVector()

        outfile = open(self.mTempFilename, "w")
        try:
            # do all vs all alignments (upper triangle only)
            for query in range(0, nsources - 1):
                query_id, query_alignandum = alignanda[query]

                start_time = time.time()

                if self.mLogLevel >= 2:
                    print("processing id %i at %s" %
                          (query_id, time.asctime(time.localtime(start_time))))
                    sys.stdout.flush()

                for sbjct in range(query + 1, nsources):
                    sbjct_id, sbjct_alignandum = alignanda[sbjct]

                    self.mAlignator.Align( query_alignandum, sbjct_alignandum, map_query2sbjct )

                    query_ali, sbjct_ali = alignlib.writeAlignataCompressed( map_query2sbjct )

                    fields = (
                        query_id,
                        map_query2sbjct.getRowFrom(),
                        map_query2sbjct.getRowTo(),
                        query_ali,
                        sbjct_id,
                        map_query2sbjct.getColFrom(),
                        map_query2sbjct.getColTo(),
                        sbjct_ali,
                        map_query2sbjct.getScore(),
                        map_query2sbjct.getNumGaps(),
                        map_query2sbjct.getLength(),
                        0)
                    outfile.write( "@".join( map( str, fields ) ) + "\n" )

                stop_time = time.time()

                if self.mLogLevel >= 2:
                    print("--> alignments: %5i, time: %7.2fs" %
                          (nsources - query - 1, stop_time - start_time))
        finally:
            # Release the file handle even when an alignment or write raised.
            outfile.close()

        # load data
        self.mTableAlignments.Drop()
        self.mTableAlignments.Create()
        self.mTableAlignments.Load( self.mTempFilename )
Beispiel #21
0
    def MapAndAddDomains( self, domains):
        """Map domains via an alignment, write them to a file and load it into the table.

        Information is mapped from a member onto its (new) representative.

        Each entry of domains is a sequence whose first 15 fields are:

            nid,                                        # nid of new rep
            info_mem_from, info_mem_to, info_mem_ali,   # information to be mapped, member part
            info_from, info_to, info_ali,               # information to be mapped, other quantity
            start, end, rep_ali,                        # map between mem and new rep, rep part
            mem_from, mem_to, mem_ali,                  # map between mem and new rep, mem part
            domain_id, family

        Any further fields are passed on unchanged to self.WriteLine(). The
        *_ali fields must provide a tostring() method (presumably array
        objects delivered by the database layer -- TODO confirm).

        For every domain the rep->mem map and the info_mem->info map are
        combined into a rep->info map. If that map is empty the domain counts
        as failed; otherwise a line is written via self.WriteLine(). The file
        is then loaded with self.Load().

        Returns the number of domains for which the mapping failed.

        The output file is closed even if mapping or writing raises; the
        exception then propagates and nothing is loaded.
        """

        # NOTE(review): os.tempnam() is documented as insecure (the name can be
        # taken by someone else before the file is opened). It is kept here
        # because self.Load() may rely on how the file is created; consider
        # tempfile.mkstemp() once that has been checked.
        temp_filename = os.tempnam( Pairsdb.PATH_LOAD, "clup" )

        failed = 0

        outfile = open(temp_filename, "w")
        try:
            for domain in domains:

                # The *_to / end values are unpacked only to name the layout;
                # the alignments below need just the start positions.
                ( nid,
                  info_mem_from, info_mem_to, info_mem_ali,
                  info_from, info_to, info_ali,
                  start, end, rep_ali,
                  mem_from, mem_to, mem_ali,
                  domain_id, family) = domain[:15]

                # Note from the original author: "set does not work (for
                # example 1b77 for ddd, obscure error, probably due to
                # destructors?)" -- hence fresh vectors for every domain.
                map_info_mem2info = alignlib.makeAlignataVector()
                alignlib.fillAlignataCompressed( map_info_mem2info,
                                                 info_mem_from, info_mem_ali.tostring(),
                                                 info_from, info_ali.tostring() )

                map_rep2mem = alignlib.makeAlignataVector()
                alignlib.fillAlignataCompressed( map_rep2mem,
                                                 start, rep_ali.tostring(),
                                                 mem_from, mem_ali.tostring() )

                # Chain rep->mem with info_mem->info to obtain rep->info.
                map_rep2info = alignlib.makeAlignataVector()
                alignlib.combineAlignata( map_rep2info, map_rep2mem, map_info_mem2info, alignlib.CR)

                if map_rep2info.getLength() == 0:
                    if self.mLogLevel >= 2:
                        print("----> mapping failed for " + str(domain))
                        sys.stdout.flush()
                    failed += 1
                else:
                    self.WriteLine( outfile, nid, map_rep2info, domain_id, family,
                                    domain[15:])
        finally:
            # Do not leak the file handle if a mapping or write step raised.
            outfile.close()

        self.Load( temp_filename )

        if self.mLogLevel >= 1:
            print("--> mapping failed for %i pairings." % failed)
            sys.stdout.flush()

        return failed
    def CheckLink(self, query_nid, query_from, query_to, sbjct_nid, sbjct_from,
                  sbjct_to):
        """Check whether a link is faithful by re-aligning the two profile segments.

        The profiles of query_nid and sbjct_nid are fetched with
        self.GetProfile(), restricted to [query_from, query_to] and
        [sbjct_from, sbjct_to], and aligned with a full dynamic-programming
        aligner built from self.mGop / self.mGep.

        The decision is taken in this order:
          1. empty alignment                                   -> rejected
          2. score < self.mMinAlignmentScore                   -> rejected
          3. score > self.mSafetyThreshold * mMinAlignmentScore -> accepted
          4. otherwise a z-score is computed from
             self.mNumIterationsZScore iterations and the link is accepted
             if zscore > self.mMinZScore.

        Returns a tuple (flag, result) where flag is 1 (accepted) or 0
        (rejected) and result is the alignment object.

        Both profiles are switched back to full length before returning,
        and also when an exception is raised part-way through.
        """

        query_profile = self.GetProfile(query_nid)
        sbjct_profile = None

        try:
            query_profile.useSegment(query_from, query_to)

            sbjct_profile = self.GetProfile(sbjct_nid)
            sbjct_profile.useSegment(sbjct_from, sbjct_to)

            alignator = alignlib.makeFullDP(self.mGop, self.mGep)
            result = alignlib.makeAlignataVector()

            alignator.Align(query_profile, sbjct_profile, result)

            if result.getLength() == 0:
                return 0, result

            if self.mLogLevel >= 3:
                print("# --> %i vs %i: score=%5.2f, length=%i, numgaps=%i, row_from=%i, row_to=%i, col_from=%i, col_to=%i" %
                      (query_nid, sbjct_nid,
                       result.getScore(),
                       result.getLength(),
                       result.getNumGaps(),
                       result.getRowFrom(), result.getRowTo(),
                       result.getColFrom(), result.getColTo()))
                sys.stdout.flush()

            if result.getScore() < self.mMinAlignmentScore:
                return 0, result

            # Scores well above the minimum are accepted without the
            # z-score computation.
            if result.getScore() > self.mSafetyThreshold * self.mMinAlignmentScore:
                return 1, result

            z_params = alignlib.makeNormalDistributionParameters()
            alignlib.calculateZScoreParameters(z_params, query_profile,
                                               sbjct_profile, alignator,
                                               self.mNumIterationsZScore)
            mean = z_params.getMean()
            stddev = z_params.getStandardDeviation()
            # Guard against division by zero for a degenerate distribution.
            if stddev == 0:
                stddev = 1

            zscore = (result.getScore() - mean) / stddev

            if self.mLogLevel >= 3:
                print("# --> mean=%f, stdev=%f, zscore=%f" % (mean, stddev, zscore))
                sys.stdout.flush()

            if zscore > self.mMinZScore:
                return 1, result
            return 0, result
        finally:
            # Single place that undoes useSegment(); runs on every return
            # path and on exceptions, so a profile handed out again by
            # GetProfile() is never left restricted to a segment.
            query_profile.useFullLength()
            if sbjct_profile is not None:
                sbjct_profile.useFullLength()
Beispiel #23
0
def buildMapPdb2Sequence( sequence, filename_pdb, options, pdb_chain = ""):
    """Build maps between residue numbers in a pdb file and residue numbers
    on a sequence.

    The residues of the first peptide chain whose chain_id equals pdb_chain
    are translated to a string via AMINOACIDS (defined elsewhere in this
    file), and that string is aligned to sequence.

    Arguments:
    sequence     -- reference sequence; passed to alignlib.makeSequence()
    filename_pdb -- path of the pdb file to read
    options      -- object providing loglevel and stdlog (used for logging
                    at loglevel >= 3)
    pdb_chain    -- chain identifier to select (default "")

    Returns (None, None) if filename_pdb does not exist.

    For the matching chain a 7-tuple is returned:

    map_structure2seq: mapping of residue numbers between structure and
        sequence. These are mappings that will work if you "renumber" the
        structure.

    map_pdb2seq, map_seq2pdb: mapping according to residue numbers in pdb
        file (keys/values taken from str(residue.number)).

    number of residues in the chain minus one,
    str() of the first and of the last residue number of the chain,
    the sequence of the chain as a string.

    NOTE(review): the two return statements have different lengths (2 vs 7)
    and, in the code visible here, nothing is returned explicitly when no
    chain matches pdb_chain -- confirm that callers cope with this.
    """

    if not os.path.exists( filename_pdb ):
        return None, None
    
    structure = Scientific.IO.PDB.Structure( filename_pdb )
    
    map_pdb2seq = {}
    map_seq2pdb = {}
    
    for chain in structure.peptide_chains:

        if chain.chain_id == pdb_chain:
            
            ## align pdb sequence to sequence
            map_structure2seq = alignlib.makeAlignataVector()
            # presumably gap opening / gap extension penalties -- TODO confirm
            alignator = alignlib.makeFullDP( -10.0, -2.0 )

            ## build sequence of pdb file
            # NOTE: this rebinds "structure" from the parsed PDB object to a
            # string. The loop iterable was already evaluated and the
            # function returns at the end of this branch, so the PDB object
            # is not needed again here.
            structure = ""
            
            for residue in chain.sequence():
                structure += AMINOACIDS[residue]

            ## align reference sequence to sequence of pdb file
            row = alignlib.makeSequence( structure )
            col = alignlib.makeSequence( sequence )
            alignator.Align(row, col, map_structure2seq)

            if options.loglevel >= 3:
                options.stdlog.write( "structure: %s\n" % structure )                
                options.stdlog.write( "sequence : %s\n" % sequence )
                options.stdlog.write( "alignment of structure to sequence:\n" )
                options.stdlog.write( alignlib.writePairAlignment( row, col, map_structure2seq ) + "\n" )
                
            # print alignlib.writeAlignataTable(map_structure2seq)

            # Counter is incremented before use, so alignment rows are
            # addressed starting at 1.
            residue_number = 0
            
            for residue in chain.residues:

                residue_number += 1
                
                mapped_residue = map_structure2seq.mapRowToCol(residue_number)
                
                # A false value (presumably 0 for "not aligned" -- TODO
                # confirm) means this residue has no counterpart on sequence.
                if not mapped_residue:
                    if options.loglevel >= 3:
                        options.stdlog.write( "# skipped residue %s=%s %i\n" % (str(residue.number), residue.name, residue_number))
                    continue

                # pdb residue numbers are stored as strings
                r = str(residue.number)
                map_pdb2seq[r] = mapped_residue
                map_seq2pdb[mapped_residue] = r
                
            # Only the first chain matching pdb_chain is processed.
            return map_structure2seq, map_pdb2seq, map_seq2pdb, residue_number-1, str(chain.residues[0].number), str(chain.residues[-1].number), structure