Beispiel #1
0
def convertMali2Alignlib(mali):
    """Build an alignlib multiple alignment from the rows of ``mali``.

    Every identifier returned by ``mali.getIdentifiers()`` is looked up in
    ``mali``; the resulting string is converted with
    ``alignlib.makeAlignatumFromString`` and added to a freshly created
    multiple alignment, which is returned.
    """
    # imported inside the function, so alignlib is only required when the
    # conversion is actually requested
    import alignlib

    result = alignlib.makeMultipleAlignment()
    for key in mali.getIdentifiers():
        result.add(alignlib.makeAlignatumFromString(mali[key]))
    return result
Beispiel #2
0
def convertMali2Alignlib( mali ):
    """Return an alignlib multiple alignment holding the rows of ``mali``.

    The rows are visited in the order given by ``mali.getIdentifiers()``.
    Each row string ``mali[identifier]`` is wrapped with
    ``alignlib.makeAlignatumFromString`` and appended via ``add``.
    """
    # function-local import: alignlib is resolved at call time
    import alignlib

    converted = alignlib.makeMultipleAlignment()
    for name in mali.getIdentifiers():
        row = mali[name]
        alignatum = alignlib.makeAlignatumFromString( row )
        converted.add( alignatum )
    return converted
Beispiel #3
0
def convertMali2Mali( mali ):
    """Copy the rows of ``mali`` into a new alignlib multiple alignment.

    For every identifier in ``mali.getIdentifiers()`` the row string is
    converted with ``alignlib.makeAlignatumFromString`` and appended with
    ``addAlignatum``.  The new multiple alignment is returned.
    """
    converted = alignlib.makeMultipleAlignment()

    for identifier in mali.getIdentifiers():
        alignatum = alignlib.makeAlignatumFromString( mali[identifier] )
        # NOTE(review): presumably releases Python-side ownership of the
        # wrapped object so that the multiple alignment can keep it -- confirm
        alignatum.thisown = 0
        converted.addAlignatum( alignatum )

    return converted
Beispiel #4
0
def convertMali2Alignlib( mali ):
    '''build an alignlib multiple alignment object from a
    multiple alignment of type :class:`Mali`.

    Rows are taken in the order of ``mali.getIdentifiers()``; each one
    is wrapped with ``alignlib.makeAlignatum`` and added to the result.
    '''

    # function-local import: alignlib is resolved at call time
    import alignlib

    converted = alignlib.makeMultipleAlignment()
    for name in mali.getIdentifiers():
        converted.add( alignlib.makeAlignatum( mali[name] ) )
    return converted
    def applyMethod(self, neighbours):
        """Realign the query against its neighbours and compare to BLAST.

        For every match in ``neighbours.mMatches`` whose sbjct token differs
        from the query token, the matched query and sbjct segments are
        realigned with a ``makeAlignatorDPFull`` aligner created with
        ``alignlib.ALIGNMENT_GLOBAL``.  The realignment is compared with the
        reference alignment returned by ``n.getAlignment()`` and one
        tab-separated line is written to ``self.mOutfile``: query token,
        sbjct token, the value of ``alignlib.getAlignmentIdentity`` and
        ``getNumAligned()`` of the reference and of the realignment.

        ``self.mNIdentical`` is incremented when the identity value equals
        the number of aligned positions of the reference alignment,
        ``self.mNDifferent`` otherwise.

        Raises:
            ValueError: if a match has no reference alignment.
        """
        # build multiple alignment
        # NOTE(review): ``mali`` is not read again in the visible code
        mali = alignlib.makeMultipleAlignment()

        query_nid = neighbours.mQueryToken

        sequence = self.mFasta.getSequence(query_nid)

        mali.add(alignlib.makeAlignatum(sequence))

        qseq = alignlib.makeSequence(sequence)
        alignator = alignlib.makeAlignatorDPFull(alignlib.ALIGNMENT_GLOBAL,
                                                 -10.0, -1.0, True, True, True,
                                                 True)

        for n in neighbours.mMatches:

            # do not align the query against itself
            if n.mSbjctToken == query_nid:
                continue
            sequence = self.mFasta.getSequence(n.mSbjctToken)

            blast_query2sbjct = n.getAlignment()

            # identity check: do not rely on the wrapped object's ``==``
            if blast_query2sbjct is None:
                raise ValueError(
                    "AddaRealignment.py needs a reference alignment.")

            sseq = alignlib.makeSequence(sequence)
            # restrict both sequences to the matched segments
            qseq.useSegment(n.mQueryFrom, n.mQueryTo)
            sseq.useSegment(n.mSbjctFrom, n.mSbjctTo)
            # a single fresh container per match receives the realignment
            realign_query2sbjct = alignlib.makeAlignmentVector()
            alignator.align(realign_query2sbjct, qseq, sseq)

            nidentical = alignlib.getAlignmentIdentity(realign_query2sbjct,
                                                       blast_query2sbjct,
                                                       alignlib.RR)
            nblast = blast_query2sbjct.getNumAligned()
            nrealigned = realign_query2sbjct.getNumAligned()

            self.mOutfile.write("%s\t%s\t%i\t%i\t%i\n" %
                                (n.mQueryToken, n.mSbjctToken,
                                 nidentical, nblast, nrealigned))

            if nidentical == nblast:
                self.mNIdentical += 1
            else:
                self.mNDifferent += 1
Beispiel #6
0
def readPicasso( infile ):
    """read alignment in the non-defined picasso format.

    Lines are read from ``infile`` with ``readline()`` until an empty
    result signals the end of input.  A line contributes a row when it
    contains digits, whitespace, a (possibly empty) run of upper-case
    letters, ``-`` and ``.``, whitespace and digits again; the run in the
    middle is added to the alignment.  All other lines are ignored.

    Returns the alignlib multiple alignment built from the matching lines.
    """

    mali = alignlib.makeMultipleAlignment()

    # raw string: "\d" and "\s" are invalid escapes in a plain literal;
    # compiled once instead of being looked up for every line
    rx_row = re.compile( r"\d+\s+([A-Z\-\.]*)\s+\d+" )

    while True:
        line = infile.readline()
        if not line:
            break

        match = rx_row.search( line )
        if match:
            a = alignlib.makeAlignatumFromString( match.group(1) )
            # NOTE(review): presumably releases Python-side ownership so the
            # multiple alignment can keep the object -- confirm
            a.thisown = 0
            mali.addAlignatum( a )

    return mali
Beispiel #7
0
    def applyMethod(self, neighbours ):
        """Realign the query against its neighbours and compare to BLAST.

        For every match in ``neighbours.mMatches`` whose sbjct token differs
        from the query token, the matched query and sbjct segments are
        realigned with a ``makeAlignatorDPFull`` aligner created with
        ``alignlib.ALIGNMENT_GLOBAL``.  The realignment is compared with the
        reference alignment returned by ``n.getAlignment()`` and one
        tab-separated line is written to ``self.mOutfile``: query token,
        sbjct token, the value of ``alignlib.getAlignmentIdentity`` and
        ``getNumAligned()`` of the reference and of the realignment.

        ``self.mNIdentical`` is incremented when the identity value equals
        the number of aligned positions of the reference alignment,
        ``self.mNDifferent`` otherwise.

        Raises:
            ValueError: if a match has no reference alignment.
        """
        # build multiple alignment
        # NOTE(review): ``mali`` is not read again in the visible code
        mali = alignlib.makeMultipleAlignment()

        query_nid = neighbours.mQueryToken

        sequence = self.mFasta.getSequence( query_nid )

        mali.add( alignlib.makeAlignatum( sequence ) )

        qseq = alignlib.makeSequence( sequence )
        alignator = alignlib.makeAlignatorDPFull( alignlib.ALIGNMENT_GLOBAL,
                                                  -10.0, -1.0, True, True, True, True)

        for n in neighbours.mMatches:

            # do not align the query against itself
            if n.mSbjctToken == query_nid:
                continue
            sequence = self.mFasta.getSequence( n.mSbjctToken )

            blast_query2sbjct = n.getAlignment()

            # identity check: do not rely on the wrapped object's ``==``
            if blast_query2sbjct is None:
                raise ValueError( "AddaRealignment.py needs a reference alignment.")

            sseq = alignlib.makeSequence( sequence )
            # restrict both sequences to the matched segments
            qseq.useSegment( n.mQueryFrom, n.mQueryTo )
            sseq.useSegment( n.mSbjctFrom, n.mSbjctTo )
            # a single fresh container per match receives the realignment
            realign_query2sbjct = alignlib.makeAlignmentVector()
            alignator.align( realign_query2sbjct, qseq, sseq )

            nidentical = alignlib.getAlignmentIdentity( realign_query2sbjct, blast_query2sbjct, alignlib.RR )
            nblast = blast_query2sbjct.getNumAligned()
            nrealigned = realign_query2sbjct.getNumAligned()

            self.mOutfile.write( "%s\t%s\t%i\t%i\t%i\n" %
                                 (n.mQueryToken, n.mSbjctToken, nidentical, nblast, nrealigned ) )

            if nidentical == nblast:
                self.mNIdentical += 1
            else:
                self.mNDifferent += 1
Beispiel #8
0
    def buildMali(self, query_nid, neighbours):
        """build a multiple alignment from a set of neighbours.

        The query sequence is fetched from ``self.mFasta`` and added first.
        At most ``self.mMaxNumNeighbours`` neighbours are then considered.
        A neighbour is left out when it is the query itself, its E-value
        exceeds ``self.mMaxEvalue``, its alignment is empty or extends
        beyond the query or sbjct sequence, or ``mali.add`` raises a
        ``RuntimeError``.  All but the first case emit a warning (except the
        E-value filter) and increment ``nskipped``.

        Neighbours without a stored alignment are aligned on the fly with a
        ``makeAlignatorDPFull`` aligner created with
        ``alignlib.ALIGNMENT_LOCAL``.

        NOTE(review): the visible code neither returns ``mali`` nor reads
        ``nskipped``; the method may be truncated here.
        """
        # build multiple alignment
        mali = alignlib.makeMultipleAlignment()

        query_sequence = self.mFasta.getSequence(query_nid)

        mali.add(alignlib.makeAlignatum(query_sequence))

        qseq = alignlib.makeSequence(query_sequence)
        alignator = alignlib.makeAlignatorDPFull(alignlib.ALIGNMENT_LOCAL, -10,
                                                 -2)

        nskipped = 0

        for n in neighbours[:self.mMaxNumNeighbours]:

            # the query is already the first row of the alignment
            if n.mSbjctToken == query_nid:
                continue
            if n.mEvalue > self.mMaxEvalue:
                nskipped += 1
                continue
            sequence = self.mFasta.getSequence(n.mSbjctToken)

            E.debug("adding %s" % str(n))

            map_query2sbjct = n.getAlignment()

            # identity check: do not rely on the wrapped object's ``==``
            if map_query2sbjct is None:
                # no stored alignment: align the matched segments directly
                sseq = alignlib.makeSequence(sequence)
                qseq.useSegment(n.mQueryFrom, n.mQueryTo)
                sseq.useSegment(n.mSbjctFrom, n.mSbjctTo)
                map_query2sbjct = alignlib.makeAlignmentVector()
                alignator.align(map_query2sbjct, qseq, sseq)

            if map_query2sbjct.getLength() == 0:
                self.warn("empty alignment: %s" % str(n))
                nskipped += 1
                continue

            # reject alignments that reach past either sequence
            if map_query2sbjct.getRowTo() > len(query_sequence):
                self.warn("alignment out of bounds for query: %i>%i, line=%s" %
                          (map_query2sbjct.getRowTo(), len(query_sequence), str(n)))
                nskipped += 1
                continue

            if map_query2sbjct.getColTo() > len(sequence):
                self.warn("alignment out of bounds for sbjct: %i>%i, line=%s" %
                          (map_query2sbjct.getColTo(), len(sequence), str(n)))
                nskipped += 1
                continue

            try:
                mali.add(alignlib.makeAlignatum(sequence),
                         map_query2sbjct,
                         mali_is_in_row=True,
                         insert_gaps_mali=False,
                         insert_gaps_alignatum=True,
                         use_end_mali=True,
                         use_end_alignatum=False)
            except RuntimeError as msg:
                # ``as`` form is accepted by Python 2.6+ and Python 3
                self.warn("problem when building alignment for %s: msg=%s" %
                          (str(n), msg))
                nskipped += 1
                continue
            # read file ``a`` and split every line on tabs; x[:-1] drops the
            # last character of each line (presumably the newline -- TODO
            # confirm the file always ends with one)
            lines = map(lambda x: string.split(x[:-1], "\t"), open(a, "r").readlines())
            param_sequences = {}
            # keep one sequence per token: a later entry replaces an earlier
            # one only if it is strictly longer
            for token, sequence in lines:
                if param_sequences.has_key(token):
                    if len(param_sequences[token]) >= len(sequence):
                        continue
                param_sequences[token] = sequence
                
    tbl_nrdb = Table_nrdb( dbhandle )

    query_sequence = None

    # choose the multiple alignment implementation based on param_unaligned
    if param_unaligned:
        mali = alignlib.makeMultipleAlignmentDots(param_compression)
    else:
        mali = alignlib.makeMultipleAlignment()
        
    map_query2sbjct = alignlib.makeAlignataVector()

    # read all of stdin, drop lines starting with "#", split the remaining
    # lines on tabs (after removing their last character) and keep at most
    # the first nine fields
    lines = map( lambda x: string.split( x[:-1], "\t")[:9], filter( lambda x: x[0] != "#", sys.stdin.readlines()))

    if param_sort_order:
        # pair every line with an order value looked up by its second field;
        # ids missing from param_sort_order get len(param_sort_order)
        data = []
        for line in lines:
            sbjct_nid = line[1]
            if param_sort_order.has_key( sbjct_nid ):
                o = param_sort_order[sbjct_nid]
            else:
                o = len(param_sort_order)
            data.append( (o, line) )
Beispiel #10
0
    def buildMali(self, query_nid, neighbours ):
        """build a multiple alignment from a set of neighbours.

        The query sequence is fetched from ``self.mFasta`` and added first.
        At most ``self.mMaxNumNeighbours`` neighbours are then considered.
        A neighbour is left out when it is the query itself, its E-value
        exceeds ``self.mMaxEvalue``, its alignment is empty or extends
        beyond the query or sbjct sequence, or ``mali.add`` raises a
        ``RuntimeError``.  All but the first case increment ``nskipped``.

        Neighbours without a stored alignment are aligned on the fly with a
        ``makeAlignatorDPFull`` aligner created with
        ``alignlib.ALIGNMENT_LOCAL``.

        NOTE(review): the visible code ends inside the neighbour loop and
        neither returns ``mali`` nor reads ``nskipped``; the rest of the
        method is not shown here.
        """
        # build multiple alignment
        mali = alignlib.makeMultipleAlignment()

        query_sequence = self.mFasta.getSequence( query_nid )

        mali.add( alignlib.makeAlignatum( query_sequence ) )

        qseq = alignlib.makeSequence( query_sequence )
        alignator = alignlib.makeAlignatorDPFull( alignlib.ALIGNMENT_LOCAL,
                                                  -10, -2)

        nskipped = 0

        for n in neighbours[:self.mMaxNumNeighbours]:

            # the query is already the first row of the alignment
            if n.mSbjctToken == query_nid:
                continue
            if n.mEvalue > self.mMaxEvalue:
                nskipped += 1
                continue
            sequence = self.mFasta.getSequence( n.mSbjctToken )

            E.debug( "adding %s" % str(n) )

            map_query2sbjct = n.getAlignment()

            # identity check: do not rely on the wrapped object's ``==``
            if map_query2sbjct is None:
                # no stored alignment: align the matched segments directly
                sseq = alignlib.makeSequence( sequence )
                qseq.useSegment( n.mQueryFrom, n.mQueryTo )
                sseq.useSegment( n.mSbjctFrom, n.mSbjctTo )
                map_query2sbjct = alignlib.makeAlignmentVector()
                alignator.align( map_query2sbjct, qseq, sseq )

            if map_query2sbjct.getLength() == 0:
                self.warn( "empty alignment: %s" % str( n ) )
                nskipped += 1
                continue

            # reject alignments that reach past either sequence
            if map_query2sbjct.getRowTo() > len(query_sequence):
                self.warn( "alignment out of bounds for query: %i>%i, line=%s" %
                           (map_query2sbjct.getRowTo(), len(query_sequence), str(n)))
                nskipped += 1
                continue

            if map_query2sbjct.getColTo() > len(sequence):
                self.warn( "alignment out of bounds for sbjct: %i>%i, line=%s" %
                           (map_query2sbjct.getColTo(), len(sequence), str(n)))
                nskipped += 1
                continue

            try:
                mali.add( alignlib.makeAlignatum( sequence ),
                          map_query2sbjct,
                          mali_is_in_row = True,
                          insert_gaps_mali = False,
                          insert_gaps_alignatum = True,
                          use_end_mali = True,
                          use_end_alignatum = False )
            except RuntimeError as msg:
                # ``as`` form is accepted by Python 2.6+ and Python 3
                self.warn( "problem when building alignment for %s: msg=%s" % (str(n), msg))
                nskipped += 1
                continue