コード例 #1
0
ファイル: pdb2sequence.py プロジェクト: BioXiao/cgat
def PrintAlignedSequences( sequence1, sequence2, chain = None, format="modeller" ):
    """Align two sequences and print the pairwise alignment to stdout.

    Both inputs are wrapped with alignlib.makeSequence and aligned with
    the alignator returned by alignlib.makeAlignatorFullDP( -0.0, -0.0 )
    (the two arguments are presumably the gap penalties -- confirm
    against alignlib).  The result is rendered with
    alignlib.writePairAlignment and split into lines.

    sequence1 -- row sequence; printed as the "structure" record.
    sequence2 -- column sequence; printed as the "sequence" record.
    chain     -- accepted for the caller's convenience; not used here.
    format    -- "modeller" prints two ">P1;" records (this looks like
                 Modeller's PIR alignment layout -- confirm); any other
                 value prints the raw list of alignment lines.

    Returns None.  A ValueError is raised by the tuple unpacking if one
    of the first two alignment lines does not consist of exactly three
    tab-separated fields.
    """

    ## align sequences by identity
    seq_row = alignlib.makeSequence( sequence1 )
    seq_col = alignlib.makeSequence( sequence2 )
    alignator = alignlib.makeAlignatorFullDP( -0.0, -0.0 )
    map_row2col = alignlib.makeAlignataVector()
    alignator.Align( seq_row, seq_col, map_row2col )

    ## str.split replaces string.split: same result on Python 2, and
    ## still available on Python 3 where string.split was removed.
    lines = alignlib.writePairAlignment( seq_row, seq_col, map_row2col ).split( "\n" )

    ## Every print below takes exactly one argument, so the parenthesised
    ## form emits the same text on Python 2 and on Python 3.
    if format != "modeller":
        print( lines )
        return

    ## each alignment line is: first residue <tab> aligned string <tab> last residue
    first_res, sequence, last_res = lines[0].split( "\t" )

    print( ">P1;structure" )
    print( "structureX: %s : %s : %s : %s : %s : : : : " % ("structure", first_res, "" , last_res, "" ) )
    print( "%s*" % sequence )

    first_res, sequence, last_res = lines[1].split( "\t" )

    print( ">P1;sequence" )
    print( "sequence:%s : %s : %s : %s : %s : : : : " % ("sequence" , first_res, "", last_res, "") )
    print( "%s*" % sequence )
コード例 #2
0
ファイル: pdb2sequence.py プロジェクト: logust79/cgat-apps
def PrintAlignedSequences(sequence1, sequence2, chain=None, format="modeller"):
    """Print the pairwise alignment of two sequences to stdout.

    The sequences are converted with alignlib.makeSequence, aligned by
    the alignator from alignlib.makeAlignatorFullDP(-0.0, -0.0) (arguments
    presumably gap penalties -- confirm against alignlib), and the
    alignment is turned into text by alignlib.writePairAlignment.

    sequence1 -- row sequence, emitted under the ">P1;structure" header.
    sequence2 -- column sequence, emitted under the ">P1;sequence" header.
    chain     -- part of the signature but not used by this function.
    format    -- with "modeller" two ">P1;" records are printed (looks
                 like Modeller's PIR alignment layout -- confirm); with
                 anything else the list of alignment lines is printed.

    Returns None.  Unpacking raises ValueError when one of the first two
    alignment lines does not hold exactly three tab-separated fields.
    """

    # align sequences by identity
    seq_row = alignlib.makeSequence(sequence1)
    seq_col = alignlib.makeSequence(sequence2)
    alignator = alignlib.makeAlignatorFullDP(-0.0, -0.0)
    map_row2col = alignlib.makeAlignataVector()
    alignator.Align(seq_row, seq_col, map_row2col)

    # The str method is used instead of string.split, which no longer
    # exists on Python 3; on Python 2 both give the same list.
    alignment_text = alignlib.writePairAlignment(seq_row, seq_col, map_row2col)
    lines = alignment_text.split("\n")

    # All print calls pass a single argument so the output is the same
    # whether print is a statement (Python 2) or a function (Python 3).
    if format == "modeller":

        # line layout: first residue <tab> aligned string <tab> last residue
        first_res, sequence, last_res = lines[0].split("\t")

        print(">P1;structure")
        print("structureX: %s : %s : %s : %s : %s : : : : " % (
            "structure", first_res, "", last_res, ""))
        print("%s*" % sequence)

        first_res, sequence, last_res = lines[1].split("\t")

        print(">P1;sequence")
        print("sequence:%s : %s : %s : %s : %s : : : : " % (
            "sequence", first_res, "", last_res, ""))
        print("%s*" % sequence)
    else:
        print(lines)
コード例 #3
0
        if map_query2sbjct.getRowTo() > len(cds_fragment):
            print "# ERROR: length mismatch: cds fragment (%i) shorter than last aligned residue (%i)" %\
            (len(cds_fragment), map_query2sbjct.getRowTo())
            print "#", line
            print "# cds"
            print "#", cds_fragment
            print "# genomic"
            print "#",genomic_fragment
            continue

        cds_seq = alignlib.makeSequence( cds_fragment )
        genomic_seq = alignlib.makeSequence( genomic_fragment )
        
        data = map( lambda x: string.split(x, "\t"),
                    string.split( alignlib.writePairAlignment( cds_seq,
                                                               genomic_seq,
                                                               map_query2sbjct ), "\n" ))


        row_ali, col_ali = Genomics.RemoveFrameShiftsFromAlignment(data[0][1], data[1][1])

        row_ali = Genomics.MaskStopCodons( row_ali )
        col_ali = Genomics.MaskStopCodons( col_ali )        

        if len(row_ali) != len(col_ali):
            print "# ERROR: wrong alignment lengths."
            sys.exit(1)
            
        if len(row_ali) % 3 or len(col_ali) % 3:
            print line
            print row_ali
コード例 #4
0
def buildMapPdb2Sequence( sequence, filename_pdb, options, pdb_chain = ""):
    """Map residue numbers of one chain of a pdb file onto a sequence.

    The first peptide chain whose chain_id equals pdb_chain is translated
    residue by residue through the AMINOACIDS lookup table, the resulting
    string is aligned (as row) against sequence (as column), and every
    residue of the chain that the alignment maps to a sequence position
    is recorded.

    sequence     -- reference sequence to map onto.
    filename_pdb -- path of the pdb file to read.
    options      -- object providing loglevel and stdlog; at loglevel >= 3
                    the alignment and skipped residues are written to stdlog.
    pdb_chain    -- chain identifier to look for.

    Returns
      (None, None) if filename_pdb does not exist,
      None if no peptide chain has the requested chain_id,
      otherwise the 7-tuple

        map_structure2seq -- alignment between the chain (positions counted
                             from 1 in chain order, i.e. the numbering of a
                             "renumbered" structure) and the sequence
        map_pdb2seq       -- dict: str(residue.number) -> sequence position
        map_seq2pdb       -- dict: sequence position -> str(residue.number)
        number of residues visited in the chain, minus one
        str() of the pdb number of the first residue of the chain
        str() of the pdb number of the last residue of the chain
        the string built from the chain via AMINOACIDS
    """

    if not os.path.exists( filename_pdb ):
        return None, None

    pdb_structure = Scientific.IO.PDB.Structure( filename_pdb )

    map_pdb2seq = {}
    map_seq2pdb = {}

    for chain in pdb_structure.peptide_chains:

        ## only the requested chain is processed
        if chain.chain_id != pdb_chain:
            continue

        ## align pdb sequence to sequence
        map_structure2seq = alignlib.makeAlignataVector()
        alignator = alignlib.makeFullDP( -10.0, -2.0 )

        ## build sequence of pdb file
        structure_seq = "".join( [ AMINOACIDS[code] for code in chain.sequence() ] )

        ## align reference sequence to sequence of pdb file
        row = alignlib.makeSequence( structure_seq )
        col = alignlib.makeSequence( sequence )
        alignator.Align( row, col, map_structure2seq )

        verbose = options.loglevel >= 3

        if verbose:
            options.stdlog.write( "structure: %s\n" % structure_seq )
            options.stdlog.write( "sequence : %s\n" % sequence )
            options.stdlog.write( "alignment of structure to sequence:\n" )
            options.stdlog.write( alignlib.writePairAlignment( row, col, map_structure2seq ) + "\n" )

        ## stays 0 if the chain has no residues
        residue_number = 0

        ## positions in the alignment are counted from 1 in chain order
        for residue_number, residue in enumerate( chain.residues, 1 ):

            mapped_residue = map_structure2seq.mapRowToCol( residue_number )

            if mapped_residue:
                pdb_number = str(residue.number)
                map_pdb2seq[pdb_number] = mapped_residue
                map_seq2pdb[mapped_residue] = pdb_number
            elif verbose:
                ## a false value from mapRowToCol means the residue is not aligned
                options.stdlog.write( "# skipped residue %s=%s %i\n" % (str(residue.number), residue.name, residue_number))

        first_number = str(chain.residues[0].number)
        last_number = str(chain.residues[-1].number)

        return map_structure2seq, map_pdb2seq, map_seq2pdb, residue_number-1, first_number, last_number, structure_seq

    ## no peptide chain carried the requested chain id
    return None
コード例 #5
0
ファイル: blast2fasta.py プロジェクト: siping/cgat
    for line in sys.stdin:
        
        if line[0] == "#": continue

        link.Read( line )
        ninput += 1

        if link.mQueryToken not in sequences or link.mSbjctToken not in sequences:
            nskipped += 1
            continue
        
        ali.Clear()
        alignlib.fillAlignataCompressed( ali, link.mQueryFrom, link.mQueryAli, link.mSbjctFrom, link.mSbjctAli )


        result = alignlib.writePairAlignment( sequences[link.mQueryToken], sequences[link.mSbjctToken], ali ).split("\n")

        if len(result) != 3:
            nfailed += 1

        if options.format == "fasta":
            print ">%s %i-%i\n%s\n>%s %i-%i\n%s\n" %\
                  (link.mQueryToken, link.mQueryFrom, link.mQueryTo, result[0].split("\t")[1],
                   link.mSbjctToken, link.mSbjctFrom, link.mSbjctTo, result[1].split("\t")[1] )
            
        noutput += 1
        
    print "# ninput=%i, noutput=%i, nskipped=%i, nfailed=%i" % (ninput, noutput, nskipped, nfailed)
    E.Stop()