Example #1
0
def CapC(Pdb, OutPdbFile=None):
    """Add a C-terminal NME cap to a structure.

    Pdb is resolved through ReturnPdbData and parsed into a ProteinClass;
    a cap residue built from the sequence code "<" is appended on the
    C-terminal side.  The result of GetPdb() is passed to SavePdbData
    together with OutPdbFile and is also returned.
    """
    PdbData = ReturnPdbData(Pdb)
    Body = protein.ProteinClass(Pdb=PdbData)
    # "<" is the sequence code this function uses for the C-terminal cap
    Capped = Body + protein.ProteinClass(Seq="<")
    Result = Capped.GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #2
0
def CapN(Pdb, OutPdbFile=None):
    """Add an N-terminal ACE cap to a structure.

    Pdb is resolved through ReturnPdbData and parsed into a ProteinClass;
    a cap residue built from the sequence code ">" is prepended on the
    N-terminal side.  The result of GetPdb() is passed to SavePdbData
    together with OutPdbFile and is also returned.
    """
    PdbData = ReturnPdbData(Pdb)
    Body = protein.ProteinClass(Pdb=PdbData)
    # ">" is the sequence code this function uses for the N-terminal cap
    NCapRes = protein.ProteinClass(Seq=">")
    Result = (NCapRes + Body).GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #3
0
def Splice(Pdb1, Pdb2, OutPdbFile=None):
    """Join two structures end to end (Pdb1 followed by Pdb2).

    Both inputs are resolved through ReturnPdbData, parsed into
    ProteinClass objects and concatenated with "+".  The result of
    GetPdb() is passed to SavePdbData and returned.
    """
    Data1 = ReturnPdbData(Pdb1)
    Data2 = ReturnPdbData(Pdb2)
    First = protein.ProteinClass(Pdb=Data1)
    Second = protein.ProteinClass(Pdb=Data2)
    Result = (First + Second).GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #4
0
def Cap(Pdb, OutPdbFile=None):
    """Add both terminal caps (ACE and NME) to a structure.

    The structure is resolved through ReturnPdbData, then a ">" cap
    residue is prepended and a "<" cap residue is appended.  The result
    of GetPdb() is passed to SavePdbData and returned.
    """
    PdbData = ReturnPdbData(Pdb)
    Body = protein.ProteinClass(Pdb=PdbData)
    NCapRes = protein.ProteinClass(Seq=">")
    CCapRes = protein.ProteinClass(Seq="<")
    # same left-to-right association as "N + body + C"
    Capped = (NCapRes + Body) + CCapRes
    Result = Capped.GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #5
0
def SpliceOpt(Pdb1, Pdb2, OutPdbFile=None, N=5):
    """Join two structures and optimize the junction against overlap.

    Works like a plain splice, but when both pieces contain residues
    OptimizePep is called on the joined chain with the length of the
    first piece as the junction position and N as its second argument.
    The result of GetPdb() is passed to SavePdbData and returned.
    """
    Data1 = ReturnPdbData(Pdb1)
    Data2 = ReturnPdbData(Pdb2)
    Head = protein.ProteinClass(Pdb=Data1)
    Tail = protein.ProteinClass(Pdb=Data2)
    Joined = Head + Tail
    NHead = len(Head)
    # nothing to optimize if either side is empty
    if NHead > 0 and len(Tail) > 0:
        Joined.OptimizePep(NHead, N)
    Result = Joined.GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #6
0
def Extend(Pdb, SeqBefore, SeqAfter, OutPdbFile=None, Opt=True, N=5):
    """Extend a structure in either or both directions with given sequences.

    Pdb        -- structure to extend; resolved through ReturnPdbData like
                  the other helpers in this file.
    SeqBefore  -- sequence to prepend (skipped when empty).
    SeqAfter   -- sequence to append (skipped when empty).
    OutPdbFile -- forwarded to SavePdbData together with the result.
    Opt        -- when true, and the input structure has at least one
                  residue, OptimizePep is run at each new junction.
    N          -- second argument forwarded to OptimizePep.

    Returns the result of GetPdb() on the extended structure.
    """
    # The resolved data used to be stored in an unused variable; use it so
    # this function reads its input the same way its siblings do.
    Pdb = ReturnPdbData(Pdb)
    p = protein.ProteinClass(Pdb=Pdb)
    HasCore = len(p) > 0
    if len(SeqBefore) > 0:
        Before = protein.ProteinClass(Seq=SeqBefore)
        # junction = index of the first residue after the prepended piece,
        # the same convention SpliceOpt uses (length of the leading piece)
        Junction = len(Before)
        p = Before + p
        if Opt and HasCore: p.OptimizePep(Junction, N)
    if len(SeqAfter) > 0:
        # Measure the chain *after* any prepending: the original length is
        # no longer the junction position once SeqBefore has been added.
        Junction = len(p)
        p = p + protein.ProteinClass(Seq=SeqAfter)
        if Opt and HasCore: p.OptimizePep(Junction, N)
    OutPdb = p.GetPdb()
    SavePdbData(OutPdb, OutPdbFile)
    return OutPdb
Example #7
0
def Center(Pdb, OutPdbFile=None):
    """Center a structure.

    Pdb is handed directly to ProteinClass, Center() is applied, and the
    result of GetPdb() is passed to SavePdbData and returned.
    """
    Prot = protein.ProteinClass(Pdb=Pdb)
    Prot.Center()
    Result = Prot.GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #8
0
def RMSDPdb(PdbFile1,
            PdbFile2,
            Center=True,
            Backbone=True,
            AlignSeq=False,
            CompResInd=None,
            CalcResInd=None):
    """Load two pdbs and return RMSDProteinClass() for the pair.

    Both inputs are parsed with ProteinClass; every option is forwarded
    unchanged, by keyword, to RMSDProteinClass, whose return value is
    returned as is.
    """
    Options = {
        "Center": Center,
        "Backbone": Backbone,
        "AlignSeq": AlignSeq,
        "CompResInd": CompResInd,
        "CalcResInd": CalcResInd,
    }
    First = protein.ProteinClass(Pdb=PdbFile1)
    Second = protein.ProteinClass(Pdb=PdbFile2)
    return RMSDProteinClass(First, Second, **Options)
Example #9
0
 def __init__(self, cfg, Pdb=None, Seq=None, Model=None, Prefix='ncos'):
     '''Set up the NCOS protein object from a config and a pdb or a sequence.

     cfg    -- config object; the has*/SSRefType attributes and the
               has*Params()/hasPseudoGLY() calls below are read from it,
               and the object itself is kept as self.cfg.
     Pdb    -- passed to protein.ProteinClass when Seq is None.
     Seq    -- when not None, handed to __SetChains instead of reading Pdb.
     Model  -- forwarded to ProteinClass together with Pdb.
     Prefix -- stored as self.Prefix.

     Prints an error and calls exit() when special GLY parameters and a
     pseudo GLY side chain are both enabled.
     '''
     # Verbose is a module-level flag defined outside this method
     if Verbose: print 'Initializing a NCOS protein object...'
     # set Prefix
     self.Prefix = Prefix
     # special GLY parameters: two attributes copied from cfg plus the
     # result of cfg.hasSpecialGLYParams()
     self.hasSpecialBBGLYAngles = cfg.hasSpecialBBGLYAngles
     self.hasSpecialBBGLYTorsions = cfg.hasSpecialBBGLYTorsions
     self.hasSpecialGLYParams = cfg.hasSpecialGLYParams()
     # special PRO parameters, same pattern as for GLY
     self.hasSpecialBBPROAngles = cfg.hasSpecialBBPROAngles
     self.hasSpecialBBPROTorsions = cfg.hasSpecialBBPROTorsions
     self.hasSpecialPROParams = cfg.hasSpecialPROParams()
     # whether glycine gets a pseudo side chain
     self.hasPseudoGLY = cfg.hasPseudoGLY()
     # the two glycine options are mutually exclusive; abort if both are set
     if self.hasSpecialGLYParams and self.hasPseudoGLY:
         print 'Error: Cannot have pseudo GLY side chain and special GLY BB torsion simultaneously'
         exit()
     # sidechain referencing
     self.SSRefType = cfg.SSRefType
     # keep the entire config object around just in case
     self.cfg = cfg
     # no Seq given: take sequence, positions and chains from the pdb
     if Seq is None:
         # internal proteinclass object
         self.p0 = protein.ProteinClass(Pdb, Model=Model)
         self.Seq = self.p0.Seq
         self.Pos = self.p0.Pos
         # one ResChain(i) value per residue of the sequence
         self.ResChainInds = [
             self.p0.ResChain(i) for i, r in enumerate(self.Seq)
         ]
         self.Chains = self.p0.Chains
         self.NChains = len(self.Chains)
     # a Seq is provided: start from empty placeholders and let
     # __SetChains fill them in
     else:
         self.Seq = None
         self.Pos = None
         self.ResChainInds = []
         self.Chains = []
         self.NChains = None
         # NOTE(review): presumably populates self.Seq (it is joined right
         # below) and the chain attributes -- confirm in __SetChains
         self.__SetChains(Seq)
     print 'Sequence: %s' % (' '.join(self.Seq))
     # sequence book-keeping
     self.ResTypes = list(set(self.Seq))
     self.NRes = len(self.Seq)
     self.NResTypes = len(self.ResTypes)
     # containers for sidechain atom types; __SetSideChains is called
     # right after they are created
     self.AtomSbyNum = []
     self.AtomSbyRes = {}
     self.__SetSideChains()
     # build startatoms for quick backbone referencing
     self.StartAtomInds = []
     self.RelativeStartAtomInds = []
     self.__SetStartAtomInds()
     self.__SetRelativeStartAtomInds()
     # atomnames
     self.AtomNames = []
     self.__SetAtomNames()
     # generate bonds
     self.BondPairs = None
     self.__SetBonds()
Example #10
0
def Standardize(Pdb, OutPdbFile=None):
    """Rewrite a pdb with standard residue and atom names.

    Line by line, on the data returned by ReturnPdbData:
      * ATOM residues HID/HIE/HIP become HIS, CYX/CYM become CYS;
      * the ATOM atom name " HN " becomes " H  ";
      * HETATM records of residue MSE become ATOM records of MET;
      * for ATOM records, alternate location "A" is blanked and records
        with any other non-blank alternate location are dropped.
    The remaining lines are passed through Renumber, chains are generated
    with ProteinClass.GenerateChains(), and the result of GetPdb() is
    passed to SavePdbData and returned.
    """
    HisNames = ("HID", "HIE", "HIP")
    CysNames = ("CYX", "CYM")
    Kept = []
    for Line in ReturnPdbData(Pdb).split("\n"):
        if Line.startswith("ATOM"):
            ResName = Line[17:20]
            # collapse protonation / bonding variants to the plain name
            if ResName in HisNames:
                Line = Line[:17] + "HIS" + Line[20:]
            elif ResName in CysNames:
                Line = Line[:17] + "CYS" + Line[20:]
            # amide hydrogen naming
            if Line[12:16] == " HN ":
                Line = Line[:12] + " H  " + Line[16:]
        elif Line.startswith("HETATM") and Line[17:20] == 'MSE':
            # selenomethionine is rewritten as a regular methionine atom
            Line = "ATOM  " + Line[6:17] + "MET" + Line[20:]
        # alternate location indicator (column index 16); this also sees
        # the records that were just converted from HETATM
        if Line.startswith("ATOM"):
            AltLoc = Line[16]
            if AltLoc == "A":
                Line = Line[:16] + " " + Line[17:]
            elif AltLoc != " ":
                continue
        Kept.append(Line)
    Renumbered = Renumber("\n".join(Kept))
    # now generate chains
    Prot = protein.ProteinClass(Pdb=Renumbered)
    Prot.GenerateChains()
    Result = Prot.GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #11
0
def BFactorRMSD(RefPdb, Pdb, OutPdbFile=None):
    """Add B-factors proportional to atom RMSD against a reference.

    Both structures are resolved through ReturnPdbData and compared with
    rmsd.RMSDProteinClass (all atoms, sequence alignment, atom alignment,
    UpdateBFactors=True).  The returned pair is not used; the result of
    GetPdb() on the non-reference structure is passed to SavePdbData and
    returned.
    """
    import rmsd
    PdbData = ReturnPdbData(Pdb)
    RefData = ReturnPdbData(RefPdb)
    Target = protein.ProteinClass(Pdb=PdbData)
    Reference = protein.ProteinClass(Pdb=RefData)
    Options = dict(Backbone=False,
                   AlignSeq=True,
                   UpdateBFactors=True,
                   AlignAtoms=True)
    # the call is made for its effect on Target; the two returned values
    # are unpacked (as before) but not needed
    _Rmsd, _NRes = rmsd.RMSDProteinClass(Reference, Target, **Options)
    Result = Target.GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #12
0
def Generate(Seq, OutPdbFile=None):
    """Generate a pdb of the extended sequence Seq.

    A ProteinClass is built from Seq alone; the result of GetPdb() is
    passed to SavePdbData together with OutPdbFile and returned.
    """
    Result = protein.ProteinClass(Seq=Seq).GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #13
0
def Map2Polymer(Pdb, AAPdb, PolyName, Model = None, MappedPrefix = None, hasPseudoGLY = True, DelTmpPdb = True):
    ''' Map the given (CG) Pdb onto a homopolymer of equivalent length.

    Pdb          -- path of the coarse-grained pdb whose sequence is kept.
    AAPdb        -- all-atom pdb; it is decapped, mutated to a PolyName
                    homopolymer and coarse grained with Map().
    PolyName     -- residue name repeated to build the polymer sequence.
    Model        -- forwarded to ProteinClass when reading AAPdb.
    MappedPrefix -- output prefix; when None the output is written to the
                    current directory as <Pdb basename>_map2<polyname>.pdb.
    hasPseudoGLY -- forwarded to Map().
    DelTmpPdb    -- remove the intermediate polyAA.pdb / polyCG.pdb files.

    Returns the path of the mapped pdb.  Any CONECT records found in Pdb
    (everything from the first CONECT line to the end of the file) are
    appended to the output.  Energy minimization not yet implemented.
    '''
    # read in unmapped AA Pdb
    p_AA = protein.ProteinClass(AAPdb, Model = Model)
    p_AA = p_AA.Decap()
    # map this to a polymer of equivalent length
    NRes = len(p_AA.Seq)
    print('Mapping structure to a %s-%d sequence' % (PolyName, NRes))
    p_AA = p_AA.MutateSeq([PolyName] * NRes)
    PolyAAPdb = 'polyAA.pdb'
    p_AA.WritePdb(PolyAAPdb)
    # coarse grain this pdb; PolyCGPdb is the file this function expects
    # Map() to produce for CGPrefix 'polyCG'
    PolyCGPdb = 'polyCG.pdb'
    Map(InPdb = PolyAAPdb, CGPrefix = 'polyCG', hasPseudoGLY = hasPseudoGLY)
    # write coarse grained co-ordinates using given Pdb seq
    p_CG = protein.ProteinClass(Pdb)
    p_PolyCG = protein.ProteinClass(PolyCGPdb)
    p_CG.Pos = p_PolyCG.Pos
    if MappedPrefix is not None:
        MappedPdb = MappedPrefix + '.pdb'
    else:
        PdbName = Pdb.split('/')[-1].split('.pdb')[0]
        MappedPdb = os.path.join(os.getcwd(), PdbName + '_map2%s.pdb' % PolyName.lower())
    p_CG.WritePdb(MappedPdb)
    # copy over CONECT records if present
    with open(Pdb, 'r') as of:
        lines = of.readlines()
    # A pdb without CONECT records used to crash here: indexing the empty
    # match list raised IndexError while only ValueError was caught.
    start = next((k for k, line in enumerate(lines) if line.startswith('CONECT')), None)
    s = '' if start is None else ''.join(lines[start:])
    if s:
        # appending gives the same file as read-modify-rewrite, without
        # leaving file handles open
        with open(MappedPdb, 'a') as of:
            of.write('\n' + s)
    # del temp files
    if DelTmpPdb:
        for i in [PolyAAPdb, PolyCGPdb]: os.remove(i)
    return MappedPdb
Example #14
0
def ExtractConect(NativePdb, Pdb):
    """Return the pdb text of Pdb followed by NativePdb's CONECT records.

    Clustered pdbs come with all cluster structures followed by CONECT
    records at the end.  This reads Pdb through ProteinClass (no Model
    argument is given) and appends everything from the first CONECT line
    of NativePdb to the end of that file.

    NativePdb -- path of the pdb file to take CONECT records from.
    Pdb       -- pdb handed to protein.ProteinClass.

    Returns GetPdb() + a newline + the CONECT text; the CONECT text is
    empty when NativePdb has no CONECT record (this used to raise
    IndexError).
    """
    p = protein.ProteinClass(Pdb)
    pdbstr = p.GetPdb()
    # context manager so the handle is closed deterministically
    with open(NativePdb, 'r') as of:
        lines = of.readlines()
    start = next((k for k, line in enumerate(lines) if line.startswith('CONECT')), None)
    conectstr = '' if start is None else ''.join(lines[start:])
    return pdbstr + '\n' + conectstr
Example #15
0
 def GenRandInitPos(self):
     '''generate random initial position based on /share/apps/scripts/template.pdb
         to run from a different seed structure just place a cg pdb called init.pdb with that structure'''
     tmpPdb = os.path.join(os.getcwd(), 'tmp.pdb')
     if self.InitPdb is None: self.InitPdb = 'init.pdb'
     if not os.path.isfile(self.InitPdb):
         print 'Generating fully extended initial AA structure. ',
         # create an ALL-ATOM protein class object for the given sequence
         pobj = protein.ProteinClass(Seq=self.p.Seq)
         pobj.WritePdb(tmpPdb)
         # coarse grain all-atom proteinclass object
         mapNCOS.Map(InPdb=tmpPdb,
                     CGPrefix=self.InitPdb.split('.pdb')[0],
                     hasPseudoGLY=self.p.hasPseudoGLY)
         # remove all-atom pdb
         if os.path.isfile(tmpPdb): os.remove(tmpPdb)
         del pobj
     print 'Using init conf as generated in : %s\n' % self.InitPdb
     pobj = protein.ProteinClass(self.InitPdb)
     initpos = pobj.Pos
     return initpos
Example #16
0
def RandDihedrals(Pdb, OutPdbFile=None, DeltaAng=5.):
    """Perturb each backbone phi/psi pair by a random angle.

    For every residue whose PhiPsi() values are both defined, independent
    offsets drawn uniformly from [-DeltaAng, DeltaAng) are added to phi
    and psi and the residue is rotated to the new values.  The result of
    GetPdb() is passed to SavePdbData and returned.
    """
    PdbData = ReturnPdbData(Pdb)
    Prot = protein.ProteinClass(Pdb=PdbData)
    for ResInd in range(len(Prot)):
        Phi, Psi = Prot.PhiPsi(ResInd)
        # residues lacking one of the two angles are left alone
        if Phi is None or Psi is None:
            continue
        Phi += DeltaAng * (2. * random.random() - 1.)
        Psi += DeltaAng * (2. * random.random() - 1.)
        Prot.RotateToPhiPsi(ResInd, Phi, Psi)
    Result = Prot.GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #17
0
def Rotate(Pdb, ResNum, Phi=None, Psi=None, OutPdbFile=None):
    """Rotate one residue to the specified phi, psi.

    Raises IndexError when ResNum is not a valid residue index.  If the
    rotation itself raises a StandardError, a message is printed and the
    unmodified pdb data is returned without being saved.  Otherwise the
    result of GetPdb() is passed to SavePdbData and returned.
    """
    PdbData = ReturnPdbData(Pdb)
    Prot = protein.ProteinClass(Pdb=PdbData)
    if ResNum not in range(0, len(Prot)):
        raise IndexError("Residue number %d not found" % ResNum)
    try:
        Prot.RotateToPhiPsi(ResNum, Phi, Psi)
    except StandardError:
        print("Could not perform rotation.")
        return PdbData
    Result = Prot.GetPdb()
    SavePdbData(Result, OutPdbFile)
    return Result
Example #18
0
 def __init__(self, Pdb, Model=None):
     '''Wrap a ProteinClass built from Pdb and cache derived attributes.

     Pdb   -- stored as self.Pdb and passed to protein.ProteinClass.
     Model -- forwarded to ProteinClass.

     After construction the object carries atom names, atom residue
     information, start indices, backbone indices and positions as set by
     the calls below.
     '''
     # protected (internal) proteinclass object
     self.Pdb = Pdb
     self.__p = protein.ProteinClass(Pdb, Model=Model)
     # copy over some attributes from the original proteinclass
     self.AtomNames = self.__p.AtomNames()
     self.AtomRes = self.__p.AtomRes()
     # NOTE(review): unlike the two lines above this is not called --
     # confirm AtomResNum is a plain attribute and not a method
     self.AtomResNum = self.__p.AtomResNum
     # sequence manipulation
     self.UpdateSeq()
     # set atom start indices; GetStartInds is called for its effect here,
     # its return value (if any) is not used
     self.StartInds = []
     self.GetStartInds()
     # backbone indices
     self.BBInds = self.GetBBInds()
     # co-ordinates manipulation; UpdatePos is called right after the
     # placeholder is created
     self.Pos = None
     self.UpdatePos()
Example #19
0
def f(ax1, ax2, pset, fftype, plotnative=True):
    """Plot contact-order and RMSD averages against sequence length.

    For every protein name in pset the native pdb is read to get the
    sequence length, and the 'rmsd' and 'co' entries of that protein's
    shelf (under Dir/fftype/<name>) supply histogram-weighted averages.
    ax1 receives the trajectory-average contact order (and the native
    value when plotnative is true); ax2 receives the average RMSD.
    Points are ordered by sequence length, and the protein names are
    printed in that order.
    """
    def weighted_mean(hist):
        # average of hist[0] weighted by hist[1]
        return np.sum(hist[0] * hist[1]) / np.sum(hist[1])

    nprot = len(pset)
    seqlen = np.zeros(nprot)
    co_native = np.zeros(nprot)
    co_traj = np.zeros(nprot)
    rmsd_traj = np.zeros(nprot)

    for k, name in enumerate(pset):
        nativepdb = os.path.expanduser('~/Go/native_struct/mapped/%s.pdb' % name)
        seqlen[k] = len(protein.ProteinClass(nativepdb).Seq)
        shelfpath = os.path.join(Dir, fftype, name, 'prot_%s.shelf' % name)
        store = shelve.open(shelfpath)
        rmsd_mean = weighted_mean(store['rmsd'])
        co_ref, co_hist = store['co']
        co_mean = weighted_mean(co_hist)
        co_native[k] = co_ref
        co_traj[k] = co_mean
        rmsd_traj[k] = rmsd_mean
        store.close()

    order = np.argsort(seqlen)
    print(', '.join([pset[k] for k in order]))
    xs = seqlen[order]
    if plotnative:
        ax1.plot(xs, co_native[order], color='blue', marker='o',
                 label='Native')
    ax1.plot(xs, co_traj[order], color=Clrs[fftype], marker='o',
             label=fftype)
    ax2.plot(xs, rmsd_traj[order], color=Clrs[fftype], marker='o',
             markersize=8, label=fftype)
    for axis in (ax1, ax2):
        axis.legend()
Example #20
0
def RunAnal(CoordsObj, OutputPath=None, Prefix=None):
    """Run the mesostring analysis of a coords object.

    A ProteinClass is linked to CoordsObj, every configuration is turned
    into a masked mesostring from its per-residue PhiPsi() values, and the
    strings are tallied.

    Returns (ConfMeso, MesoPop, MesoEntropy): the mesostring of each
    configuration, a list of (mesostring, count) sorted by decreasing
    count, and log(Tot) - sum(count * log(count)) / Tot.  When OutputPath
    is given, WriteAnalysis is called with these results, Prefix and
    CoordsObj.GetIndices().
    """
    Prot = protein.ProteinClass()
    Prot.LinkCoordsObj(CoordsObj)
    ConfMeso = []
    CoordsObj.Reset()
    Mask = MesoMask(Prot)
    # stepping the coords object advances the linked protein; the loop
    # ends on the first None
    while CoordsObj.GetNextCoords() is not None:
        Dih = [Prot.PhiPsi(ResInd) for ResInd in range(0, len(Prot))]
        ConfMeso.append(MesoStringMask(Dih, Mask))
    Prot.UnlinkCoordsObj()
    # tally the number of instances of each mesostring
    Counts = {}
    for Meso in ConfMeso:
        Counts[Meso] = Counts.get(Meso, 0) + 1
    # order by population (ties broken by the string), largest first
    ByCount = [(Cnt, Meso) for (Meso, Cnt) in Counts.items()]
    ByCount.sort()
    ByCount.reverse()
    MesoPop = [(Meso, Cnt) for (Cnt, Meso) in ByCount]
    # mesostate entropy
    Tot = float(len(ConfMeso))
    Weighted = sum([Cnt * log(Cnt) for (Meso, Cnt) in MesoPop])
    MesoEntropy = log(Tot) - Weighted / Tot
    # write the output
    ConfIndices = CoordsObj.GetIndices()
    if OutputPath is not None:
        WriteAnalysis(OutputPath, ConfMeso, MesoPop, MesoEntropy, Prefix,
                      ConfIndices)
    return ConfMeso, MesoPop, MesoEntropy
Example #21
0
def ReverseMap(CGPdb, Prefix, Model = None, hasPseudoGLY = False):
    """Write Prefix + '.pdb' with approximate carbonyl groups added.

    The coarse-grained pdb is read through ProteinClass.  For each residue
    the output holds an N and a CA record (taken from the 'N' and 'C'
    sites), then a C and an O record computed by Project() from the CA
    position, the 'O' site and the next residue's N -- or, for the last
    residue, a single O record at the 'O' site -- and finally an S record
    unless the residue is GLY and hasPseudoGLY is false.  A TER record
    separates chains and ends the file.  Nothing is returned.
    """
    p = protein.ProteinClass(CGPdb, Model = Model)
    Seq = p.Seq
    Pos = p.Pos
    # result unused; the call is kept exactly as in the original
    p.GetPdb()
    # backbone and sidechain site indices
    NInds = p.AtomInd(AtomName = 'N')
    CInds = p.AtomInd(AtomName = 'C')
    OInds = p.AtomInd(AtomName = 'O')
    SInds = p.AtomInd(AtomName = 'S')
    Records = []
    NAtom = 0
    PrevChain = -1
    # S sites are counted separately: residues without a side chain site
    # do not consume an entry of SInds
    NextS = 0
    LastRes = len(Seq) - 1
    for i, r in enumerate(Seq):
        Chain = p.ResChain(i)
        if Chain != PrevChain:
            PrevChain = Chain
            # inter-chain TER record (not before the very first residue)
            if i != 0:
                Records.append("TER\n")
        PosCA = Pos[CInds[i]]
        PosCGO = Pos[OInds[i]]
        Sites = [('N ', Pos[NInds[i]]), ('CA ', PosCA)]
        if i < LastRes:
            # effective carbonyl from the CG oxygen site and the next N
            PosC, PosO = Project(PosCA, PosCGO, Pos[NInds[i+1]])
            Sites.append(('C ', PosC))
            Sites.append(('O ', PosO))
        else:
            # retain the CG O site for the last residue
            Sites.append(('O ', PosCGO))
        if r != 'GLY' or hasPseudoGLY:
            Sites.append(('S ', Pos[SInds[NextS]]))
            NextS += 1
        ChainId = string.ascii_uppercase[Chain]
        for AtomName, Xyz in Sites:
            NAtom += 1
            Records.append(PDBFMT % (NAtom, AtomName, r, ChainId, i+1,
                                     Xyz[0], Xyz[1], Xyz[2], 1.0, 0.0))
            Records.append('\n')
    # terminal record
    Records.append('TER\n')
    OutPdb = Prefix + '.pdb'
    with open(OutPdb, 'w') as of:
        of.write(''.join(Records))
Example #22
0
    CalcResInd = GetResList(Args.get("calcres", None))

    #check for a trajectory
    if "traj" in Args["FLAGS"]:
        PdbRef, TrjFile, PrmtopFile = Args["ARGS"][:3]
        NAvg = int(Args.get("avg", 1))
        Cut = float(Args.get("frac", 0.))
        NSkip = int(Args.get("nskip", 0))
        NRead = int(Args.get("nread", -1))
        NStride = int(Args.get("nstride", 1))
        Trj = coords.TrjClass(TrjFile,
                              PrmtopFile,
                              NSkip=NSkip,
                              NRead=NRead,
                              NStride=NStride)
        pTrj = protein.ProteinClass()
        pTrj.LinkTrj(Trj)
        pRef = protein.ProteinClass(Pdb=PdbRef)
        print "%-10s %-8s %-8s %-5s" % ("Frame", "BB_RMSD", "All_RMSD", "NRes")
        i = 0
        y1, y2 = [], []
        z1, z2 = [], []
        for Pos in Trj:
            i += 1
            x1, NRes = RMSDProteinClass(pRef,
                                        pTrj,
                                        Backbone=True,
                                        AlignSeq=Align,
                                        CompResInd=CompResInd,
                                        CalcResInd=CalcResInd)
            x2, NRes = RMSDProteinClass(pRef,
Example #23
0
                    NSkip=NSkip,
                    NStride=NStride)

# Per-atom root-mean-square deviation from RefPos over the frames of t,
# written into the B-factor column of a copy of PdbFile.
# RefPos, t and PdbFile are defined earlier in the script.
NAtom = len(RefPos)
print "Found %d atoms" % NAtom
# running sum of squared displacements, one entry per atom
dSq = zeros(NAtom, float)
N = 0

for Pos in t:
    N += 1
    # the returned value r is not used below
    # NOTE(review): presumably called so that Pos is aligned onto RefPos
    # in place before the subtraction -- confirm against rmsd.RMSD
    r = rmsd.RMSD(RefPos, Pos, Align=True)
    # squared distance of every atom from its reference position
    dSq += ((RefPos - Pos)**2).sum(axis=1)
    if N % 100 == 0: print "Analyzed %d frames" % N
# mean over frames, then square root; N is 0 here if t yielded no frames
dSq = dSq / N
dSq = sqrt(dSq)
print "Analyzed %d frames" % N

#s = "FLUCTUATION RESULTS\n"
#s += "atom number, root-mean-square fluctuation\n"
#for i in range(0,NAtom):
#  s += "%-4d %-8.2f\n" % (i+1, dSq[i])
#file("fluctresults.txt", "w").write(s)

# store the fluctuation of each atom as its B-factor; atoms are visited
# residue by residue and matched to dSq by running index
p = protein.ProteinClass(Pdb=PdbFile)
i = 0
for r in p.Res:
    for a in r.Atoms:
        a.BFactor = dSq[i]
        i += 1
# output goes to the current directory as <name>-fluct.pdb
p.WritePdb(os.path.basename(PdbFile).replace(".pdb", "-fluct.pdb"))
Example #24
0
#!/usr/bin/env python

import os, sys, numpy as np, cPickle as pickle
import matplotlib ; matplotlib.use('Agg')
import matplotlib.gridspec as gridspec
import matplotlib.cm as cmap
import matplotlib.pyplot as plt
import protein, cgprotein as cg, utils

fftypes = ['ala15', 'leu15', 'val15']
NRes = 15

# build an ideal helix: a poly-ALA chain of NRes residues with every
# residue rotated to phi = -60, psi = -45
# NOTE(review): the sequence list is passed positionally -- confirm that
# ProteinClass treats a list in its first argument as a sequence
p = protein.ProteinClass(['ALA'] * NRes)
for i in range(NRes):
    p.RotateToPhiPsi(ResNum = i, Phi = -60, Psi = -45)
p.WritePdb('ideal_helix_unmapped.pdb')
# run the external mapping script on the pdb that was just written
cmd = 'python ~/Go/map.py ideal_helix_unmapped.pdb ./ideal_helix'
os.system(cmd)

# ideal beta-hairpin: the topclust pdb from the val15_AA simulations
hairpin_pdb = os.path.abspath('../val15_AA/topclust.pdb')

# trajectory file per system
Traj = {
    'ala_spc': os.path.abspath('../ala_spc_AA/Lammps/ala_spc.300.00.lammpstrj.gz'),
    'ala15': os.path.abspath('../ala15_AA/Lammps/ala15.299.00.lammpstrj.gz'),
    'leu15': os.path.abspath('../leu15_AA/Lammps/leu15_wham.407.00.lammpstrj.gz'),
    'val15': os.path.abspath('../val15_AA/Lammps/val15_wham.367.00.lammpstrj.gz'),
}

# temperature per system (matches the number in each trajectory name)
Temp = {
    'ala_spc': 300.0,
    'ala15': 299.00,
    'leu15': 407.00,
    'val15': 367.00,
}

# reference structure per system
NativePdb = {
    'ala_spc': 'ideal_helix.pdb',
    'ala15': 'ideal_helix.pdb',
    'leu15': 'ideal_helix.pdb',
    'val15': hairpin_pdb,
}
Example #25
0
IndPdb		: input pdb
Cutoff		: cutoff distance to calculate hydrophobic interaction		
"""

import sys, protein, geometry

# Both the pdb and the cutoff are required; the old check only caught the
# no-argument case and crashed with IndexError when the cutoff was missing.
if len(sys.argv) < 3:
    print(Usage)
    sys.exit()

#get arguments
Pdb = sys.argv[1]
# float() accepts everything int() did and also fractional cutoffs
Cutoff = float(sys.argv[2])

p = protein.ProteinClass(Pdb=Pdb)

#check residues for hydrophobic residues
ResInd = p.ResInd(Hydrophobic=True)
Pos = p.ResPos()
N = len(p)

# set for constant-time membership tests inside the double loop
PhobicInds = set(ResInd)

#Determine which hydrophobic residues are within the cutoff
for i in range(N):
    if i not in PhobicInds: continue
    for j in range(i + 1, N):
        if j not in PhobicInds: continue
        # compute the distance once and reuse it for test and output
        Dist = geometry.Length(Pos[i] - Pos[j])
        if Dist <= Cutoff:
            # residue numbers are printed 1-based; the distance keeps its
            # original %6d formatting, which drops the fractional part
            print("%6d %6d %6d" % (i + 1, j + 1, Dist))
Example #26
0
# residue names treated as C-terminal caps
CCaps = ['NME', 'NHE']
# pdb ATOM line template; the format takes ten values: the eight listed in
# the trailing comment followed by two more floats
# NOTE(review): by column position the last two look like occupancy and
# B-factor -- confirm
PDBFMT = "ATOM  %5d %4s %3s %1s%4d    %8.3f%8.3f%8.3f %5.2f%6.2f"  # (atomind + 1, atomname, resname, reschainind, resnum+1, x, y, z)
BONDFMT = "%6s%5d%5d\n"  # ('CONECT', a + r.StartAtom + 1, b + r.StartAtom + 1)

# Inputs: positional command-line arguments, made absolute where they are
# paths. Only argv[1] is required; the others fall back to the defaults
# on the right.
InPdb = os.path.abspath(sys.argv[1])
CGPrefix = os.path.abspath(sys.argv[2]) if len(sys.argv) > 2 else 'testcg'
AATraj = os.path.abspath(sys.argv[3]) if len(sys.argv) > 3 else None
# for argv[4] and argv[5] an empty string also counts as "not given"
PrmTop = os.path.abspath(
    sys.argv[4]) if len(sys.argv) > 4 and sys.argv[4] else None
AmberEne = os.path.abspath(
    sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] else None
LastNFrames = int(sys.argv[6]) if len(sys.argv) > 6 else 0

# read in protein structure
p = protein.ProteinClass(Pdb=InPdb)
p.Update()
PdbString = p.GetPdb()
Pos = p.Pos

# is this structure capped (adapted from /share/apps/pdbtools.py):
# collect the ATOM lines whose residue-name field (columns 17:20) is a cap
# name; NCaps is defined above this excerpt
NCap = [
    s for s in PdbString.split("\n") if s[0:4] == "ATOM" and s[17:20] in NCaps
]
CCap = [
    s for s in PdbString.split("\n") if s[0:4] == "ATOM" and s[17:20] in CCaps
]
hasNCap = len(NCap) > 0
hasCCap = len(CCap) > 0

# Perform some book-keeping on the sequence
Example #27
0
def PdbMesoString(PdbFile):
    """Return the mesostate string for a pdb file.

    The file is read into a ProteinClass; the PhiPsi() value of every
    residue and the sequence are handed to Mesostring().
    """
    Prot = protein.ProteinClass(Pdb=PdbFile)
    Dih = []
    for ResInd in range(len(Prot)):
        Dih.append(Prot.PhiPsi(ResInd))
    return Mesostring(Dih, Prot.Seq)
Example #28
0
def GetCommon(CObj, RunSS=False, Verbose=False):
    """Returns residues with common secondary structure, common dihedrals,
    and common contacts across the frames of CObj.

    CObj    -- coords object; it is reset, linked to a ProteinClass and
               iterated frame by frame.
    RunSS   -- when true, SecondaryStructure() is evaluated per frame and a
               consensus H/E string is built; otherwise the consensus is
               all "-" with zero fractions.
    Verbose -- print progress while scanning frames.

    Returns (Seq, Contacts, ResSS, ResSSFrac, FixedDih):
      Seq       -- result of sequence.Standardize on the linked sequence
      Contacts  -- list of (i, j, fraction) with fraction >= ContThresh
      ResSS     -- consensus secondary structure string
      ResSSFrac -- per-residue fraction of the most common H/E value
      FixedDih  -- list of (i, PhiAvg, PsiAvg, PhiStd, PsiStd) for residues
                   whose phi and psi standard deviations are both
                   <= DihThresh
    A summary of all three is printed.  SSThresh, DihThresh, ContThresh and
    PhobicsOnly are module-level settings defined outside this function.
    """
    if Verbose: print "Getting common secondary structures and contacts..."
    SSList, PhiPsiList, PhiPsiSqList = [], [], []
    CM = None
    NFrame = len(CObj)
    #run through all of the pdbs and tabulate the data
    CObj.Reset()
    p = protein.ProteinClass()
    p.LinkCoordsObj(CObj)
    Seq = p.Seq
    SeqLen = len(Seq)
    for (i, Pos) in enumerate(CObj):
        if Verbose:
            if "PdbFileList" in CObj.__dict__:
                print "Examining pdb %s" % CObj.PdbFileList[i]
            else:
                if (i + 1) % 100 == 0: print "Examined %d frames" % (i + 1)
        if RunSS: SSList.append(p.SecondaryStructure())
        # the comprehension reuses the name i; under Python 2 that rebinds
        # the frame index, which is harmless because the frame index is
        # not read again in this iteration
        ThisPhiPsiList = array([p.PhiPsi(i) for i in range(len(p))], float)
        PhiPsiList.append(ThisPhiPsiList)
        # accumulate the residue contact map over frames
        if CM is None:
            CM = p.ResContactMap()
        else:
            CM += p.ResContactMap()
    #find the most common H or E motif at each point along the chain
    if RunSS:
        SSVals = ["H", "E"]
        ResSS, ResSSFrac = "", []
        for i in range(SeqLen):
            s = [SS[i] for SS in SSList]
            nVals = [s.count(x) for x in SSVals]
            ind = argmax(nVals)
            # fraction of frames showing the more frequent of H / E
            f = float(nVals[ind]) / float(NFrame)
            ResSSFrac.append(f)
            if f >= SSThresh:
                ResSS += SSVals[ind]
            else:
                ResSS += "-"
    else:
        ResSS = "-" * SeqLen
        ResSSFrac = [0.] * SeqLen
    #now find average phi-psi angles for each one
    FixedDih = []
    for i in range(SeqLen):
        ThisResPhiPsi = array([PhiPsi[i] for PhiPsi in PhiPsiList], float)
        s = std(ThisResPhiPsi, axis=0)
        a = mean(ThisResPhiPsi, axis=0)
        # keep the residue only if both angles stay within the threshold
        if all(s <= DihThresh):
            FixedDih.append((i, a[0], a[1], s[0], s[1]))
    #now compute contacts
    # CM is still None here if CObj yielded no frames
    CM = CM / NFrame
    Contacts = []
    # when PhobicsOnly is set, non-hydrophobic residues are skipped
    SkipMask = [not sequence.Hydrophobic(x) and PhobicsOnly for x in Seq]
    for i in range(SeqLen):
        if SkipMask[i]: continue
        # only pairs at least three residues apart are considered
        for j in range(i + 3, SeqLen):
            if SkipMask[j]: continue
            if CM[i, j] >= ContThresh:
                Contacts.append((i, j, CM[i, j]))
    #summarize
    # ruler of repeating digits as long as the sequence; relies on "/"
    # being integer division for ints (Python 2)
    nums = "1234567890" * (SeqLen / 10 + 1)
    nums = nums[:SeqLen]
    if RunSS:
        s = "CONSENSUS SECONDARY STRUCTURE:\n%s\n%s" % (nums, ResSS)
    else:
        s = ""
    s += "\n\nCONSENSUS DIHEDRALS:\nRes PhiAvg PsiAvg PhiStd PsiStd\n"
    s += "\n".join([
        "%s%d %8.2f %8.2f %8.2f %8.2f" % (Seq[i], i + 1, x, y, z, w)
        for (j, (i, x, y, z, w)) in enumerate(FixedDih)
    ])
    s += "\n\nCONSENSUS CONTACTS:\nResi Resj FracMade\n"
    s += "\n".join([
        "%s%d %s%d %8.3f" % (Seq[i], i + 1, Seq[j], j + 1, f)
        for (i, j, f) in Contacts
    ])
    if Verbose: print ""
    print s
    #get sequence
    Seq = sequence.Standardize(Seq)
    return Seq, Contacts, ResSS, ResSSFrac, FixedDih
Example #29
0
def Overlay(NativePdb,
            Pdb,
            OutPrefix=None,
            Label='',
            SinglePlot=False,
            hasPseudoGLY=False):
    """Render Pdb overlaid on NativePdb with the configured renderer.

    NativePdb    -- native coarse-grained pdb; also used to get backbone
                    indices through cgprotein.ProteinNCOS.
    Pdb          -- pdb to compare; read with Model=1.
    OutPrefix    -- prefix for all generated files (default 'go').
    Label        -- label for the single plot; when empty and SinglePlot is
                    set, an RMSD label is computed (see the note at the
                    commented-out set_title call).
    SinglePlot   -- post-process the rendered picture into OutPrefix.png
                    with ImageMagick and matplotlib.
    hasPseudoGLY -- forwarded to ProteinNCOS and mapNCOS.ReverseMap.

    Renderer, doRotate and the *EXEC / s_pymol / s_vmd names are
    module-level settings defined outside this function.  Returns None;
    prints an error and calls exit() for an unknown Renderer.
    """
    global doRotate
    if OutPrefix is None: OutPrefix = 'go'
    # parse BBInds
    p_cg = cgprotein.ProteinNCOS(NativePdb, hasPseudoGLY=hasPseudoGLY)
    BBInds = p_cg.GetBBInds()
    # read pdbs
    pNative = protein.ProteinClass(NativePdb)
    p = protein.ProteinClass(Pdb, Model=1)
    # rotate the native pdb
    if doRotate: pNative = RotateProteinClass(pNative)
    # align with rotated native struct (produces weird results with vmd, so let vmd do its own )
    if not Renderer == 'vmd':
        p, pNative = AlignProtein(p, pNative, BBInds)
    # write to first set of tmp pdb files
    tmpNativePdb = os.path.join(os.getcwd(), '%s_tmpnative.pdb' % OutPrefix)
    tmpPdb = os.path.join(os.getcwd(), '%s_tmp.pdb' % OutPrefix)
    pNative.WritePdb(tmpNativePdb)
    p.WritePdb(tmpPdb)
    # now reverse map these pdbs to generate an approximate carbonyl group
    # so that STRIDE can assign secondary structures
    # (the prefix is the tmp path without '.pdb', so each file is
    # rewritten in place)
    mapNCOS.ReverseMap(CGPdb=tmpNativePdb,
                       Prefix=tmpNativePdb.split('.pdb')[0],
                       hasPseudoGLY=hasPseudoGLY)
    mapNCOS.ReverseMap(CGPdb=tmpPdb,
                       Prefix=tmpPdb.split('.pdb')[0],
                       hasPseudoGLY=hasPseudoGLY)
    # fill dictionary for renderer script
    d = {
        'TMPNATIVEPDB': tmpNativePdb,
        'TMPPDB': tmpPdb,
    }
    # pymol: write a .pml script from the s_pymol template, run it, then
    # remove the tmp pdbs and the script
    if Renderer == 'pymol':
        d['FILENAME'] = OutPrefix + '.png' if not SinglePlot else OutPrefix + '_tmp0.png'
        tmpPml = OutPrefix + '.pml'
        file(tmpPml, 'w').write(s_pymol % d)
        cmdstr1 = '%s -Qc %s' % (PYMOLEXEC, tmpPml)
        os.system(cmdstr1)
        for x in [tmpNativePdb, tmpPdb, tmpPml]:
            os.remove(x)
    # vmd: same idea with a .tcl script from the s_vmd template
    elif Renderer == 'vmd':
        d['FILEPREFIX'] = OutPrefix if not SinglePlot else OutPrefix + '_tmp0'
        d['TACHYONEXEC'] = TACHYONEXEC
        tmpTcl = OutPrefix + '.tcl'
        file(tmpTcl, 'w').write(s_vmd % d)
        cmdstr1 = '%s -dispdev text -eofexit -e %s > /dev/null 2>&1' % (
            VMDEXEC, tmpTcl)
        os.system(cmdstr1)
        # NOTE(review): the last entry removes a file named exactly
        # FILEPREFIX -- presumably an intermediate written by the tcl
        # script; confirm against s_vmd
        for x in [tmpNativePdb, tmpPdb, tmpTcl, d['FILEPREFIX']]:
            os.remove(x)
    else:
        print 'ERROR: Renderer not found'
        exit()
    # if single plot with supplied labels is requested (mostly when used from the command line)
    if SinglePlot:
        if not Label:
            rmsd = sim.geom.RMSD(pNative.Pos[BBInds], p.Pos[BBInds])
            print rmsd
            Label = r'$RMSD = %2.2f \AA$' % rmsd
        # picture produced by the renderer above (.png for pymol, .tga
        # otherwise)
        if Renderer == 'pymol': pic0 = OutPrefix + '_tmp0.png'
        else: pic0 = OutPrefix + '_tmp0.tga'
        pic1 = OutPrefix + '_tmp1.png'
        # trim the picture and add a white border with ImageMagick
        cmdstr2 = '%s %s -trim -bordercolor white -background white -border 50x50 -quality 100 %s' % (
            IMAGEMAGICKEXEC, pic0, pic1)
        os.system(cmdstr2)
        pic = mpimg.imread(pic1)
        fig = plt.figure(figsize=(5, 5), facecolor='w', edgecolor='w')
        ax = fig.add_subplot(1, 1, 1)
        ax.imshow(pic, aspect='auto')
        # Label is currently not drawn: the title call is disabled
        #ax.set_title(Label, fontsize = 8)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        figname = OutPrefix + '.png'
        plt.savefig(figname, bbox_inches='tight')
        # remove the intermediate pictures
        for x in [pic0, pic1]:
            os.remove(x)
    return
Example #30
0
def Map(InPdb, CGPrefix, Model = None, AATraj = None, PrmTop = None, AmberEne = None, LastNFrames = 0, hasPseudoGLY = True):
    ''' Maps an all-atom pdb to a CG N-C-O-S version. If pseudo GLY side chains are requested,
    automatically hydrogenates GLY residues that don't have alpha hydrogens'''
    if hasPseudoGLY:
        print 'Using pseudo Glycines'
    # read in protein structure
    p = protein.ProteinClass(Pdb = InPdb, Model = Model)
    p.Update()
    # hydrogenate glycines that don't have hydrogens
    if hasPseudoGLY: AddH_GLY(p)
    # extract the all-atom pdb string
    PdbString = p.GetPdb()
    Pos = p.Pos
    Seq = p.Seq
    # is this structure capped (# adapdted from /share/apps/pdbtools.py)
    NCap = [s for s in PdbString.split("\n") if s[0:4]=="ATOM" and s[17:20] in NCaps]
    CCap = [s for s in PdbString.split("\n") if s[0:4]=="ATOM" and s[17:20] in CCaps]
    hasNCap = len(NCap) > 0
    hasCCap = len(CCap) > 0
    # Perform some book-keeping on the sequence
    Seq = p.Seq
    if hasNCap: Seq = Seq[1:]
    if hasCCap: Seq = Seq[:-1]
    ResCount = dict( (x,Seq.count(x)) for x in set(Seq) )
    if 'GLY' in ResCount.keys() and not hasPseudoGLY:
        NCGAtoms = 3 * ResCount['GLY'] + 4 * ( sum(ResCount.values()) - ResCount['GLY'] )
    else:
        NCGAtoms = 4 * len(Seq)
    # Masks
    DecapFilter = lambda ResName, AtomName : not ( ResName in (NCaps + CCaps) )    
    SFilter_Other = lambda ResName, AtomName : not (AtomName == 'N' or AtomName == 'CA' or AtomName == 'C' or AtomName == 'O' or 'H' in AtomName)
    SFilter_GLY = lambda ResName, AtomName: (ResName == 'GLY' and 'HA' in AtomName)
    # Parse atom indices based on masks
    NInds = p.AtomInd(AtomName = 'N', UserFunc = DecapFilter)
    CAInds = p.AtomInd(AtomName = 'CA', UserFunc = DecapFilter)
    CInds = p.AtomInd(AtomName = 'C', UserFunc = DecapFilter)
    OInds = p.AtomInd(AtomName = 'O', UserFunc = DecapFilter)
    SInds = dict( (i, []) for i in range(len(Seq)) )
    for i, r in enumerate(Seq):
        startres = 1 if hasNCap else 0
        # sidechain hydrogens for GLY
        if r == 'GLY' and hasPseudoGLY:
            this_SInds = p.AtomInd(ResNum = startres + i, UserFunc = DecapFilter and SFilter_GLY)
            # find out which hydrogen is prochiral-Si
            ind_H = findProchiralH_GLY(PosN = Pos[NInds[i]], PosCA = Pos[CAInds[i]], PosC = Pos[CInds[i]], 
                                       PosH = ( Pos[this_SInds[0]], Pos[this_SInds[1]] ) )
            SInds[i] = [this_SInds[ind_H]]
        else:
            SInds[i] = p.AtomInd(ResNum = startres + i, UserFunc = DecapFilter and SFilter_Other)
    # Write Masks to a Map and final CG atoms to CG PDB
    CurrentChain = -1    
    s = ''
    s_bond = ''
    MapDict = dict( (i, []) for i in range(NCGAtoms) )
    n = 0
    for i, r in enumerate(Seq):
        # determine if a new chain starts here
        thisChain = p.ResChain(i)
        if not thisChain == CurrentChain:
            if not i == 0: s +=  "TER\n" # inter-chain TER records
        # N lines (do not bond when starting a new chain)
        if thisChain == CurrentChain:
            if Seq[i-1] == 'GLY' and not hasPseudoGLY: s_bond += BONDFMT % ('CONECT', n, n+1)
            else: s_bond += BONDFMT % ('CONECT', n-1, n+1)
        N_CGInd = n ; n+= 1
        MapDict[N_CGInd].append(NInds[i])
        s += PDBFMT % (N_CGInd+1, 'N  ', r, string.ascii_uppercase[thisChain], i+1, Pos[NInds[i], 0], Pos[NInds[i], 1], Pos[NInds[i], 2], 1.0, 0.0)
        s += "\n"
        # now update chains
        CurrentChain = thisChain
        # C lines
        s_bond += BONDFMT % ('CONECT', n, n+1)
        C_CGInd = n ; n+= 1
        MapDict[C_CGInd].append(CAInds[i])
        s += PDBFMT % (C_CGInd+1, 'C  ', r, string.ascii_uppercase[thisChain], i+1, Pos[CAInds[i], 0], Pos[CAInds[i] ,1], Pos[CAInds[i] ,2], 1.0, 0.0)
        s += "\n"
        # O lines
        s_bond += BONDFMT % ('CONECT', n, n+1)
        O_CGInd = n; n+=1
        MapDict[O_CGInd].extend([ CInds[i], OInds[i] ])
        COMPos = (Pos[CInds[i]] + Pos[OInds[i]]) / 2.
        s += PDBFMT % (O_CGInd+1, 'O  ', r, string.ascii_uppercase[thisChain], i+1, COMPos[0], COMPos[1], COMPos[2], 1.0, 0.0)
        s += "\n"
        # S lines
        if not r == 'GLY' or hasPseudoGLY:
            if genBonds == 2:
                s_bond += BONDFMT % ('CONECT', n-1, n+1)
            S_CGInd = n; n+= 1
            MapDict[S_CGInd].extend(SInds[i])
            COMPos = np.mean(Pos[SInds[i]], axis = 0)
            s += PDBFMT % (S_CGInd+1, 'S  ', r, string.ascii_uppercase[thisChain], i+1, COMPos[0], COMPos[1], COMPos[2], 1.0, 0.0)
            s += "\n"
    # Write CG PDB
    s += "TER\n" # last TER record
    if genBonds: s += s_bond
    OutPdb = CGPrefix + '.pdb'
    with open(OutPdb, 'w') as of: of.write(s)

    # Map Traj
    if not AATraj is None:
        import sim
        simMap = sim.atommap.PosMap()
        for i in range(NCGAtoms):
            simMap += [sim.atommap.AtomMap(Atoms1 = MapDict[i], Atom2 = i)]
        AtomNames = []
        for r in Seq:
            if r == 'GLY' and not hasPseudoGLY: AtomNames.extend(['N', 'C', 'O'])
            else: AtomNames.extend(['N', 'C', 'O', 'S'])
        print 'Reading from AA Traj...'
        if PrmTop is None: Trj = sim.traj.lammps.Lammps(AATraj) # LammpsTraj
        else: Trj = sim.traj.amber.Amber(AATraj, PrmTop) # ZamTraj
        tmpinit = Trj[0]
        if Trj.FrameData.has_key('BoxL'): BoxL = Trj.FrameData['BoxL']
        else: BoxL = [0., 0., 0.]
        print 'Using Box: ', BoxL
        print 'Writing to CG Lammps Traj...'
        if LastNFrames: print 'Read %d frames, writing last %d frames' % (len(Trj), LastNFrames)
        # Note: the entire traj must be mapped to avoid file write errors
        CGTraj = CGPrefix + '.lammpstrj.gz'
        MappedTrj = sim.traj.mapped.Mapped(Trj, simMap, AtomNames = AtomNames, BoxL = BoxL)
        # now parse out only the necessary portion of the mapped traj
        if LastNFrames: MappedTrj = MappedTrj[-LastNFrames:]
        # now convert to Lammps
        sim.traj.base.Convert(MappedTrj, sim.traj.LammpsWrite, CGTraj, Verbose = True)
    
    # Write Ene File
    if not AmberEne is None:
        print 'Converting Amber Ene File...'
        CGEne = CGPrefix + '.ene.dat.gz'
        of = sim.traj.base.FileOpen(AmberEne, "rb")
        lines = of.readlines()
        start = 10 ; enefield_loc = (6,2)
        Ene = []
        for line in lines[start:]:
            l = line.split()
            if l[0] == 'L%d' % enefield_loc[0]:
                this_ene = float(l[enefield_loc[1]])
                Ene.append(this_ene)
        # parse necessary portion of Ene
        if LastNFrames: Ene = Ene[-LastNFrames:]
        np.savetxt(CGEne, Ene)
        of.close()
    return