Example #1
0
def get_sasa_mmtk(selection, state=-1, hydrogens='auto', quiet=1):
    '''
DESCRIPTION

    Get solvent accessible surface area using MMTK.MolecularSurface

    http://dirac.cnrs-orleans.fr/MMTK/

    This command is very picky with missing atoms and wrong atom naming.

ARGUMENTS

    selection = string: atom selection

    state = int: state handed to cmd.get_pdbstr when exporting the
    coordinates {default: -1}

    hydrogens = string: 'auto' picks 'all' if the selection contains
    hydrogens and 'no_hydrogens' otherwise; 'none' is an alias for
    'no_hydrogens'; any other value is passed unchanged to
    createPeptideChains {default: auto}

    quiet = int: print the result if 0 {default: 1}

RETURNS

    The surface area (float), scaled by 1e2 from the MMTK result.

SEE ALSO

    stub2ala, get_sasa, get_sasa_ball
    '''
    try:
        import MMTK
    except ImportError:
        print(' ImportError: please install MMTK')
        raise CmdException

    from MMTK.PDB import PDBConfiguration
    from MMTK.Proteins import Protein
    from MMTK.MolecularSurface import surfaceAndVolume

    # cStringIO only exists on Python 2; fall back to io on Python 3.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    selection = selector.process(selection)
    state, quiet = int(state), int(quiet)
    radius = cmd.get_setting_float('solvent_radius')

    if hydrogens == 'auto':
        if cmd.count_atoms('(%s) and hydro' % selection) > 0:
            hydrogens = 'all'
        else:
            hydrogens = 'no_hydrogens'
    elif hydrogens == 'none':
        hydrogens = 'no_hydrogens'

    # Honour the requested state; it used to be parsed and then ignored.
    pdbstr = cmd.get_pdbstr(selection, state)
    conf = PDBConfiguration(StringIO(pdbstr))
    system = Protein(conf.createPeptideChains(hydrogens))

    # The radius is scaled by 0.1 on the way in and the results by 1e2 / 1e3
    # on the way out -- presumably Angstrom <-> nm conversion; confirm.
    try:
        area, volume = surfaceAndVolume(system, radius * 0.1)
    except Exception:
        # Not a bare except: Ctrl-C / SystemExit must still propagate.
        print(' Error: MMTK.MolecularSurface.surfaceAndVolume failed')
        raise CmdException

    if not quiet:
        print(' get_sasa_mmtk: %.3f Angstroms^2 (volume: %.3f Angstroms^3).' %
              (area * 1e2, volume * 1e3))
    return area * 1e2
Example #2
0
def get_sasa_mmtk(selection, state=-1, hydrogens='auto', quiet=1):
    '''
DESCRIPTION

    Get solvent accessible surface area using MMTK.MolecularSurface

    http://dirac.cnrs-orleans.fr/MMTK/

    This command is very picky with missing atoms and wrong atom naming.

ARGUMENTS

    selection = string: atom selection

    state = int: state handed to cmd.get_pdbstr when exporting the
    coordinates {default: -1}

    hydrogens = string: 'auto' picks 'all' if the selection contains
    hydrogens and 'no_hydrogens' otherwise; 'none' is an alias for
    'no_hydrogens'; any other value is passed unchanged to
    createPeptideChains {default: auto}

    quiet = int: print the result if 0 {default: 1}

RETURNS

    The surface area (float), scaled by 1e2 from the MMTK result.

SEE ALSO

    stub2ala, get_sasa, get_sasa_ball
    '''
    try:
        import MMTK
    except ImportError:
        print(' ImportError: please install MMTK')
        raise CmdException

    from MMTK.PDB import PDBConfiguration
    from MMTK.Proteins import Protein
    from MMTK.MolecularSurface import surfaceAndVolume

    # cStringIO only exists on Python 2; fall back to io on Python 3.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    selection = selector.process(selection)
    state, quiet = int(state), int(quiet)
    radius = cmd.get_setting_float('solvent_radius')

    if hydrogens == 'auto':
        if cmd.count_atoms('(%s) and hydro' % selection) > 0:
            hydrogens = 'all'
        else:
            hydrogens = 'no_hydrogens'
    elif hydrogens == 'none':
        hydrogens = 'no_hydrogens'

    # Honour the requested state; it used to be parsed and then ignored.
    pdbstr = cmd.get_pdbstr(selection, state)
    conf = PDBConfiguration(StringIO(pdbstr))
    system = Protein(conf.createPeptideChains(hydrogens))

    # The radius is scaled by 0.1 on the way in and the results by 1e2 / 1e3
    # on the way out -- presumably Angstrom <-> nm conversion; confirm.
    try:
        area, volume = surfaceAndVolume(system, radius * 0.1)
    except Exception:
        # Not a bare except: Ctrl-C / SystemExit must still propagate.
        print(' Error: MMTK.MolecularSurface.surfaceAndVolume failed')
        raise CmdException

    if not quiet:
        print(' get_sasa_mmtk: %.3f Angstroms^2 (volume: %.3f Angstroms^3).' % (area * 1e2, volume * 1e3))
    return area * 1e2
Example #3
0
def makeImage(object, transformation):
    """Return a transformed deep copy of *object*.

    The copy's atoms (as listed by ``atomList()``) each get the position
    obtained by calling *transformation* on their current position. The
    object passed in is left unmodified.
    """
    duplicate = deepcopy(object)
    for member in duplicate.atomList():
        moved = transformation(member.position())
        member.setPosition(moved)
    return duplicate


# Read PDB configuration and create MMTK objects for all peptide chains.
# A C-alpha model is used to reduce the system size. You can remove
# 'model="calpha"' to get an all-atom model, but for insulin this will
# create more than 380000 atoms for the 27-unit-cell crystal!
conf = PDBConfiguration('insulin.pdb')
chains = Collection(conf.createPeptideChains(model="calpha"))

# Apply non-crystallographic symmetries to construct the asymmetric unit
# The asu starts out as the chains read from the file; one transformed copy
# is added per NCS operator that is not flagged `given`.
# NOTE(review): presumably `given` marks operators whose images are already
# present in the file -- confirm.
asu = Collection(chains)
for so in conf.ncs_transformations:
    if not so.given:
        image = makeImage(chains, so)
        asu.addObject(image)

# Apply crystallographic symmetries to construct the unit cell
# Note that the list of crystallographic symmetries includes the
# identity transformation, so the unmodified asu is not added
# to the unit cell.
# NOTE(review): in this excerpt the loop below ends right after `image` is
# built; presumably the image is added to `cell` in the following lines.
cell = Collection()
for so in conf.cs_transformations:
    image = makeImage(asu, so)
Example #4
0
def generate_ramachandran(pdb_id):
    """Draw a Ramachandran plot for one PDB entry and return an <img> tag.

    Reads ``../data/<pdb_id>.pdb``, buckets the phi/psi angles of every
    residue that has both by Ramachandran type, and draws the "General"
    bucket on top of the "General" reference contours through R. The plot
    is written as a PNG with a randomised name under ``../tmp/``.

    pdb_id -- string used to build the PDB path, the PNG file name and the
              plot title.

    Returns the HTML ``<img>`` element (a string) that points at the PNG.

    NOTE(review): pdb_id is interpolated unescaped into a file path, an R
    command string and the returned HTML. If it can originate from an
    untrusted request it must be validated before this call -- confirm.
    """
    rama_GENERAL = "General"
    rama_GLYCINE = "Glycine"
    rama_PROLINE = "Proline"
    rama_PRE_PRO = "Pre-Pro"
    ramachandran_types = [rama_GENERAL,rama_GLYCINE,rama_PROLINE,rama_PRE_PRO]

    # Only one map is drawn. Contours and scatter points must come from the
    # same type; the scatter data used to be indexed with a leaked loop
    # variable, i.e. with the type of whichever residue was processed last.
    # NOTE(review): "General" is chosen to match the contour file that was
    # already hard-coded -- confirm this is the intended plot.
    plot_type = rama_GENERAL

    # Per type: (contour levels, fill colours, reference density file).
    # I have used the same colours as RAMPAGE
    # http://raven.bioc.cam.ac.uk/rampage.php
    rama_settings = {"General" : ([0, 0.0005, 0.02, 1],
                      ['#FFFFFF','#B3E8FF','#7FD9FF'],
                      "top500angles/pct/rama/rama500-general.data"),
                      # or rama500-general-nosec.data
         "Glycine" : ([0, 0.002,  0.02, 1],
                      ['#FFFFFF','#FFE8C5','#FFCC7F'],
                      "top500angles/pct/rama/rama500-gly-sym.data"),
                      # or rama500-gly-sym-nosec.data
         "Proline" : ([0, 0.002,  0.02, 1],
                      ['#FFFFFF','#D0FFC5','#7FFF8C'],
                      "top500angles/pct/rama/rama500-pro.data"),
         "Pre-Pro" : ([0, 0.002,  0.02, 1],
                      ['#FFFFFF','#B3E8FF','#7FD9FF'],
                      "top500angles/pct/rama/rama500-prepro.data")}
         #P.S. Also rama500-ala-nosec.data

    def load_data_file(filename) :
        """Read a reference density table into a square grid.

        Returns (data, lower_bounds, mid_points, upper_bounds), where data
        is a numpy array indexed [phi_bin, psi_bin] and the other three are
        lists of bin coordinates in degrees.
        """
        HALF_STEP = 1
        STEP = HALF_STEP*2
        # Real lists, so that .index() and the hand-off to R do not depend
        # on whether range() returns a list (Python 2) or a range object.
        lower_bounds = list(range(-180, 180, STEP))
        mid_points = list(range(-180+HALF_STEP, 180+HALF_STEP, STEP))
        upper_bounds = list(range(-180+STEP, 180+STEP, STEP))

        data = numpy.zeros((len(mid_points), len(mid_points)))

        # Expected file layout:
        #
        # # Table name/description: "Top500 General case (not Gly, Pro, or pre-Pro) B<30"
        # # Number of dimensions: 2
        # # For each dimension, 1 to 2: lower_bound  upper_bound  number_of_bins  wrapping
        # #   x1: -180.0 180.0 180 true
        # #   x2: -180.0 180.0 180 true
        # # List of table coordinates and values. (Value is last number on each line.)
        # -179.0 -179.0 0.0918642445114388
        # -179.0 -177.0 0.07105717866463215
        #     ...
        with open(filename, "r") as input_file :
            for line in input_file :
                #Strip the newline character(s) from the end of the line
                line = line.rstrip("\r\n")
                #Blank lines and comment lines carry no data
                if not line.strip() or line[0]=="#" :
                    continue

                parts = line.split()
                assert len(parts)==3

                x1 = float(parts[0]) #phi
                x2 = float(parts[1]) #psi
                value = float(parts[2])

                #Bin mid points are expected to be whole degrees
                assert x1 == float(int(x1))
                assert x2 == float(int(x2))
                i1 = mid_points.index(int(x1))
                i2 = mid_points.index(int(x2))

                data[i1,i2]=value
        return (data, lower_bounds, mid_points, upper_bounds)

    r.library("MASS")

    # Define the plotting function on the R side. It is invoked further down
    # as r.ramachandran_plot(...) with '_' in place of '.' in all names --
    # presumably the R bridge translates these; confirm.
    r("""
    ramachandran.plot <- function(x.scatter, y.scatter,
        x.grid = seq(0, 1, len = nrow(z)), y.grid = seq(0, 1, len = ncol(z)), z.grid,
        xlim = range(x.grid, finite = TRUE), ylim = range(y.grid, finite = TRUE),
        zlim = range(z.grid, finite = TRUE), levels = pretty(zlim, nlevels),
        nlevels = 20, color.palette = cm.colors, col = color.palette(length(levels) -
            1), plot.title="", plot.axes, key.title, key.axes, asp = NA,
        xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes,
        ...)
    {
        if (missing(z.grid)) {
            stop("no 'z.grid' matrix specified")
        }
        else if (is.list(x.grid)) {
            y.grid <- x.grid$y
            x.grid <- x.grid$x
        }
        if (any(diff(x.grid) <= 0) || any(diff(y.grid) <= 0))
            stop("increasing 'x.grid' and 'y.grid' values expected")

        plot.new()
        plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp)

        if (!is.matrix(z.grid) || nrow(z.grid) <= 1 || ncol(z.grid) <= 1)
            stop("no proper 'z.grid' matrix specified")
        if (!is.double(z.grid))
            storage.mode(z.grid) <- "double"
        .filled.contour(as.double(x.grid), as.double(y.grid), z.grid, as.double(levels), 
                                col = col)

        if (!(missing(x.scatter)) && !(missing(y.scatter))) {
            plot.xy(xy.coords(x.scatter,y.scatter,NULL,NULL,NULL,NULL),
                    xlim=xlim, ylim=ylim, xlab="", ylab="", asp=asp,
                    type="p", pch=20, cex=0.1)
        }
            
        if (missing(plot.axes)) {
            if (axes) {
                title(main=plot.title, xlab=expression(phi), ylab=expression(psi))
                axis(1, at=c(-180,-90,0,90,180))
                axis(2, at=c(-180,-90,0,90,180))
            }
        }
        else plot.axes
        if (frame.plot)
            box()
        if (missing(plot.title))
            title(...)
        else plot.title
        invisible()
    }
    """)


    def degrees(rad_angle) :
        """Converts an angle in radians to degrees, mapped to the range [-180,180]"""
        angle = rad_angle * 180 / math.pi
        #Note this assumes the radians angle is positive as that's what MMTK does
        while angle > 180 :
            angle = angle - 360
        return angle

    def next_residue(residue) :
        """Expects an MMTK residue, returns the next residue in the chain, or None"""
        #Proteins go N terminal --> C terminal
        #The next residue is bonded to the C of this one...
        for a in residue.peptide.C.bondedTo():
            if a.parent.parent != residue:
                return a.parent.parent
        return None


    def residue_amino(residue) :
        """Expects an MMTK residue, returns the three letter amino acid code in upper case"""
        if residue :
            return residue.name[0:3].upper()
        else :
            return None

    def residue_ramachandran_type(residue) :
        """Expects an MMTK residue, returns ramachandran 'type' (General, Glycine, Proline or Pre-Pro)"""
        if residue_amino(residue)=="GLY" :
            return rama_GLYCINE
        elif residue_amino(residue)=="PRO" :
            return rama_PROLINE
        elif residue_amino(next_residue(residue))=="PRO" :
            #excludes those that are Pro or Gly
            return rama_PRE_PRO
        else :
            return rama_GENERAL

    # One list of angles (in degrees) per Ramachandran type.
    scatter_phi = dict()
    scatter_psi = dict()
    for type_name in ramachandran_types :
        scatter_phi[type_name]=[]
        scatter_psi[type_name]=[]

    pdb_filename = "../data/%s.pdb" % pdb_id

    # Load the PDB file, ignore the hydrogens, and then build a model of the peptides:
    configuration = PDBConfiguration(pdb_filename)
    configuration.deleteHydrogens()
    protein = Protein(configuration.createPeptideChains(model = "no_hydrogens"))
    for chain in protein :
        for residue in chain :
            phi, psi = residue.phiPsi()
            # Presumably phiPsi() yields None for an undefined angle (e.g.
            # at chain ends). Test for None explicitly: an angle of exactly
            # 0.0 is valid but falsy.
            if phi is None or psi is None :
                continue
            residue_type = residue_ramachandran_type(residue)
            assert residue_type in ramachandran_types
            scatter_phi[residue_type].append(degrees(phi))
            scatter_psi[residue_type].append(degrees(psi))

    png_filename = "ppii%d%s.png" % (random.randint(0, 1000000), pdb_id)
    png_filepath = "../tmp/" + png_filename
    png_command = 'png("' + png_filepath + '")'

    # Open the PNG device; everything drawn until dev.off() goes to the file.
    r(png_command)

    (rama_levels, rama_colors, rama_filename) = rama_settings[plot_type]

    data, lower_bounds, mid_points, upper_bounds = load_data_file(rama_filename)

    r.ramachandran_plot(x_scatter=scatter_phi[plot_type],
                        y_scatter=scatter_psi[plot_type],
                        x_grid=mid_points, y_grid=mid_points, z_grid=data,
                        xlim=[-180,180], ylim=[-180,180], asp=1.0,
                        plot_title="Ramachandran plot of " + pdb_id, drawlabels=False,
                        levels=rama_levels, col=rama_colors)

    r("dev.off()")
    return '<img src="/~jean/projet/tmp/' + png_filename + '"/>'
Example #5
0
def normalmodes_mmtk(selection,
                     cutoff=12.0,
                     ff='Deformation',
                     first=7,
                     last=10,
                     prefix='mmtk',
                     states=7,
                     factor=-1,
                     quiet=1):
    '''
DESCRIPTION

    Fast normal modes for large proteins using an elastic network model (CA only)

    Based on:
    http://dirac.cnrs-orleans.fr/MMTK/using-mmtk/mmtk-example-scripts/normal-modes/

ARGUMENTS

    selection = string: atom selection

    cutoff = float: divided by 10 and passed as cutoff to the Deformation
    or Calpha force field {default: 12.0}

    ff = string: case-insensitive prefix of DeformationForceField,
    CalphaForceField or Amber94ForceField (the latter switches to an
    all-atom model) {default: Deformation}

    first = int: first mode to create an object for {default: 7}

    last = int: last mode to create an object for {default: 10}

    prefix = string: prefix of the created object names {default: mmtk}

    states = int: number of states per created object {default: 7}

    factor = float: displacement scale; a negative value selects
    log(number of atoms) {default: -1}

    quiet = int: print progress information if 0 {default: 1}
    '''
    try:
        import MMTK
    except ImportError:
        print('Failed to import MMTK, please add to PYTHONPATH')
        raise CmdException

    selection = selector.process(selection)
    cutoff = float(cutoff)
    first, last = int(first), int(last)
    states, factor, quiet = int(states), float(factor), int(quiet)

    import os
    import tempfile
    from math import log
    from chempy import cpv

    from MMTK import DCD, InfiniteUniverse
    from MMTK.PDB import PDBConfiguration
    from MMTK.Proteins import Protein

    from MMTK.ForceFields import DeformationForceField, CalphaForceField
    from MMTK.FourierBasis import FourierBasis, estimateCutoff
    from MMTK.NormalModes import NormalModes, SubspaceNormalModes

    model = 'calpha'
    ff = ff.lower()
    if 'deformationforcefield'.startswith(ff):
        forcefield = DeformationForceField(cutoff=cutoff / 10.)
    elif 'calphaforcefield'.startswith(ff):
        forcefield = CalphaForceField(cutoff=cutoff / 10.)
    elif 'amber94forcefield'.startswith(ff):
        from MMTK.ForceFields import Amber94ForceField
        forcefield = Amber94ForceField()
        model = 'all'
    else:
        raise NotImplementedError('unknown ff = ' + str(ff))
    if not quiet:
        print(' Forcefield:', forcefield.__class__.__name__)

    if model == 'calpha':
        selection = '(%s) and polymer and name CA' % (selection)

    f = StringIO(cmd.get_pdbstr(selection))
    conf = PDBConfiguration(f)
    items = conf.createPeptideChains(model)

    universe = InfiniteUniverse(forcefield)
    universe.protein = Protein(*items)

    # Floor division: "/" would yield a float basis size on Python 3.
    nbasis = max(10, universe.numberOfAtoms() // 5)
    cutoff, nbasis = estimateCutoff(universe, nbasis)
    if not quiet:
        print(" Calculating %d low-frequency modes." % nbasis)

    if cutoff is None:
        modes = NormalModes(universe)
    else:
        subspace = FourierBasis(universe, cutoff)
        modes = SubspaceNormalModes(universe, subspace)

    natoms = modes.array.shape[1]
    frequencies = modes.frequencies

    if factor < 0:
        factor = log(natoms)
        if not quiet:
            print(' set factor to %.2f' % (factor))

    # Round-trip the universe through a PDB file and load that as a scratch
    # object; `z` holds the atom indices in the order DCD.writePDB reports.
    # mkstemp creates the file atomically (mktemp only returns a name and is
    # race-prone); the file is removed even if writing or loading fails.
    fd, filename = tempfile.mkstemp(suffix='.pdb')
    os.close(fd)
    try:
        sequence = DCD.writePDB(universe, None, filename)
        z = [a.index for a in sequence]
        selection = cmd.get_unused_name('_')
        cmd.load(filename, selection, zoom=0)
    finally:
        os.remove(filename)

    if cmd.count_atoms(selection) != natoms:
        print('hmm... still wrong number of atoms')

    def eigenfacs_iter(mode):
        # Rows of the mode's array, reordered by `z`, one per atom.
        x = modes[mode - 1].array
        return iter(x.take(z, 0))

    for mode in range(first, min(last, len(modes)) + 1):
        name = prefix + '%d' % mode
        cmd.delete(name)

        if not quiet:
            print(' normalmodes: object "%s" for mode %d with freq. %.6f' % \
                    (name, mode, frequencies[mode-1]))

        for state in range(1, states + 1):
            # States are spread evenly over [-0.5, +0.5] of the full
            # displacement. A single state has no span to divide by, so it
            # is left undisplaced instead of raising ZeroDivisionError.
            if states > 1:
                fraction = (state - 1.0) / (states - 1.0) - 0.5
            else:
                fraction = 0.0

            cmd.create(name, selection, 1, state, zoom=0)
            cmd.alter_state(
                state,
                name,
                '(x,y,z) = cpv.add([x,y,z], cpv.scale(next(myit), myfac))',
                space={
                    'cpv': cpv,
                    'myit': eigenfacs_iter(mode),
                    'next': next,
                    'myfac': 1e2 * factor * fraction,
                })

    # Drop the scratch object loaded from the temporary PDB file.
    cmd.delete(selection)
    if model == 'calpha':
        cmd.set('ribbon_trace_atoms', 1, prefix + '*')
        cmd.show_as('ribbon', prefix + '*')
    else:
        cmd.show_as('lines', prefix + '*')
Example #6
0
    elif 'amber94forcefield'.startswith(ff):
        from MMTK.ForceFields import Amber94ForceField
        forcefield = Amber94ForceField()
        model = 'all'
    else:
        raise NotImplementedError('unknown ff = ' + str(ff))
    if not quiet:
        print ' Forcefield:', forcefield.__class__.__name__

    if model == 'calpha':
        selection = '(%s) and polymer and name CA' % (selection)

    from cStringIO import StringIO
    f = StringIO(cmd.get_pdbstr(selection))
    conf = PDBConfiguration(f)
    items = conf.createPeptideChains(model)

    universe = InfiniteUniverse(forcefield)
    universe.protein = Protein(*items)

    nbasis = max(10, universe.numberOfAtoms()/5)
    cutoff, nbasis = estimateCutoff(universe, nbasis)
    if not quiet:
        print " Calculating %d low-frequency modes." % nbasis

    if cutoff is None:
        modes = NormalModes(universe)
    else:
        subspace = FourierBasis(universe, cutoff)
        modes = SubspaceNormalModes(universe, subspace)
Example #7
0
#
# Note that this will not necessarily work with any PDB file. Many files
# use non-crystallographic symmetry information in a non-standard way.
# This is usually explained in REMARK records, but those cannot be
# evaluated automatically.
#

from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.Proteins import Protein

# Read PDB configuration and create MMTK objects for all peptide chains.
# A C-alpha model is used to reduce the system size. You can remove
# 'model="calpha"' to get an all-atom model.
conf = PDBConfiguration('insulin.pdb')
chains = Collection(conf.createPeptideChains(model="calpha"))

# Copy and transform the objects representing the asymmetric unit in order
# to obtain the contents of the unit cell.
# `chains` is rebound here: from this point on it refers to the result of
# asuToUnitCell, not to the collection read from the file.
chains = conf.asuToUnitCell(chains)

# Construct a periodic universe representing the unit cell.
universe = conf.createUnitCellUniverse()

# Add each chain as one protein. If the unit cell contains multimers,
# the chains must be combined into protein objects by hand,
# as no corresponding information can be extracted from the PDB file.
for chain in chains:
    universe.addObject(Protein(chain))

# If VMD has been defined as the PDB viewer, this will not only show
Example #8
0
# Timestamp taken before any work is done.
# NOTE(review): presumably used to report elapsed time later in the script.
start = time.time()

#
# First problem: construct an all-atom model from a structure without
# hydrogens. This is the standard problem when using an all-atom force
# field with crystallographic structures.
#
#
# Load the PDB file.
configuration = PDBConfiguration('insulin.pdb')

# Construct the peptide chain objects. This also constructs positions
# for any missing hydrogens, using geometrical criteria.
chains = configuration.createPeptideChains()

# Make the protein object.
#insulin = Protein(chains)

# Define system
# The universe uses an Amber99 force field with the 'frcmod.ff99SB'
# modification file; the protein is built from all chains and attached to it.
universe = InfiniteUniverse(Amber99ForceField(mod_files=['frcmod.ff99SB']))
universe.protein = Protein(chains)

# Initialize velocities
universe.initializeVelocitiesToTemperature(50. * Units.K)
print 'Temperature: ', universe.temperature()
print 'Momentum: ', universe.momentum()
print 'Angular momentum: ', universe.angularMomentum()
# NOTE(review): `file` is not assigned anywhere in the visible code --
# presumably an open output file created earlier; confirm. The angular
# momentum is printed above but not written to it here.
file.write('Temperature: ' + str(universe.temperature()) + "\n")
file.write('Momentum: ' + str(universe.momentum()) + "\n")
Example #9
0
        forcefield = CalphaForceField(cutoff=cutoff / 10.)
    elif 'amber94forcefield'.startswith(ff):
        from MMTK.ForceFields import Amber94ForceField
        forcefield = Amber94ForceField()
        model = 'all'
    else:
        raise NotImplementedError('unknown ff = ' + str(ff))
    if not quiet:
        print(' Forcefield:', forcefield.__class__.__name__)

    if model == 'calpha':
        selection = '(%s) and polymer and name CA' % (selection)

    f = StringIO(cmd.get_pdbstr(selection))
    conf = PDBConfiguration(f)
    items = conf.createPeptideChains(model)

    universe = InfiniteUniverse(forcefield)
    universe.protein = Protein(*items)

    nbasis = max(10, universe.numberOfAtoms() / 5)
    cutoff, nbasis = estimateCutoff(universe, nbasis)
    if not quiet:
        print(" Calculating %d low-frequency modes." % nbasis)

    if cutoff is None:
        modes = NormalModes(universe)
    else:
        subspace = FourierBasis(universe, cutoff)
        modes = SubspaceNormalModes(universe, subspace)
Example #10
0
#
# First problem: construct an all-atom model from a structure without
# hydrogens. This is the standard problem when using an all-atom force
# field with crystallographic structures.
#
# Note: the simple solution in this case is just
#       insulin = Protein('insulin.pdb')
# but the explicit form shown below is necessary when any kind of
# modification is required.
#
# Load the PDB file.
configuration = PDBConfiguration('insulin.pdb')

# Construct the peptide chain objects. This also constructs positions
# for any missing hydrogens, using geometrical criteria.
chains = configuration.createPeptideChains()

# Make the protein object.
insulin = Protein(chains)

# Write out the structure with hydrogens to a new file - we will use
# it as an input example later on.
insulin.writeToFile('insulin_with_h.pdb')


#
# Second problem: read a file with hydrogens and create a structure
# without them. This is useful for analysis; if you don't need the
# hydrogens, processing is faster without them. Or you might want
# to compare structures with and without hydrogens.
#
# NOTE(review): no code for the "Second problem" follows this heading.
# Instead, the "First problem" steps are repeated verbatim below, so
# 'insulin_with_h.pdb' is written a second time -- confirm whether this
# duplication is intentional.
#
# First problem: construct an all-atom model from a structure without
# hydrogens. This is the standard problem when using an all-atom force
# field with crystallographic structures.
#
# Note: the simple solution in this case is just
#       insulin = Protein('insulin.pdb')
# but the explicit form shown below is necessary when any kind of
# modification is required.
#
# Load the PDB file.
configuration = PDBConfiguration('insulin.pdb')

# Construct the peptide chain objects. This also constructs positions
# for any missing hydrogens, using geometrical criteria.
chains = configuration.createPeptideChains()

# Make the protein object.
insulin = Protein(chains)

# Write out the structure with hydrogens to a new file - we will use
# it as an input example later on.
insulin.writeToFile('insulin_with_h.pdb')

#
# Second problem: read a file with hydrogens and create a structure
# without them. This is useful for analysis; if you don't need the
# hydrogens, processing is faster without them. Or you might want
# to compare structures with and without hydrogens.
#
# Load the PDB file.