Example #1
0
    def initialize(self):
        """
        Prepare the analysis: open the DCD input, build the MMTK universe from
        the PDB file and set up the output trajectory with its snapshot
        generator.
        """

        # Open the DCD file and store the reader next to its configuration entry.
        dcd_settings = self.configuration["dcd_file"]
        dcd_settings["instance"] = DCDFile(dcd_settings['filename'])
        dcd_reader = dcd_settings["instance"]

        # One analysis step per frame reported by the DCD reader.
        self.numberOfSteps = dcd_reader['n_frames']

        # Build every chemical object described by the PDB file.
        pdb_conf = PDBConfiguration(self.configuration['pdb_file']['filename'])
        chemical_objects = pdb_conf.createAll()

        # Periodic universe when the trajectory carries PBC data,
        # infinite universe otherwise.
        if dcd_reader['has_pbc_data']:
            universe_factory = ParallelepipedicPeriodicUniverse
        else:
            universe_factory = InfiniteUniverse
        self._universe = universe_factory()

        # Put the objects read from the PDB file into the universe.
        self._universe.addObject(chemical_objects)

        resolve_undefined_molecules_name(self._universe)

        # Open the MMTK output trajectory for writing.
        output_path = self.configuration['output_file']['files'][0]
        self._trajectory = Trajectory(self._universe, output_path, mode='w')

        # Snapshot generator whose single action writes to that trajectory.
        trajectory_writer = TrajectoryOutput(self._trajectory, ["all"], 0, None, 1)
        self._snapshot = SnapshotGenerator(self._universe, actions=[trajectory_writer])
Example #2
0
    def optimize(self):
        """Run the optimization with MMTK.

        Builds an infinite universe from the first nucleotide chain found in
        ``self.temp_pdb_file``, minimizes it with the Amber94 force field
        (plus the restraints returned by ``self.create_restraints`` when there
        are any) for ``self.cycles`` conjugate-gradient steps, writes the
        result to ``self.mmtk_output_file`` and finally copies that file over
        ``self.temp_pdb_file``.

        Progress is reported on standard output.
        """
        rule = '\n-------------------------------------------------------------------------------'
        print(rule)
        print('\n MMTK Optimization starts...')

        print('\t.. building universe')
        configuration = PDBConfiguration(self.temp_pdb_file)
        # Construct the nucleotide chain object. This also constructs positions
        # for the missing hydrogens, using geometrical criteria.
        chain = configuration.createNucleotideChains()[0]
        universe = InfiniteUniverse()
        universe.addObject(chain)

        restraints = self.create_restraints(chain)

        # define force field
        print('\t.. setting up force field')
        ff = Amber94ForceField()
        if restraints:
            ff = ff + restraints
        universe.setForceField(ff)

        # do the minimization
        print('\t.. starting minimization with %i cycles' % self.cycles)
        minimizer = ConjugateGradientMinimizer(universe)
        minimizer(steps=self.cycles)

        # write the intermediate output
        print('\t.. writing MMTK output to %s' % self.temp_pdb_file)
        if self.model_passive:
            print('\t   (please note that MMTK applies a different numeration of residues.\n\t    The original one will be restored in the final output).')
        universe.writeToFile(self.mmtk_output_file)

        # Copy the MMTK output over the temporary PDB file. The source is read
        # completely before the destination is opened for writing, and both
        # handles are closed deterministically.
        with open(self.mmtk_output_file) as source:
            mmtk_output = source.read()
        with open(self.temp_pdb_file, 'w') as target:
            target.write(mmtk_output)
        print(rule)
Example #3
0
def get_sasa_mmtk(selection, state=-1, hydrogens='auto', quiet=1):
    '''
DESCRIPTION

    Get solvent accessible surface area using MMTK.MolecularSurface

    http://dirac.cnrs-orleans.fr/MMTK/

    This command is very picky with missing atoms and wrong atom naming.

ARGUMENTS

    selection = string: atom selection

    state = int: object state that is exported to MMTK {default: -1}

    hydrogens = string: "auto" (keep hydrogens only if the selection
    contains any), "none" (same as "no_hydrogens"), or any other value,
    which is passed on unchanged to createPeptideChains {default: auto}

    quiet = 0/1: print area and volume if 0 {default: 1}

RETURNS

    The surface area reported by MMTK multiplied by 100 (the value printed
    as Angstroms^2).

SEE ALSO

    stub2ala, get_sasa, get_sasa_ball
    '''
    # Imported only to fail early with a helpful message if MMTK is missing.
    try:
        import MMTK
    except ImportError:
        print(' ImportError: please install MMTK')
        raise CmdException

    from MMTK.PDB import PDBConfiguration
    from MMTK.Proteins import Protein
    from MMTK.MolecularSurface import surfaceAndVolume

    # cStringIO only exists on Python 2.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    selection = selector.process(selection)
    state, quiet = int(state), int(quiet)
    radius = cmd.get_setting_float('solvent_radius')

    if hydrogens == 'auto':
        if cmd.count_atoms('(%s) and hydro' % selection) > 0:
            hydrogens = 'all'
        else:
            hydrogens = 'no_hydrogens'
    elif hydrogens == 'none':
        hydrogens = 'no_hydrogens'

    # Export the requested state; previously "state" was parsed but ignored.
    pdbstr = cmd.get_pdbstr(selection, state)
    conf = PDBConfiguration(StringIO(pdbstr))
    system = Protein(conf.createPeptideChains(hydrogens))

    # The radius is scaled by 0.1 on the way in and the results by 1e2 / 1e3
    # on the way out (presumably Angstrom <-> nm conversion; confirm).
    try:
        area, volume = surfaceAndVolume(system, radius * 0.1)
    except Exception:
        # "Exception" rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not turned into a CmdException.
        print(' Error: MMTK.MolecularSurface.surfaceAndVolume failed')
        raise CmdException

    if not quiet:
        print(' get_sasa_mmtk: %.3f Angstroms^2 (volume: %.3f Angstroms^3).' %
              (area * 1e2, volume * 1e3))
    return area * 1e2
Example #4
0
def get_sasa_mmtk(selection, state=-1, hydrogens='auto', quiet=1):
    '''
DESCRIPTION

    Get solvent accessible surface area using MMTK.MolecularSurface

    http://dirac.cnrs-orleans.fr/MMTK/

    This command is very picky with missing atoms and wrong atom naming.

ARGUMENTS

    selection = string: atom selection

    state = int: object state that is exported to MMTK {default: -1}

    hydrogens = string: "auto" (keep hydrogens only if the selection
    contains any), "none" (same as "no_hydrogens"), or any other value,
    which is passed on unchanged to createPeptideChains {default: auto}

    quiet = 0/1: print area and volume if 0 {default: 1}

RETURNS

    The surface area reported by MMTK multiplied by 100 (the value printed
    as Angstroms^2).

SEE ALSO

    stub2ala, get_sasa, get_sasa_ball
    '''
    # Imported only to fail early with a helpful message if MMTK is missing.
    try:
        import MMTK
    except ImportError:
        print(' ImportError: please install MMTK')
        raise CmdException

    from MMTK.PDB import PDBConfiguration
    from MMTK.Proteins import Protein
    from MMTK.MolecularSurface import surfaceAndVolume

    # cStringIO only exists on Python 2.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    selection = selector.process(selection)
    state, quiet = int(state), int(quiet)
    radius = cmd.get_setting_float('solvent_radius')

    if hydrogens == 'auto':
        if cmd.count_atoms('(%s) and hydro' % selection) > 0:
            hydrogens = 'all'
        else:
            hydrogens = 'no_hydrogens'
    elif hydrogens == 'none':
        hydrogens = 'no_hydrogens'

    # Export the requested state; previously "state" was parsed but ignored.
    pdbstr = cmd.get_pdbstr(selection, state)
    conf = PDBConfiguration(StringIO(pdbstr))
    system = Protein(conf.createPeptideChains(hydrogens))

    # The radius is scaled by 0.1 on the way in and the results by 1e2 / 1e3
    # on the way out (presumably Angstrom <-> nm conversion; confirm).
    try:
        area, volume = surfaceAndVolume(system, radius * 0.1)
    except Exception:
        # "Exception" rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not turned into a CmdException.
        print(' Error: MMTK.MolecularSurface.surfaceAndVolume failed')
        raise CmdException

    if not quiet:
        print(' get_sasa_mmtk: %.3f Angstroms^2 (volume: %.3f Angstroms^3).' % (area * 1e2, volume * 1e3))
    return area * 1e2
Example #5
0
def parsePDBAtomSelection(filename, traj):
    """Select the atoms of a trajectory's universe that a PDB file marks.

    The PDB file is read with PDBConfiguration and its objects are assigned,
    in file order, to the objects of ``traj.universe`` by atom count. Every
    PDB atom whose 'element' property is '*' selects the corresponding
    universe atom (looked up through the object's ``pdbmap``).

    Returns a Collection of the selected atoms, or None when the atom counts
    or the residue/molecule names of the two descriptions do not agree.
    """
    universe = traj.universe
    pdb_conf = PDBConfiguration(filename)
    n_objects = len(universe)

    # Pass 1: walk through the PDB objects and hand them out to the universe
    # objects until each one has received its number of atoms.
    pdb_cursor = 0
    assigned = {}
    for obj_index in range(n_objects):
        expected = len(universe[obj_index].atomList())
        collected = 0
        while collected < expected:
            candidate = pdb_conf.objects[pdb_cursor]
            attribute_names = dir(candidate)
            if attribute_names.count('atom_list') == 1:  # no groups
                assigned.setdefault(obj_index, []).append(candidate)
                collected += len(candidate.atom_list)
            elif attribute_names.count('residues') == 1:  # biopolymers
                for residue in candidate.residues:
                    assigned.setdefault(obj_index, []).append(residue)
                    collected += len(residue.atom_list)
            pdb_cursor += 1
        if collected != expected:
            # The PDB objects overshoot this universe object: no match.
            return None

    # Pass 2: compare names and pick the universe atoms flagged with '*'.
    picked = []
    for obj_index in range(n_objects):
        member = universe[obj_index]
        kind = member.__class__.__name__
        groups = []
        if kind == 'Protein':
            for chain in member.chains:
                groups.extend(chain[0])
        elif kind == 'Molecule':
            groups.append(member)

        for position, group in enumerate(groups):
            pdb_item = assigned[obj_index][position]
            universe_atoms = group.atomList()
            if upper(group.pdbmap[0][0]) != upper(pdb_item.name):
                # Name mismatch between universe and PDB file.
                return None
            for atom_name in pdb_item.atoms.keys():
                if pdb_item.atoms[atom_name].properties['element'] == '*':
                    number = group.pdbmap[0][1][atom_name].number
                    picked.append(universe_atoms[number])

    return Collection(picked)
Example #6
0
    def setUp(self):
        """Load the 2ONX reflections and unit cell and set up isotropic ADPs."""
        structure_factors = MMCIFStructureFactorData('2onx-sf.cif.gz', fill=True)
        self.reflections = structure_factors.reflections

        molecule_factory = PDBMoleculeFactory(PDBConfiguration('2ONX.pdb.gz'))
        self.universe = molecule_factory.retrieveUnitCell()
        # The cell of the reflection data must agree with the PDB unit cell.
        assert self.reflections.cell.isCompatibleWith(self.universe, 1.e-3)

        # One tensor per atom: temperature factor / (8 pi^2) times delta.
        self.adps = ParticleTensor(self.universe)
        for atom in self.universe.atomList():
            self.adps[atom] = (atom.temperature_factor / (8. * N.pi**2)) * delta
Example #7
0
def run(pdb1, pdb2, trajectory, nsteps, delpdb=0):
    """Compute a transition path between two PDB structures.

    A C-alpha model of ``pdb1`` is the start configuration; the peptide
    chains of ``pdb2`` are applied to the same protein and superposed on the
    start to give the end configuration. The path is refined for ``nsteps``
    steps and its best result is written to ``trajectory``. When ``delpdb``
    is true both PDB files are deleted once they have been read.
    """
    universe = TransitionPathUniverse(CalphaForceField(2.5))
    universe.protein = Protein(pdb1, model='calpha')
    start_conf = copy(universe.configuration())

    # Move the protein onto the coordinates of the second structure.
    target_chains = PDBConfiguration(pdb2).peptide_chains
    chain_count = len(universe.protein)
    for index in range(chain_count):
        target_chains[index].applyTo(universe.protein[index])

    if delpdb:
        for pdb_path in (pdb1, pdb2):
            os.unlink(pdb_path)

    # Superpose the second structure on the first one.
    transformation, _rms = universe.findTransformation(start_conf)
    universe.applyTransformation(transformation)
    end_conf = copy(universe.configuration())
    universe.setBoundingBox(start_conf, end_conf)

    path = TransitionPath(universe, start_conf, end_conf,
                          step_length=0.05, nmodes=50)
    path.refine(nsteps)

    steps_text = "Transition path after %d steps, " % nsteps
    energy_text = "energy %d," % path.best_penalty
    path.writeBestToTrajectory(trajectory, steps_text + energy_text)
Example #8
0
from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.Proteins import PeptideChain, Protein

#
# First problem: construct an all-atom model from a structure without
# hydrogens. This is the standard problem when using an all-atom force
# field with crystallographic structures.
#
# Note: the simple solution in this case is just
#       insulin = Protein('insulin.pdb')
# but the explicit form shown below is necessary when any kind of
# modification is required.
#
# Load the PDB file.
configuration = PDBConfiguration('insulin.pdb')

# Construct the peptide chain objects. This also constructs positions
# for any missing hydrogens, using geometrical criteria.
chains = configuration.createPeptideChains()

# Make the protein object.
insulin = Protein(chains)

# Write out the structure with hydrogens to a new file - we will use
# it as an input example later on.
insulin.writeToFile('insulin_with_h.pdb')


#
# Second problem: read a file with hydrogens and create a structure
Example #9
0
def parsePDBReference(filename, pattern, verbose=None):
    """Match the atoms flagged in a PDB file against a list of MMTK objects.

    Arguments:
      filename -- name of the PDB file, read with PDBConfiguration
      pattern  -- sequence of MMTK objects; the class names handled below are
                  'Protein', 'SubChain', 'Residue', 'Molecule' and
                  'Collection'
      verbose  -- if true, progress information is printed

    Only PDB atoms whose 'element' property is '*' take part in the match.
    As a side effect every matched MMTK atom gets the position of its PDB
    atom divided by 10 (presumably an Angstrom -> nm conversion; confirm).

    Returns a dictionary {'MMTK': pattern, 'PDB': selection}, where selection
    is a list of Collection objects of the matched atoms, or None when the
    atom counts or the names cannot be matched.

    Note: this function uses Python 2 print statements.
    """

    pdb = PDBConfiguration(filename)
    if verbose:
        print 'A quick look into an MMTK pattern reveals ',
        print 'the presence of\n\t', len(pattern),
        print ' atom collections ready to be matched'
    # getTokens is defined elsewhere; judging from the message below it
    # presumably returns regular-expression tokens found in the PDB file.
    tokens = getTokens(filename)
    if len(tokens) > 0 and verbose:
        print 'RE pattern defined in your PDB file: '
        print tokens
    # Step 1: for every entry of pattern, collect the PDB objects/residues
    # that contain at least one '*' atom.
    # NOTE(review): the scan below runs over all of pdb.objects for each
    # pattern entry and does not depend on the entry, so every entry collects
    # the same PDB items -- confirm this is intended.
    pdb_collection = {}
    for object in range(len(pattern)):
        natom = pattern[object].numberOfAtoms()
        pdb_natom = 0
        for pdb_object in pdb.objects:
            if hasattr(pdb_object, 'atom_list'):
                # gj counts the '*' atoms of this PDB object
                gj = 0
                for atom in pdb_object.atom_list:
                    if atom.properties['element'] == '*': gj = gj + 1
                if gj > 0:
                    try:
                        pdb_collection[object].append(pdb_object)
                    except KeyError:
                        pdb_collection[object] = [pdb_object]
                        # NOTE(review): this update sits inside the except
                        # branch, so only the first item collected for a
                        # pattern entry is counted -- confirm this is intended.
                        pdb_natom = pdb_natom + gj
            if hasattr(pdb_object, 'residues'):
                for residue in pdb_object.residues:
                    # gj counts the '*' atoms of this residue
                    gj = 0
                    for atom in residue.atom_list:
                        if atom.properties['element'] == '*': gj = gj + 1
                    if gj > 0:
                        try:
                            pdb_collection[object].append(residue)
                        except KeyError:
                            pdb_collection[object] = [residue]
                            # NOTE(review): same placement inside the except
                            # branch as above.
                            pdb_natom = pdb_natom + gj
        # Too few counted PDB atoms only gives a warning, too many is fatal.
        if pdb_natom < natom:
            print 'Warning: fewer atoms in PDB for object:',
            print pattern[object]
        elif pdb_natom > natom:
            print 'ERROR: the number of atoms does not match'
            print object, len(pattern), len(pdb.objects)
            print pattern[object].atomList()
            print pdb_natom, pdb_collection[object]
            return None

    # Step 2: match names and copy the PDB positions to the MMTK atoms.
    selection = []
    for object in range(len(pattern)):
        coll = []
        # pdbmap collects the objects whose .pdbmap attribute is used for
        # the name/number lookup below
        pdbmap = []
        type_name = pattern[object].__class__.__name__
        if verbose:
            print '\n\nPROCESSING ', pattern[object],
            print ' of type: ', type_name
            print '----------\n'
        if type_name == 'Protein':
            for chain in pattern[object].chains:
                for residue in chain[0]:
                    pdbmap.append(residue)
        elif type_name == 'SubChain':
            for residue in pattern[object]:
                pdbmap.append(residue)
        elif type_name == 'Residue' or type_name == 'Molecule' or \
             type_name == 'Collection':
            pdbmap.append(pattern[object])

        if type_name == 'Protein' or type_name == 'SubChain' or\
           type_name == 'Residue':
            # Residue-based objects: the it-th residue is paired with the
            # it-th PDB item collected in step 1.
            for it in range(len(pdbmap)):
                pdb_item = pdb_collection[object][it]
                atom_list = pdbmap[it].atomList()
                # upper-cased names listed in the residue's pdbmap
                res_list = []
                for ia in pdbmap[it].pdbmap:
                    res_list.append(ia[0])
                # relies on map() returning a list (Python 2), see .count below
                res_list = map(upper, res_list)
                if res_list.count(upper(pdb_item.name)) == 0:
                    print 'ERROR: problem with matching'
                    print pdb_item.name, res_list
                    return None
                for ia in pdb_item.atoms.keys():
                    atom = pdb_item.atoms[ia]
                    if atom.properties['element'] == '*':
                        # index of the MMTK atom registered under PDB name ia
                        anum = pdbmap[it].pdbmap[0][1][ia].number
                        atra = atom_list[anum]
                        oldpos = atra.position()
                        atra.setPosition(atom.position / 10.)
                        newpos = atra.position()
                        coll.append(atra)
                        if verbose:
                            quickCheck(pdb_item, atom, atra, oldpos, newpos)
            selection.append(Collection(coll))
        elif type_name == 'Molecule' or type_name == 'Collection':
            # Molecules/collections: every collected PDB item is matched
            # against the single object stored in pdbmap[0].
            for it in range(len(pdb_collection[object])):
                pdb_item = pdb_collection[object][it]
                atom_list = pdbmap[0].atomList()
                # res_list is not used in this branch
                res_list = []
                # gj is used as a flag here: 1 once a pdbmap name matches
                gj = 0
                for ia in pdbmap[0].pdbmap:
                    if evalREPattern(pdb_item.name, upper(ia[0]), tokens):
                        gj = 1
                        break
                if not gj:
                    print 'ERROR: problem with matching'
                    return None
                atname_list = pdb_item.atoms.keys()
                for ia in range(len(atname_list)):
                    atom = pdb_item.atoms[atname_list[ia]]
                    if atom.properties['element'] == '*' and \
                       evalREPattern(pdb_item.name,
                                     upper(pdbmap[0].pdbmap[it][0]),tokens):
                        mmtk_map = pdbmap[0].pdbmap[it][1]
                        for ib in mmtk_map.keys():
                            #if evalREPattern(atname_list[ia],ib,tokens):
                            if atname_list[ia] == ib:
                                aname = ib
                                break
                        # NOTE(review): if no key of mmtk_map equals the PDB
                        # atom name, aname keeps its previous value or is
                        # unbound (NameError) -- confirm this cannot happen.
                        anum = pdbmap[0].pdbmap[it][1][aname].number
                        atra = atom_list[anum]
                        oldpos = atra.position()
                        atra.setPosition(atom.position / 10.)
                        newpos = atra.position()
                        coll.append(atra)
                        if verbose:
                            quickCheck(pdb_item, atom, atra, oldpos, newpos)
            selection.append(Collection(coll))
    # Pattern entries of any other class add nothing to selection.
    info = {'MMTK': pattern, 'PDB': selection}
    if verbose:
        print len(Collection(selection).atomList()),
        print 'MMTK atoms in ', len(pattern), 'objects',
        print 'were mapped successfully on PDB atoms in file ', filename
    return info
Example #10
0
# This is a slightly modified version of analysis.py. It uses only the
# C-alpha atoms of the peptide chains, and it discards the primary sequence
# information. This makes it faster (fewer atoms) and suitable for comparing
# proteins of similar fold but with different primary sequences.
#

from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.Proteins import PeptideChain

# First we read the two PDB files.

configuration1 = PDBConfiguration('4q21.pdb.gz')
configuration2 = PDBConfiguration('6q21.pdb.gz')

# Set all residue names to GLY. This permits the comparison
# and superposition of proteins with different sequences. Since
# a C-alpha model is used, the side chains are thrown away anyway.
for conf in [configuration1, configuration2]:
    for chain in conf.peptide_chains:
        for residue in chain:
            residue.name = 'GLY'

# The first file contains a monomer, the second one a tetramer in
# which each chain is almost identical to the monomer from the first
# file. We have to cut off the last (incomplete) residue from the
# monomer and the last three residues of each chain of the tetramers
# to get matching sequences. We'll just deal with one of the chains of
# the tetramer here.

monomer = configuration1.peptide_chains[0]
Example #11
0
# the crystallographic unit cell from the information in a PDB file.
#
# Note that this will not necessarily work with any PDB file. Many files
# use non-crystallographic symmetry information in a non-standard way.
# This is usually explained in REMARK records, but those cannot be
# evaluated automatically.
#

from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.Proteins import Protein

# Read PDB configuration and create MMTK objects for all peptide chains.
# A C-alpha model is used to reduce the system size. You can remove
# 'model="calpha"' to get an all-atom model.
conf = PDBConfiguration('insulin.pdb')
chains = Collection(conf.createPeptideChains(model="calpha"))

# Copy and transform the objects representing the asymmetric unit in order
# to obtain the contents of the unit cell.
chains = conf.asuToUnitCell(chains)

# Construct a periodic universe representing the unit cell.
universe = conf.createUnitCellUniverse()

# Add each chain as one protein. If the unit cell contains multimers,
# the chains must be combined into protein objects by hand,
# as no corresponding information can be extracted from the PDB file.
for chain in chains:
    universe.addObject(Protein(chain))
                dist = Numeric.sqrt(dist2)
                if dist >= contact_factor*(a1.vdW_radius + a2.vdW_radius + cutoff):
                    continue
                if not done.has_key((index1, index2)):
                    clist.append(Contact(a1, a2, dist))
                    done[(index1, index2)] = 1
    return clist

if __name__ == '__main__':
    
    from MMTK.PDB import PDBConfiguration
    from MMTK import Units
    import sys

    target_filename = sys.argv[2]
    pdb_conf1 = PDBConfiguration(target_filename)
    if sys.argv[1][:2] == '-f':
        chains = pdb_conf1.createNucleotideChains()
        molecule_names = []
        if len(chains) >= 2:
            clist = findContacts(chains[0], chains[1])
        else:
            molecule_names = []
            for (key, mol) in pdb_conf1.molecules.items():
                for o in mol:
                    molecule_names.append(o.name)
            targets = pdb_conf1.createAll(molecule_names = molecule_names)
            if len(molecule_names) > 1:
                clist = findContacts(targets[0], targets[1])
            else:
                atoms = targets.atomList()
""" nucleotide_construction.py creates a nucleotide chain with a ligand from PDB file """

from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.NucleicAcids import NucleotideChain
from MMTK.Visualization import view

# Load PDB entry 110d. It contains a single DNA strand with a ligand
# daunomycin.
configuration = PDBConfiguration('110d.pdb')

# Construct nucleotide chain object. This also constructs positions for
# missing hydrogens, using geometrical criteria.
chain = configuration.createNucleotideChains()[0]

# Construct the ligand. There is no definition of it in the database, so it
# can only be constructed as a collection of atoms. The second argument of
# createMolecules() is set to one in order to allow this use of an unknown
# residue.
ligand = configuration.createMolecules(['DM1'], 1)

# Put everything in a universe and show it graphically
universe = InfiniteUniverse()
universe.addObject(chain)
universe.addObject(ligand)

view(universe)
from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.Proteins import Protein
from MMTK.ForceFields import CalphaForceField
from MMTK.NormalModes import EnergeticModes
import pylab
import numpy

# Make a universe for the first configuration
universe = InfiniteUniverse(CalphaForceField())
universe.protein = Protein('1SU4.pdb', model='calpha')
conf_1SU4 = universe.copyConfiguration()

# Apply the second configuration and do a rigid-body superposition fit
PDBConfiguration('1T5T.pdb').applyTo(universe.protein)
tr, rms = universe.findTransformation(conf_1SU4)
universe.applyTransformation(tr)
conf_1T5T = universe.copyConfiguration()

# Set first configuration and calculate normal modes
universe.setConfiguration(conf_1SU4)
modes = EnergeticModes(universe, 300. * Units.K)

# Calculate the normalized difference vector
diff = (conf_1T5T - conf_1SU4).scaledToNorm(1.)

# Calculate the squared overlaps for all modes
mode_numbers = numpy.arange(6, len(modes))
overlaps = [modes.rawMode(i).dotProduct(diff)**2 for i in mode_numbers]
Example #15
0
        forcefield = CalphaForceField(cutoff=cutoff/10.)
    elif 'amber94forcefield'.startswith(ff):
        from MMTK.ForceFields import Amber94ForceField
        forcefield = Amber94ForceField()
        model = 'all'
    else:
        raise NotImplementedError('unknown ff = ' + str(ff))
    if not quiet:
        print ' Forcefield:', forcefield.__class__.__name__

    if model == 'calpha':
        selection = '(%s) and polymer and name CA' % (selection)

    from cStringIO import StringIO
    f = StringIO(cmd.get_pdbstr(selection))
    conf = PDBConfiguration(f)
    items = conf.createPeptideChains(model)

    universe = InfiniteUniverse(forcefield)
    universe.protein = Protein(*items)

    nbasis = max(10, universe.numberOfAtoms()/5)
    cutoff, nbasis = estimateCutoff(universe, nbasis)
    if not quiet:
        print " Calculating %d low-frequency modes." % nbasis

    if cutoff is None:
        modes = NormalModes(universe)
    else:
        subspace = FourierBasis(universe, cutoff)
        modes = SubspaceNormalModes(universe, subspace)
Example #16
0
                                             cutoff):
                    continue
                if not done.has_key((index1, index2)):
                    clist.append(Contact(a1, a2, dist))
                    done[(index1, index2)] = 1
    return clist


if __name__ == '__main__':

    from MMTK.PDB import PDBConfiguration
    from MMTK import Units
    import sys

    target_filename = sys.argv[2]
    pdb_conf1 = PDBConfiguration(target_filename)
    if sys.argv[1][:2] == '-f':
        chains = pdb_conf1.createNucleotideChains()
        molecule_names = []
        if len(chains) >= 2:
            clist = findContacts(chains[0], chains[1])
        else:
            molecule_names = []
            for (key, mol) in pdb_conf1.molecules.items():
                for o in mol:
                    molecule_names.append(o.name)
            targets = pdb_conf1.createAll(molecule_names=molecule_names)
            if len(molecule_names) > 1:
                clist = findContacts(targets[0], targets[1])
            else:
                atoms = targets.atomList()
Example #17
0
from MMTK import *
from MMTK.PDB import PDBConfiguration

# A utility function that creates an image of an object by making
# a copy and applying a transformation to the copy.
def makeImage(object, transformation):
    """Return a deep copy of *object* with *transformation* applied to the
    position of every atom of the copy; *object* itself is left untouched."""
    duplicate = deepcopy(object)
    for current in duplicate.atomList():
        moved = transformation(current.position())
        current.setPosition(moved)
    return duplicate

# Read PDB configuration and create MMTK objects for all peptide chains.
# A C-alpha model is used to reduce the system size. You can remove
# 'model="calpha"' to get an all-atom model, but for insulin this will
# create more than 380000 atoms for the 27-unit-cell crystal!
conf = PDBConfiguration('insulin.pdb')
chains = Collection(conf.createPeptideChains(model="calpha"))

# Apply non-crystallographic symmetries to construct the asymmetric unit
asu = Collection(chains)
for so in conf.ncs_transformations:
    if not so.given:
        image = makeImage(chains, so)
        asu.addObject(image)

# Apply crystallographic symmetries to construct the unit cell
# Note that the list of crystallographic symmetries includes the
# identity transformation, so the unmodified asu is not added
# to the unit cell.
cell = Collection()
for so in conf.cs_transformations:
        pos1 = atom_data[i]
        tot_rad = pos1[3]
        (points1, points_unit) = atom_surf(nbors, i, atom_data, pos1,
                                           tot_rad, point_density, tess1,
                                           ret_fmt >= 2)
        surf_points[a1] = _xlate_results(points1, points_unit, point_density,
                                         tot_rad, pos1, ret_fmt, cent)
    return surf_points

if __name__ == '__main__':
    from MMTK.PDB import PDBConfiguration
    from MMTK import Units
    import profile
    # Load the PDB sequences
    target_filename = sys.argv[1]
    pdb_conf1 = PDBConfiguration(target_filename)
    molecule_names = []
    if 0:               # enable to include all molecules in file
        for (key, mol) in pdb_conf1.molecules.items():
            for o in mol:
                molecule_names.append(o.name)
    target = pdb_conf1.createAll(molecule_names = molecule_names)
    atoms = []
    for a in target.atomList():
        if a.symbol != 'H':
            atoms.append(a)
            if a.index is None:
                a.index = len(atoms)
    point_density = 1026
    s = surface_atoms(atoms, solvent_radius = 1.4*Units.Ang,
                      point_density = point_density, ret_fmt = 4)
Example #19
0
# Retrieve atomic fluctuation information from a PDBConfiguration object.
#

from MMTK import *
from MMTK.PDB import PDBConfiguration
from Scientific import N

# Read a PDB file containing ANISOU records.
conf = PDBConfiguration('1G66.pdb')

# By passing the applyTo methods of MMTK.PDB a dictionary argument
# atom_map, one obtains a dictionary from MMTK atom objects to the
# corresponding PDB atom objects. This dictionary can be used to
# retrieve additional atom data from the PDB file.
atom_map = {}
for c in conf.peptide_chains:
    # Create a PeptideChain object
    chain = c.createPeptideChain()
    # Retrieve the atom_map dictionary
    # Note: this also redefines the configuration, but in this
    # application that makes no difference since it is the same
    # that was defined in the previous line.
    c.applyTo(chain, atom_map=atom_map)

    # Print the B factor and the trace of the anisotropic displacement
    # tensor for each atom for which both are available. They should
    # be equal, but due to the limited precision in PDB files
    # there can be small differences.
    for atom in chain.atomList():
        try:
            pdb_atom = atom_map[atom]
from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.Proteins import PeptideChain, Protein

#
# First problem: construct an all-atom model from a structure without
# hydrogens. This is the standard problem when using an all-atom force
# field with crystallographic structures.
#
# Note: the simple solution in this case is just
#       insulin = Protein('insulin.pdb')
# but the explicit form shown below is necessary when any kind of
# modification is required.
#
# Load the PDB file.
configuration = PDBConfiguration('insulin.pdb')

# Construct the peptide chain objects. This also constructs positions
# for any missing hydrogens, using geometrical criteria.
chains = configuration.createPeptideChains()

# Make the protein object.
insulin = Protein(chains)

# Write out the structure with hydrogens to a new file - we will use
# it as an input example later on.
insulin.writeToFile('insulin_with_h.pdb')

#
# Second problem: read a file with hydrogens and create a structure
# without them. This is useful for analysis; if you don't need the
Example #21
0
def normalmodes_mmtk(selection,
                     cutoff=12.0,
                     ff='Deformation',
                     first=7,
                     last=10,
                     prefix='mmtk',
                     states=7,
                     factor=-1,
                     quiet=1):
    '''
DESCRIPTION

    Fast normal modes for large proteins using an elastic network model (CA only)

    Based on:
    http://dirac.cnrs-orleans.fr/MMTK/using-mmtk/mmtk-example-scripts/normal-modes/

ARGUMENTS

    selection = string: atom selection

    cutoff = float: force field cutoff; divided by 10 before it is passed
    to the MMTK force field {default: 12.0}

    ff = string: force field name, matched case-insensitively as a prefix of
    DeformationForceField, CalphaForceField or Amber94ForceField (the latter
    uses an all-atom model) {default: Deformation}

    first = int: first mode to create an object for {default: 7}

    last = int: last mode to create an object for {default: 10}

    prefix = string: name prefix of the created objects {default: mmtk}

    states = int: number of states per object, must be larger than 1
    {default: 7}

    factor = float: displacement scaling; if negative, log(number of atoms)
    is used {default: -1}

    quiet = 0/1: print progress information if 0 {default: 1}
    '''
    # Imported only to fail early with a helpful message if MMTK is missing.
    try:
        import MMTK
    except ImportError:
        print('Failed to import MMTK, please add to PYTHONPATH')
        raise CmdException

    # cStringIO only exists on Python 2.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    selection = selector.process(selection)
    cutoff = float(cutoff)
    first, last = int(first), int(last)
    states, factor, quiet = int(states), float(factor), int(quiet)

    from math import log
    from chempy import cpv

    from MMTK import InfiniteUniverse
    from MMTK.PDB import PDBConfiguration
    from MMTK.Proteins import Protein

    from MMTK.ForceFields import DeformationForceField, CalphaForceField
    from MMTK.FourierBasis import FourierBasis, estimateCutoff
    from MMTK.NormalModes import NormalModes, SubspaceNormalModes

    model = 'calpha'
    ff = ff.lower()
    if 'deformationforcefield'.startswith(ff):
        forcefield = DeformationForceField(cutoff=cutoff / 10.)
    elif 'calphaforcefield'.startswith(ff):
        forcefield = CalphaForceField(cutoff=cutoff / 10.)
    elif 'amber94forcefield'.startswith(ff):
        from MMTK.ForceFields import Amber94ForceField
        forcefield = Amber94ForceField()
        model = 'all'
    else:
        raise NotImplementedError('unknown ff = ' + str(ff))
    if not quiet:
        # single formatted argument: same output with and without the
        # print function
        print(' Forcefield: %s' % forcefield.__class__.__name__)

    if model == 'calpha':
        selection = '(%s) and polymer and name CA' % (selection)

    f = StringIO(cmd.get_pdbstr(selection))
    conf = PDBConfiguration(f)
    items = conf.createPeptideChains(model)

    universe = InfiniteUniverse(forcefield)
    universe.protein = Protein(*items)

    # Floor division keeps the number of basis vectors an integer on
    # Python 3 as well.
    nbasis = max(10, universe.numberOfAtoms() // 5)
    cutoff, nbasis = estimateCutoff(universe, nbasis)
    if not quiet:
        print(" Calculating %d low-frequency modes." % nbasis)

    if cutoff is None:
        modes = NormalModes(universe)
    else:
        subspace = FourierBasis(universe, cutoff)
        modes = SubspaceNormalModes(universe, subspace)

    natoms = modes.array.shape[1]
    frequencies = modes.frequencies

    if factor < 0:
        factor = log(natoms)
        if not quiet:
            print(' set factor to %.2f' % (factor))

    # Always reload the structure as written by MMTK so that the atom order
    # of the PyMOL object can be mapped to the mode vectors through "z".
    import os
    import tempfile
    from MMTK import DCD

    # mkstemp creates the file atomically (mktemp is race-prone); only the
    # path is needed, so the descriptor is closed right away.
    handle, filename = tempfile.mkstemp(suffix='.pdb')
    os.close(handle)
    try:
        sequence = DCD.writePDB(universe, None, filename)
        z = [a.index for a in sequence]
        selection = cmd.get_unused_name('_')
        cmd.load(filename, selection, zoom=0)
    finally:
        # remove the temporary file even if writing or loading failed
        os.remove(filename)

    if cmd.count_atoms(selection) != natoms:
        print('hmm... still wrong number of atoms')

    def eigenfacs_iter(mode):
        # displacement vectors of one mode, reordered with the indices in z
        x = modes[mode - 1].array
        return iter(x.take(z, 0))

    for mode in range(first, min(last, len(modes)) + 1):
        name = prefix + '%d' % mode
        cmd.delete(name)

        if not quiet:
            print(' normalmodes: object "%s" for mode %d with freq. %.6f' %
                  (name, mode, frequencies[mode - 1]))

        for state in range(1, states + 1):
            cmd.create(name, selection, 1, state, zoom=0)
            # The displacement runs linearly from -0.5 to +0.5 (times
            # 1e2 * factor) over the states; this divides by (states - 1).
            amplitude = 1e2 * factor * ((state - 1.0) / (states - 1.0) - 0.5)
            cmd.alter_state(
                state,
                name,
                '(x,y,z) = cpv.add([x,y,z], cpv.scale(next(myit), myfac))',
                space={
                    'cpv': cpv,
                    'myit': eigenfacs_iter(mode),
                    'next': next,
                    'myfac': amplitude,
                })

    cmd.delete(selection)
    if model == 'calpha':
        cmd.set('ribbon_trace_atoms', 1, prefix + '*')
        cmd.show_as('ribbon', prefix + '*')
    else:
        cmd.show_as('lines', prefix + '*')
Example #22
0
# This example shows how a universe can be built from a PDB file in such
# a way that all objects in the PDB file are represented as well as
# possible, using AtomCluster objects when nothing more specific can
# be constructed.
#
# This procedure is the only way to construct a universe that uses the
# same internal atom order as the PDB file, which is important for
# data exchange with other programs.

from MMTK import *
from MMTK.PDB import PDBConfiguration

# Load the PDB file ('some_file.pdb' is a placeholder name).
configuration = PDBConfiguration('some_file.pdb')
# Start from an empty universe.
universe = InfiniteUniverse()
# Create the objects described by the file and add them to the universe.
# NOTE(review): the arguments (None, 1) presumably mean "no molecule-name
# filter" and "fall back to AtomCluster objects for unknown residues" --
# confirm against the MMTK.PDB.PDBConfiguration.createAll documentation.
universe.addObject(configuration.createAll(None, 1))
Example #23
0
        pos1 = atom_data[i]
        tot_rad = pos1[3]
        (points1, points_unit) = atom_surf(nbors, i, atom_data, pos1,
                                           tot_rad, point_density, tess1,
                                           ret_fmt >= 2)
        surf_points[a1] = _xlate_results(points1, points_unit, point_density,
                                         tot_rad, pos1, ret_fmt, cent)
    return surf_points

if __name__ == '__main__':
    # Command-line driver: compute surface points for the non-hydrogen
    # atoms of the PDB file given as the first argument.
    from MMTK.PDB import PDBConfiguration
    from MMTK import Units
    import profile

    # Read the PDB file named on the command line.
    target_filename = sys.argv[1]
    pdb_conf1 = PDBConfiguration(target_filename)

    # With the switch below left at 0 the name list stays empty.
    molecule_names = []
    if 0:               # enable to include all molecules in file
        for (key, mol) in pdb_conf1.molecules.items():
            molecule_names.extend(o.name for o in mol)
    target = pdb_conf1.createAll(molecule_names = molecule_names)

    # Collect every atom that is not a hydrogen.
    atoms = []
    for a in target.atomList():
        if a.symbol == 'H':
            continue
        atoms.append(a)
        if a.index is None:
            # Atoms without an index get their 1-based position in the
            # filtered list (the list length right after appending).
            a.index = len(atoms)

    point_density = 1026
    s = surface_atoms(atoms, solvent_radius = 1.4*Units.Ang,
                      point_density = point_density, ret_fmt = 4)
Example #24
0
def _get_molecules(pdb_file):
    """Return all objects created from the PDB file ``pdb_file``.

    ``pdb_file`` is handed unchanged to ``PDBConfiguration``; the result of
    that configuration's ``createAll()`` call is returned as is.
    """
    return PDBConfiguration(pdb_file).createAll()
Example #25
0
def generate_ramachandran(pdb_id):
    """Draw a Ramachandran plot for a PDB entry and return an HTML image tag.

    The structure is read from ``../data/<pdb_id>.pdb``.  The phi/psi angles
    of every residue are computed and sorted into the four Ramachandran
    classes (General, Glycine, Proline, Pre-Pro).  The residues of the
    General class are then drawn as a scatter plot on top of the filled
    contours of the "General" reference table, and the figure is written as
    a PNG file into ``../tmp/``.

    The function relies on names provided elsewhere in the module: ``r``
    (the R bridge), ``numpy``, ``math``, ``random``, ``PDBConfiguration``
    and ``Protein``.

    pdb_id -- string identifying the entry; it is inserted into the input
              file name, the output file name and the plot title.

    Returns the string ``<img src="/~jean/projet/tmp/<png file>"/>``.

    Raises ValueError if the reference data file contains a malformed line;
    errors from opening files or from R propagate unchanged.
    """
    rama_GENERAL = "General"
    rama_GLYCINE = "Glycine"
    rama_PROLINE = "Proline"
    rama_PRE_PRO = "Pre-Pro"
    ramachandran_types = [rama_GENERAL, rama_GLYCINE, rama_PROLINE, rama_PRE_PRO]

    # The class whose reference contours AND scatter points are plotted.
    # The previous code always used the "General" contours but took the
    # scatter points from whatever class the last residue in the file
    # happened to have; both are now tied to the same class.
    plot_type = rama_GENERAL

    # Per class: (contour levels, fill colours, reference data file).
    # The colours are the same as those used by RAMPAGE
    # http://raven.bioc.cam.ac.uk/rampage.php
    rama_settings = {
        rama_GENERAL: ([0, 0.0005, 0.02, 1],
                       ['#FFFFFF', '#B3E8FF', '#7FD9FF'],
                       "top500angles/pct/rama/rama500-general.data"),
                       # or rama500-general-nosec.data
        rama_GLYCINE: ([0, 0.002, 0.02, 1],
                       ['#FFFFFF', '#FFE8C5', '#FFCC7F'],
                       "top500angles/pct/rama/rama500-gly-sym.data"),
                       # or rama500-gly-sym-nosec.data
        rama_PROLINE: ([0, 0.002, 0.02, 1],
                       ['#FFFFFF', '#D0FFC5', '#7FFF8C'],
                       "top500angles/pct/rama/rama500-pro.data"),
        rama_PRE_PRO: ([0, 0.002, 0.02, 1],
                       ['#FFFFFF', '#B3E8FF', '#7FD9FF'],
                       "top500angles/pct/rama/rama500-prepro.data"),
    }
    # P.S. Also rama500-ala-nosec.data

    def load_data_file(filename):
        """Read a reference density table into a square numpy array.

        Returns ``(data, lower_bounds, mid_points, upper_bounds)`` where
        ``data[i, j]`` holds the value given for phi == mid_points[i] and
        psi == mid_points[j].  Blank lines and lines starting with '#' are
        skipped.  Raises ValueError for any other line that is not
        "<phi> <psi> <value>" with phi and psi on the bin mid points.
        """
        HALF_STEP = 1
        STEP = HALF_STEP * 2
        lower_bounds = list(range(-180, 180, STEP))
        mid_points = list(range(-180 + HALF_STEP, 180 + HALF_STEP, STEP))
        upper_bounds = list(range(-180 + STEP, 180 + STEP, STEP))
        # Constant-time replacement for repeated mid_points.index() calls.
        bin_index = dict((mid, i) for i, mid in enumerate(mid_points))

        data = numpy.zeros((len(mid_points), len(mid_points)))

        # Expected file layout:
        #
        # # Table name/description: "Top500 General case (not Gly, Pro, or pre-Pro) B<30"
        # # Number of dimensions: 2
        # # For each dimension, 1 to 2: lower_bound  upper_bound  number_of_bins  wrapping
        # #   x1: -180.0 180.0 180 true
        # #   x2: -180.0 180.0 180 true
        # # List of table coordinates and values. (Value is last number on each line.)
        # -179.0 -179.0 0.0918642445114388
        # -179.0 -177.0 0.07105717866463215
        #     ...
        with open(filename, "r") as input_file:
            for line in input_file:
                line = line.strip()
                if not line or line[0] == "#":
                    # blank line or comment
                    continue

                parts = line.split()
                if len(parts) != 3:
                    raise ValueError("expected 3 fields in %s, got: %r"
                                     % (filename, line))

                x1 = float(parts[0])  # phi
                x2 = float(parts[1])  # psi
                value = float(parts[2])

                if x1 != int(x1) or x2 != int(x2):
                    raise ValueError("non-integer grid coordinate in %s: %r"
                                     % (filename, line))
                try:
                    i1 = bin_index[int(x1)]
                    i2 = bin_index[int(x2)]
                except KeyError:
                    raise ValueError("grid coordinate off the bin mid points "
                                     "in %s: %r" % (filename, line))

                data[i1, i2] = value
        return (data, lower_bounds, mid_points, upper_bounds)

    r.library("MASS")

    # Define the R plotting function (filled contours plus scatter points).
    r("""
    ramachandran.plot <- function(x.scatter, y.scatter,
        x.grid = seq(0, 1, len = nrow(z)), y.grid = seq(0, 1, len = ncol(z)), z.grid,
        xlim = range(x.grid, finite = TRUE), ylim = range(y.grid, finite = TRUE),
        zlim = range(z.grid, finite = TRUE), levels = pretty(zlim, nlevels),
        nlevels = 20, color.palette = cm.colors, col = color.palette(length(levels) -
            1), plot.title="", plot.axes, key.title, key.axes, asp = NA,
        xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes,
        ...)
    {
        if (missing(z.grid)) {
            stop("no 'z.grid' matrix specified")
        }
        else if (is.list(x.grid)) {
            y.grid <- x.grid$y
            x.grid <- x.grid$x
        }
        if (any(diff(x.grid) <= 0) || any(diff(y.grid) <= 0))
            stop("increasing 'x.grid' and 'y.grid' values expected")

        plot.new()
        plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp)

        if (!is.matrix(z.grid) || nrow(z.grid) <= 1 || ncol(z.grid) <= 1)
            stop("no proper 'z.grid' matrix specified")
        if (!is.double(z.grid))
            storage.mode(z.grid) <- "double"
        .filled.contour(as.double(x.grid), as.double(y.grid), z.grid, as.double(levels), 
                                col = col)

        if (!(missing(x.scatter)) && !(missing(y.scatter))) {
            plot.xy(xy.coords(x.scatter,y.scatter,NULL,NULL,NULL,NULL),
                    xlim=xlim, ylim=ylim, xlab="", ylab="", asp=asp,
                    type="p", pch=20, cex=0.1)
        }
            
        if (missing(plot.axes)) {
            if (axes) {
                title(main=plot.title, xlab=expression(phi), ylab=expression(psi))
                axis(1, at=c(-180,-90,0,90,180))
                axis(2, at=c(-180,-90,0,90,180))
            }
        }
        else plot.axes
        if (frame.plot)
            box()
        if (missing(plot.title))
            title(...)
        else plot.title
        invisible()
    }
    """)

    def degrees(rad_angle):
        """Convert an angle in radians to degrees, mapped to the range [-180,180]"""
        angle = rad_angle * 180 / math.pi
        # The original note says MMTK hands back positive angles; negative
        # values are wrapped as well so the documented range always holds.
        while angle > 180:
            angle = angle - 360
        while angle < -180:
            angle = angle + 360
        return angle

    def next_residue(residue):
        """Expects an MMTK residue, returns the next residue in the chain, or None"""
        # Proteins go N terminal --> C terminal.
        # The next residue is bonded to the C of this residue's peptide group.
        for a in residue.peptide.C.bondedTo():
            if a.parent.parent != residue:
                return a.parent.parent
        return None

    def residue_amino(residue):
        """Expects an MMTK residue (or None), returns the three letter amino acid code in upper case, or None"""
        if residue is None:
            return None
        return residue.name[0:3].upper()

    def residue_ramachandran_type(residue):
        """Expects an MMTK residue, returns ramachandran 'type' (General, Glycine, Proline or Pre-Pro)"""
        amino = residue_amino(residue)
        if amino == "GLY":
            return rama_GLYCINE
        if amino == "PRO":
            return rama_PROLINE
        if residue_amino(next_residue(residue)) == "PRO":
            # excludes those that are Pro or Gly themselves
            return rama_PRE_PRO
        return rama_GENERAL

    scatter_phi = dict((name, []) for name in ramachandran_types)
    scatter_psi = dict((name, []) for name in ramachandran_types)

    # NOTE(review): pdb_id is inserted unescaped into a file path, an R
    # command string and the returned HTML.  If it can come from a web
    # request it should be validated by the caller -- confirm.
    pdb_filename = "../data/%s.pdb" % pdb_id

    # Load the PDB file, ignore the hydrogens, and then build a model of the peptides:
    configuration = PDBConfiguration(pdb_filename)
    configuration.deleteHydrogens()
    protein = Protein(configuration.createPeptideChains(model = "no_hydrogens"))
    for chain in protein:
        for residue in chain:
            phi, psi = residue.phiPsi()
            # Compare against None explicitly: an angle of exactly 0.0 is a
            # valid value and must not be dropped by a truthiness test.
            if phi is not None and psi is not None:
                residue_type = residue_ramachandran_type(residue)
                assert residue_type in ramachandran_types
                scatter_phi[residue_type].append(degrees(phi))
                scatter_psi[residue_type].append(degrees(psi))

    (rama_levels, rama_colors, rama_filename) = rama_settings[plot_type]

    # Read the reference table before opening the graphics device, so that
    # a bad data file cannot leave a device open.
    data, lower_bounds, mid_points, upper_bounds = load_data_file(rama_filename)

    png_filename = "ppii%d%s.png" % (random.randint(0, 1000000), pdb_id)
    png_filepath = "../tmp/" + png_filename
    png_command = 'png("' + png_filepath + '")'

    r(png_command)
    try:
        r.ramachandran_plot(x_scatter=scatter_phi[plot_type],
                            y_scatter=scatter_psi[plot_type],
                            x_grid=mid_points, y_grid=mid_points, z_grid=data,
                            xlim=[-180,180], ylim=[-180,180], asp=1.0,
                            plot_title="Ramachandran plot of " + pdb_id, drawlabels=False,
                            levels=rama_levels, col=rama_colors)
    finally:
        # Always close the PNG device, even if plotting failed.
        r("dev.off()")

    return '<img src="/~jean/projet/tmp/' + png_filename + '"/>'
Example #26
0
    elif 'calphaforcefield'.startswith(ff):
        forcefield = CalphaForceField(cutoff=cutoff / 10.)
    elif 'amber94forcefield'.startswith(ff):
        from MMTK.ForceFields import Amber94ForceField
        forcefield = Amber94ForceField()
        model = 'all'
    else:
        raise NotImplementedError('unknown ff = ' + str(ff))
    if not quiet:
        print(' Forcefield:', forcefield.__class__.__name__)

    if model == 'calpha':
        selection = '(%s) and polymer and name CA' % (selection)

    f = StringIO(cmd.get_pdbstr(selection))
    conf = PDBConfiguration(f)
    items = conf.createPeptideChains(model)

    universe = InfiniteUniverse(forcefield)
    universe.protein = Protein(*items)

    nbasis = max(10, universe.numberOfAtoms() / 5)
    cutoff, nbasis = estimateCutoff(universe, nbasis)
    if not quiet:
        print(" Calculating %d low-frequency modes." % nbasis)

    if cutoff is None:
        modes = NormalModes(universe)
    else:
        subspace = FourierBasis(universe, cutoff)
        modes = SubspaceNormalModes(universe, subspace)
Example #27
0
import time

# NOTE(review): `file` shadows the Python 2 builtin of the same name and is
# not closed in the part of the script visible here -- confirm that it is
# closed further down before renaming or restructuring.
file = open("output.txt", 'w')

# Wall-clock reference taken before the set-up work below.
start = time.time()

#
# First problem: construct an all-atom model from a structure without
# hydrogens. This is the standard problem when using an all-atom force
# field with crystallographic structures.
#

# Load the PDB file.
configuration = PDBConfiguration('insulin.pdb')

# Construct the peptide chain objects. This also constructs positions
# for any missing hydrogens, using geometrical criteria.
chains = configuration.createPeptideChains()

# Define system: an infinite universe using the Amber99 force field with
# the 'frcmod.ff99SB' modification file, holding the protein built from
# the chains.
universe = InfiniteUniverse(Amber99ForceField(mod_files=['frcmod.ff99SB']))
universe.protein = Protein(chains)

# Initialize velocities
universe.initializeVelocitiesToTemperature(50. * Units.K)
# A single pre-formatted argument prints the same text under the Python 2
# print statement and the Python 3 print function (the two spaces reproduce
# the separator the old two-argument print statement inserted).
print('Temperature:  %s' % (universe.temperature(),))
Example #28
0
#!/usr/bin/python
"""
The Python script below will calculate the radius of gyration for the
assembly of all molecules specified in a PDB file (typically the
asymmetric unit). To run it, you need the Molecular Modelling
Toolkit, available from http://dirac.cnrs-orleans.fr/MMTK/
"""

from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.PDBMoleculeFactory import PDBMoleculeFactory
from Scientific import N
import sys

# Read the PDB file named by the first command-line argument.
conf = PDBConfiguration(sys.argv[1])
# Retrieve the molecules through a PDBMoleculeFactory built on that
# configuration and gather them into a single Collection.
factory = PDBMoleculeFactory(conf)
molecules = Collection(factory.retrieveMolecules())


def radiusOfGyration(m):
    """Return the radius of gyration of the atoms in ``m``.

    The center is the unweighted mean of all atom positions; the result is
    the square root of the mean squared distance of the atoms from that
    center.  ``m`` must provide ``numberOfAtoms()`` and ``atomList()``.
    """
    natoms = m.numberOfAtoms()
    positions = [atom.position() for atom in m.atomList()]
    center = sum(positions, Vector(0., 0., 0.)) / natoms
    squared_distances = [(p - center).length()**2 for p in positions]
    sum_r = sum(squared_distances)
    return N.sqrt(sum_r / natoms)


# Print the radius of gyration divided by Units.Ang (presumably converting
# it to Angstrom -- confirm).  The parenthesized single-argument form gives
# the same output under the Python 2 print statement and the Python 3
# print function.
print(radiusOfGyration(molecules) / Units.Ang)
Example #29
0

# A utility function that creates an image of an object by making
# a copy and applying a transformation to the copy.
def makeImage(object, transformation):
    """Return a transformed deep copy of ``object``.

    ``object`` is left untouched.  Every atom of the copy is moved to the
    position obtained by calling ``transformation`` on its current position.
    """
    clone = deepcopy(object)
    for member in clone.atomList():
        moved = transformation(member.position())
        member.setPosition(moved)
    return clone


# Load the insulin structure and build MMTK objects for all of its peptide
# chains. The C-alpha model keeps the system small; dropping
# 'model="calpha"' yields an all-atom model, but for insulin this will
# create more than 380000 atoms for the 27-unit-cell crystal!
conf = PDBConfiguration('insulin.pdb')
chains = Collection(conf.createPeptideChains(model="calpha"))

# Build the asymmetric unit: start from the chains themselves and add one
# image for each non-crystallographic symmetry operation that is not
# flagged as "given".
asu = Collection(chains)
for so in conf.ncs_transformations:
    if so.given:
        continue
    image = makeImage(chains, so)
    asu.addObject(image)

# Apply crystallographic symmetries to construct the unit cell
# Note that the list of crystallographic symmetries includes the
# identity transformation, so the unmodified asu is not added
# to the unit cell.
cell = Collection()
for so in conf.cs_transformations:
# Create a nucleotide chain with a ligand from a PDB file.
#

from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.NucleicAcids import NucleotideChain
from MMTK.Visualization import view

# Load the PDB entry 110d. It contains a single DNA strand with a
# ligand (daunomycin).
configuration = PDBConfiguration('110d.pdb')

# Construct the nucleotide chain object. This also constructs positions
# for the missing hydrogens, using geometrical criteria.
# Only the first chain of the returned list is used.
chain = configuration.createNucleotideChains()[0]

# Construct the ligand. There is no definition of it in the database,
# so it can only be constructed as a collection of atoms. The second
# argument of createMolecules() is set to one in order to allow
# this use of an unknown residue.
ligand = configuration.createMolecules(['DM1'], 1)

# Put everything in a universe and show it graphically.
universe = InfiniteUniverse()
universe.addObject(chain)
universe.addObject(ligand)

view(universe)