def initialize(self):
    """
    Initialize the input parameters and analysis self variables
    """
    # Open the DCD trajectory and store the reader next to its
    # configuration entry.
    dcd_entry = self.configuration["dcd_file"]
    dcd_entry["instance"] = DCDFile(dcd_entry['filename'])
    dcd = dcd_entry["instance"]

    # The number of steps of the analysis.
    self.numberOfSteps = dcd['n_frames']

    # Create a collection of all the chemical objects stored in the PDB file.
    pdb_path = self.configuration['pdb_file']['filename']
    molecules = PDBConfiguration(pdb_path).createAll()

    # A periodic universe is used when the trajectory carries PBC data,
    # an infinite one otherwise.
    if dcd['has_pbc_data']:
        universe_class = ParallelepipedicPeriodicUniverse
    else:
        universe_class = InfiniteUniverse
    self._universe = universe_class()

    # The chemical objects found in the PDB file are put into the universe.
    self._universe.addObject(molecules)
    resolve_undefined_molecules_name(self._universe)

    # A MMTK trajectory is opened for writing.
    output_path = self.configuration['output_file']['files'][0]
    self._trajectory = Trajectory(self._universe, output_path, mode='w')

    # A frame generator that writes into that trajectory.
    output_action = TrajectoryOutput(self._trajectory, ["all"], 0, None, 1)
    self._snapshot = SnapshotGenerator(self._universe, actions=[output_action])
def optimize(self):
    """Run the optimization with MMTK

    Reads the structure from ``self.temp_pdb_file``, builds the first
    nucleotide chain in an infinite universe, minimizes it for
    ``self.cycles`` conjugate-gradient steps with the Amber94 force field
    (plus the terms returned by ``self.create_restraints`` when there are
    any), writes the result to ``self.mmtk_output_file`` and finally copies
    that file back over ``self.temp_pdb_file``.

    Progress is reported on stdout.  Every ``print`` call takes a single
    string, so the output is the same under Python 2 and Python 3.
    """
    print('\n-------------------------------------------------------------------------------')
    print('\n MMTK Optimization starts...')
    print('\t.. building universe')
    configuration = PDBConfiguration(self.temp_pdb_file)
    # Construct the nucleotide chain object. This also constructs positions
    # for the missing hydrogens, using geometrical criteria.
    chain = configuration.createNucleotideChains()[0]
    universe = InfiniteUniverse()
    universe.addObject(chain)

    restraints = self.create_restraints(chain)

    # define force field
    print('\t.. setting up force field')
    ff = Amber94ForceField()
    if restraints:
        ff = ff + restraints
    universe.setForceField(ff)

    # do the minimization
    print('\t.. starting minimization with %i cycles' % self.cycles)
    minimizer = ConjugateGradientMinimizer(universe)
    minimizer(steps=self.cycles)

    # write the intermediate output
    print('\t.. writing MMTK output to %s' % self.temp_pdb_file)
    if self.model_passive:
        print('\t (please note that MMTK applies a different numeration of residues.\n\t The original one will be restored in the final output).')
    universe.writeToFile(self.mmtk_output_file)

    # Copy the MMTK output over the temporary PDB file.  The source is read
    # completely before the target is opened (and truncated), and both
    # handles are closed deterministically.
    with open(self.mmtk_output_file) as mmtk_output:
        pdb_text = mmtk_output.read()
    with open(self.temp_pdb_file, 'w') as temp_pdb:
        temp_pdb.write(pdb_text)
    print('\n-------------------------------------------------------------------------------')
def get_sasa_mmtk(selection, state=-1, hydrogens='auto', quiet=1):
    '''
DESCRIPTION

    Get solvent accessible surface area using MMTK.MolecularSurface

    http://dirac.cnrs-orleans.fr/MMTK/

    This command is very picky with missing atoms and wrong atom naming.

ARGUMENTS

    selection = string: atom selection

    state = int: object state to export {default: -1 (current state)}

    hydrogens = auto|all|none|no_hydrogens: hydrogen model handed to MMTK;
    'auto' picks 'all' if the selection contains hydrogens and
    'no_hydrogens' otherwise {default: auto}

    quiet = 0/1: print the result if 0 {default: 1}

SEE ALSO

    stub2ala, get_sasa, get_sasa_ball
    '''
    try:
        import MMTK
    except ImportError:
        print(' ImportError: please install MMTK')
        raise CmdException

    from MMTK.PDB import PDBConfiguration
    from MMTK.Proteins import Protein
    from MMTK.MolecularSurface import surfaceAndVolume

    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    selection = selector.process(selection)
    state, quiet = int(state), int(quiet)
    radius = cmd.get_setting_float('solvent_radius')

    if hydrogens == 'auto':
        if cmd.count_atoms('(%s) and hydro' % selection) > 0:
            hydrogens = 'all'
        else:
            hydrogens = 'no_hydrogens'
    elif hydrogens == 'none':
        hydrogens = 'no_hydrogens'

    # Export the requested state; previously `state` was parsed but ignored
    # and the current state was always used.  The default (-1) still means
    # the current state.
    conf = PDBConfiguration(StringIO(cmd.get_pdbstr(selection, state)))
    system = Protein(conf.createPeptideChains(hydrogens))

    try:
        # radius * 0.1: presumably Angstrom -> nm, matching the 1e2 / 1e3
        # factors applied to the area and volume below.
        area, volume = surfaceAndVolume(system, radius * 0.1)
    except Exception:
        # `Exception` rather than a bare except, so that Ctrl-C and
        # SystemExit are not converted into a CmdException.
        print(' Error: MMTK.MolecularSurface.surfaceAndVolume failed')
        raise CmdException

    if not quiet:
        print(' get_sasa_mmtk: %.3f Angstroms^2 (volume: %.3f Angstroms^3).' % (area * 1e2, volume * 1e3))
    return area * 1e2
def parsePDBAtomSelection(filename, traj):
    """Return the universe atoms that are marked in a PDB file.

    The PDB file <filename> is expected to list the same objects, in the
    same order, as the universe of <traj>.  An atom counts as selected when
    the 'element' property of its PDB record is '*'.

    Returns a Collection of the selected MMTK atoms, or None when the PDB
    contents cannot be matched against the universe (atom counts differ,
    the PDB file runs out of objects, or a residue/molecule name differs).
    """
    univ = traj.universe
    pdb = PDBConfiguration(filename)

    # find objects in UNIV and PDB with the same number of atoms
    pdb_objects = pdb.objects
    pdb_index = 0
    pdb_collection = {}
    for obj_index in range(len(univ)):
        natom = len(univ[obj_index].atomList())
        pdb_natom = 0
        while pdb_natom < natom:
            if pdb_index >= len(pdb_objects):
                # The PDB file holds fewer atoms than the universe: report
                # it like every other mismatch instead of an IndexError.
                return None
            pdb_object = pdb_objects[pdb_index]
            if hasattr(pdb_object, 'atom_list'):
                # no groups
                pdb_collection.setdefault(obj_index, []).append(pdb_object)
                pdb_natom += len(pdb_object.atom_list)
            elif hasattr(pdb_object, 'residues'):
                # biopolymers: matched residue by residue
                for residue in pdb_object.residues:
                    pdb_collection.setdefault(obj_index, []).append(residue)
                    pdb_natom += len(residue.atom_list)
            pdb_index += 1
        if pdb_natom != natom:
            return None  # ERROR

    # match PDBMAP names from UNIV against PDB atom names
    # and add selected atoms to the collection
    selection = []
    for obj_index in range(len(univ)):
        univ_object = univ[obj_index]
        type_name = univ_object.__class__.__name__
        pdbmap = []
        if type_name == 'Protein':
            for chain in univ_object.chains:
                for residue in chain[0]:
                    pdbmap.append(residue)
        elif type_name == 'Molecule':
            pdbmap.append(univ_object)

        for item, mmtk_item in enumerate(pdbmap):
            pdb_item = pdb_collection[obj_index][item]
            atom_list = mmtk_item.atomList()
            if upper(mmtk_item.pdbmap[0][0]) != upper(pdb_item.name):
                return None  # ERROR
            for atom_name, atom in pdb_item.atoms.items():
                if atom.properties['element'] == '*':
                    # pdbmap translates the PDB atom name into the index
                    # of the atom inside atomList()
                    anum = mmtk_item.pdbmap[0][1][atom_name].number
                    selection.append(atom_list[anum])
    return Collection(selection)
def setUp(self):
    """Load the 2ONX reflections and unit cell and set up isotropic ADPs."""
    self.reflections = MMCIFStructureFactorData('2onx-sf.cif.gz',
                                                fill=True).reflections

    # Build the unit-cell universe from the matching PDB entry.
    pdb_conf = PDBConfiguration('2ONX.pdb.gz')
    self.universe = PDBMoleculeFactory(pdb_conf).retrieveUnitCell()
    assert self.reflections.cell.isCompatibleWith(self.universe, 1.e-3)

    # One tensor per atom: the B factor divided by 8*pi^2, times `delta`
    # (presumably the unit tensor -- defined outside this method).
    b_to_u = 8. * N.pi**2
    self.adps = ParticleTensor(self.universe)
    for atom in self.universe.atomList():
        u_iso = atom.temperature_factor / b_to_u
        self.adps[atom] = u_iso * delta
def run(pdb1, pdb2, trajectory, nsteps, delpdb=0):
    """Compute a transition path between two conformations of a protein.

    pdb1 and pdb2 are PDB files of the start and end conformation, both
    read as C-alpha models.  The path is refined for nsteps steps and the
    best path is written to trajectory.  With a true delpdb both input
    files are deleted once they have been read.
    """
    universe = TransitionPathUniverse(CalphaForceField(2.5))
    universe.protein = Protein(pdb1, model='calpha')
    start_conf = copy(universe.configuration())

    # Impose the second structure chain by chain.
    end_chains = PDBConfiguration(pdb2).peptide_chains
    for index, chain in enumerate(universe.protein):
        end_chains[index].applyTo(chain)

    if delpdb:
        for path_name in (pdb1, pdb2):
            os.unlink(path_name)

    # Superpose the end conformation onto the start conformation.
    transformation, rms = universe.findTransformation(start_conf)
    universe.applyTransformation(transformation)
    end_conf = copy(universe.configuration())
    universe.setBoundingBox(start_conf, end_conf)

    path = TransitionPath(universe, start_conf, end_conf,
                          step_length=0.05, nmodes=50)
    path.refine(nsteps)

    steps_text = "Transition path after %d steps, " % nsteps
    energy_text = "energy %d," % path.best_penalty
    path.writeBestToTrajectory(trajectory, steps_text + energy_text)
from MMTK import * from MMTK.PDB import PDBConfiguration from MMTK.Proteins import PeptideChain, Protein # # First problem: construct an all-atom model from a structure without # hydrogens. This is the standard problem when using an all-atom force # field with crystallographic structures. # # Note: the simple solution in this case is just # insulin = Protein('insulin.pdb') # but the explicit form shown below is necessary when any kind of # modification is required. # # Load the PDB file. configuration = PDBConfiguration('insulin.pdb') # Construct the peptide chain objects. This also constructs positions # for any missing hydrogens, using geometrical criteria. chains = configuration.createPeptideChains() # Make the protein object. insulin = Protein(chains) # Write out the structure with hydrogens to a new file - we will use # it as an input example later on. insulin.writeToFile('insulin_with_h.pdb') # # Second problem: read a file with hydrogens and create a structure
def parsePDBReference(filename, pattern, verbose=None):
    """
    for a given filename of a PDB file match atoms against
    objects in a list <pattern>

    Only PDB atoms whose 'element' property is '*' take part in the
    matching.  Every matched MMTK atom is moved to the position of its PDB
    atom divided by 10 (presumably Angstrom -> nm; confirm against the
    unit convention of the caller).

    Returns a dictionary {'MMTK': pattern, 'PDB': selection}, where
    selection holds one Collection of matched atoms for each pattern entry
    of type Protein, SubChain, Residue, Molecule or Collection.  Returns
    None when the PDB file marks more atoms than a pattern entry has, or
    when a residue/molecule name cannot be matched.  With a true <verbose>
    progress information is printed.
    """
    pdb = PDBConfiguration(filename)
    if verbose:
        print 'A quick look into an MMTK pattern reveals ',
        print 'the presence of\n\t', len(pattern),
        print ' atom collections ready to be matched'
    tokens = getTokens(filename)
    if len(tokens) > 0 and verbose:
        print 'RE pattern defined in your PDB file: '
        print tokens

    # Step 1: for every pattern entry collect the PDB groups/residues that
    # contain at least one marked ('*') atom and count those atoms.
    # NOTE(review): the inner loop walks over ALL pdb.objects for every
    # pattern entry, so each entry receives the same PDB items -- confirm
    # that this is intended when <pattern> has more than one entry.
    pdb_collection = {}
    for object in range(len(pattern)):
        natom = pattern[object].numberOfAtoms()
        pdb_natom = 0
        for pdb_object in pdb.objects:
            if hasattr(pdb_object, 'atom_list'):
                # gj counts the marked atoms of this PDB object
                gj = 0
                for atom in pdb_object.atom_list:
                    if atom.properties['element'] == '*': gj = gj + 1
                if gj > 0:
                    try: pdb_collection[object].append(pdb_object)
                    except KeyError: pdb_collection[object] = [pdb_object]
                    pdb_natom = pdb_natom + gj
            if hasattr(pdb_object, 'residues'):
                for residue in pdb_object.residues:
                    gj = 0
                    for atom in residue.atom_list:
                        if atom.properties['element'] == '*': gj = gj + 1
                    if gj > 0:
                        try: pdb_collection[object].append(residue)
                        except KeyError: pdb_collection[object] = [residue]
                        pdb_natom = pdb_natom + gj
        if pdb_natom < natom:
            # fewer marked atoms than MMTK atoms is tolerated
            print 'Warning: fewer atoms in PDB for object:',
            print pattern[object]
        elif pdb_natom > natom:
            print 'ERROR: the number of atoms does not match'
            print object, len(pattern), len(pdb.objects)
            print pattern[object].atomList()
            print pdb_natom, pdb_collection[object]
            return None

    # Step 2: map the marked PDB atoms onto MMTK atoms through the pdbmap
    # of each pattern entry and move the MMTK atoms to the PDB positions.
    selection = []
    for object in range(len(pattern)):
        coll = []
        pdbmap = []
        type_name = pattern[object].__class__.__name__
        if verbose:
            print '\n\nPROCESSING ', pattern[object],
            print ' of type: ', type_name
            print '----------\n'
        # pdbmap becomes the list of MMTK items that carry a pdbmap:
        # the residues of a Protein/SubChain, or the entry itself.
        if type_name == 'Protein':
            for chain in pattern[object].chains:
                for residue in chain[0]:
                    pdbmap.append(residue)
        elif type_name == 'SubChain':
            for residue in pattern[object]:
                pdbmap.append(residue)
        elif type_name == 'Residue' or type_name == 'Molecule' or \
             type_name == 'Collection':
            pdbmap.append(pattern[object])
        if type_name == 'Protein' or type_name == 'SubChain' or\
           type_name == 'Residue':
            # Residue-based entries: the n-th MMTK residue is matched
            # against the n-th collected PDB item.
            for it in range(len(pdbmap)):
                pdb_item = pdb_collection[object][it]
                atom_list = pdbmap[it].atomList()
                # names under which this MMTK residue may appear in a PDB file
                res_list = []
                for ia in pdbmap[it].pdbmap:
                    res_list.append(ia[0])
                res_list = map(upper, res_list)
                if res_list.count(upper(pdb_item.name)) == 0:
                    print 'ERROR: problem with matching'
                    print pdb_item.name, res_list
                    return None
                for ia in pdb_item.atoms.keys():
                    atom = pdb_item.atoms[ia]
                    if atom.properties['element'] == '*':
                        # PDB atom name -> index into atomList()
                        anum = pdbmap[it].pdbmap[0][1][ia].number
                        atra = atom_list[anum]
                        oldpos = atra.position()
                        atra.setPosition(atom.position / 10.)
                        newpos = atra.position()
                        coll.append(atra)
                        if verbose:
                            quickCheck(pdb_item, atom, atra, oldpos, newpos)
            selection.append(Collection(coll))
        elif type_name == 'Molecule' or type_name == 'Collection':
            # Molecule-like entries: every collected PDB item is matched
            # against the pdbmap of the single MMTK object.
            for it in range(len(pdb_collection[object])):
                pdb_item = pdb_collection[object][it]
                atom_list = pdbmap[0].atomList()
                res_list = []
                # gj flags whether any pdbmap name matches the PDB item name
                gj = 0
                for ia in pdbmap[0].pdbmap:
                    if evalREPattern(pdb_item.name, upper(ia[0]), tokens):
                        gj = 1
                        break
                if not gj:
                    print 'ERROR: problem with matching'
                    return None
                atname_list = pdb_item.atoms.keys()
                for ia in range(len(atname_list)):
                    atom = pdb_item.atoms[atname_list[ia]]
                    if atom.properties['element'] == '*' and \
                       evalREPattern(pdb_item.name,
                                     upper(pdbmap[0].pdbmap[it][0]),tokens):
                        mmtk_map = pdbmap[0].pdbmap[it][1]
                        # NOTE(review): if no key of mmtk_map equals the PDB
                        # atom name, aname is unset (or left over from an
                        # earlier atom) when it is used below -- confirm.
                        for ib in mmtk_map.keys():
                            #if evalREPattern(atname_list[ia],ib,tokens):
                            if atname_list[ia] == ib:
                                aname = ib
                                break
                        anum = pdbmap[0].pdbmap[it][1][aname].number
                        atra = atom_list[anum]
                        oldpos = atra.position()
                        atra.setPosition(atom.position / 10.)
                        newpos = atra.position()
                        coll.append(atra)
                        if verbose:
                            quickCheck(pdb_item, atom, atra, oldpos, newpos)
            selection.append(Collection(coll))

    info = {'MMTK': pattern, 'PDB': selection}
    if verbose:
        print len(Collection(selection).atomList()),
        print 'MMTK atoms in ', len(pattern), 'objects',
        print 'were mapped successfully on PDB atoms in file ', filename
    return info
# This is a slightly modified version of analysis.py. It uses only the # C-alpha atoms of the peptide chains, and it discards the primary sequence # information. This makes it faster (fewer atoms) and suitable for comparing # proteins of similar fold but with different primary sequences. # from MMTK import * from MMTK.PDB import PDBConfiguration from MMTK.Proteins import PeptideChain # First we read the two PDB files. configuration1 = PDBConfiguration('4q21.pdb.gz') configuration2 = PDBConfiguration('6q21.pdb.gz') # Set all residue names to GLY. This permits the comparison # and superposition of proteins with different sequences. Since # a C-alpha model is used, the side chains are thrown away anyway. for conf in [configuration1, configuration2]: for chain in conf.peptide_chains: for residue in chain: residue.name = 'GLY' # The first file contains a monomer, the second one a tetramer in # which each chain is almost identical to the monomer from the first # file. We have to cut off the last (incomplete) residue from the # monomer and the last three residues of each chain of the tetramers # to get matching sequences. We'll just deal with one of the chains of # the tetramer here. monomer = configuration1.peptide_chains[0]
# the crystallographic unit cell from the information in a PDB file. # # Note that this will not necessarily work with any PDB file. Many files # use non-crystallographic symmetry information in a non-standard way. # This is usually explained in REMARK records, but those cannot be # evaluated automatically. # from MMTK import * from MMTK.PDB import PDBConfiguration from MMTK.Proteins import Protein # Read PDB configuration and create MMTK objects for all peptide chains. # A C-alpha model is used to reduce the system size. You can remove # 'model="calpha"' to get an all-atom model. conf = PDBConfiguration('insulin.pdb') chains = Collection(conf.createPeptideChains(model="calpha")) # Copy and transform the objects representing the asymmetric unit in order # to obtain the contents of the unit cell. chains = conf.asuToUnitCell(chains) # Construct a periodic universe representing the unit cell. universe = conf.createUnitCellUniverse() # Add each chain as one protein. If the unit cell contains multimers, # the chains must be combined into protein objects by hand, # as no corresponding information can be extracted from the PDB file. for chain in chains: universe.addObject(Protein(chain))
dist = Numeric.sqrt(dist2) if dist >= contact_factor*(a1.vdW_radius + a2.vdW_radius + cutoff): continue if not done.has_key((index1, index2)): clist.append(Contact(a1, a2, dist)) done[(index1, index2)] = 1 return clist if __name__ == '__main__': from MMTK.PDB import PDBConfiguration from MMTK import Units import sys target_filename = sys.argv[2] pdb_conf1 = PDBConfiguration(target_filename) if sys.argv[1][:2] == '-f': chains = pdb_conf1.createNucleotideChains() molecule_names = [] if len(chains) >= 2: clist = findContacts(chains[0], chains[1]) else: molecule_names = [] for (key, mol) in pdb_conf1.molecules.items(): for o in mol: molecule_names.append(o.name) targets = pdb_conf1.createAll(molecule_names = molecule_names) if len(molecule_names) > 1: clist = findContacts(targets[0], targets[1]) else: atoms = targets.atomList()
"""
nucleotide_construction.py
creates a nucleotide chain with a ligand from PDB file
"""

from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.NucleicAcids import NucleotideChain
from MMTK.Visualization import view

# Load PDB entry 110d. It contains a single DNA strand with a ligand
# daunomycin.
configuration = PDBConfiguration('110d.pdb')

# Construct nucleotide chain object. This also constructs positions for
# missing hydrogens, using geometrical criteria.
chain = configuration.createNucleotideChains()[0]

# Construct the ligand. There is no definition of it in the database, so it
# can only be constructed as a collection of atoms. The second argument of
# createMolecules() is set to one in order to allow this use of an unknown
# residue.
ligand = configuration.createMolecules(['DM1'], 1)

# Put everything in a universe and show it graphically
universe = InfiniteUniverse()
for part in (chain, ligand):
    universe.addObject(part)

view(universe)
from MMTK import * from MMTK.PDB import PDBConfiguration from MMTK.Proteins import Protein from MMTK.ForceFields import CalphaForceField from MMTK.NormalModes import EnergeticModes import pylab import numpy # Make a universe for the first configuration universe = InfiniteUniverse(CalphaForceField()) universe.protein = Protein('1SU4.pdb', model='calpha') conf_1SU4 = universe.copyConfiguration() # Apply the second configuration and do a rigid-body superposition fit PDBConfiguration('1T5T.pdb').applyTo(universe.protein) tr, rms = universe.findTransformation(conf_1SU4) universe.applyTransformation(tr) conf_1T5T = universe.copyConfiguration() # Set first configuration and calculate normal modes universe.setConfiguration(conf_1SU4) modes = EnergeticModes(universe, 300. * Units.K) # Calculate the normalized difference vector diff = (conf_1T5T - conf_1SU4).scaledToNorm(1.) # Calculate the squared overlaps for all modes mode_numbers = numpy.arange(6, len(modes)) overlaps = [modes.rawMode(i).dotProduct(diff)**2 for i in mode_numbers]
forcefield = CalphaForceField(cutoff=cutoff/10.) elif 'amber94forcefield'.startswith(ff): from MMTK.ForceFields import Amber94ForceField forcefield = Amber94ForceField() model = 'all' else: raise NotImplementedError('unknown ff = ' + str(ff)) if not quiet: print ' Forcefield:', forcefield.__class__.__name__ if model == 'calpha': selection = '(%s) and polymer and name CA' % (selection) from cStringIO import StringIO f = StringIO(cmd.get_pdbstr(selection)) conf = PDBConfiguration(f) items = conf.createPeptideChains(model) universe = InfiniteUniverse(forcefield) universe.protein = Protein(*items) nbasis = max(10, universe.numberOfAtoms()/5) cutoff, nbasis = estimateCutoff(universe, nbasis) if not quiet: print " Calculating %d low-frequency modes." % nbasis if cutoff is None: modes = NormalModes(universe) else: subspace = FourierBasis(universe, cutoff) modes = SubspaceNormalModes(universe, subspace)
cutoff): continue if not done.has_key((index1, index2)): clist.append(Contact(a1, a2, dist)) done[(index1, index2)] = 1 return clist if __name__ == '__main__': from MMTK.PDB import PDBConfiguration from MMTK import Units import sys target_filename = sys.argv[2] pdb_conf1 = PDBConfiguration(target_filename) if sys.argv[1][:2] == '-f': chains = pdb_conf1.createNucleotideChains() molecule_names = [] if len(chains) >= 2: clist = findContacts(chains[0], chains[1]) else: molecule_names = [] for (key, mol) in pdb_conf1.molecules.items(): for o in mol: molecule_names.append(o.name) targets = pdb_conf1.createAll(molecule_names=molecule_names) if len(molecule_names) > 1: clist = findContacts(targets[0], targets[1]) else: atoms = targets.atomList()
from MMTK import * from MMTK.PDB import PDBConfiguration # A utility function that creates an image of an object by making # a copy and applying a transformation to the copy. def makeImage(object, transformation): image = deepcopy(object) for atom in image.atomList(): atom.setPosition(transformation(atom.position())) return image # Read PDB configuration and create MMTK objects for all peptide chains. # A C-alpha model is used to reduce the system size. You can remove # 'model="calpha"' to get an all-atom model, but for insulin this will # create more than 380000 atoms for the 27-unit-cell crystal! conf = PDBConfiguration('insulin.pdb') chains = Collection(conf.createPeptideChains(model="calpha")) # Apply non-crystallographic symmetries to construct the asymmetric unit asu = Collection(chains) for so in conf.ncs_transformations: if not so.given: image = makeImage(chains, so) asu.addObject(image) # Apply crystallographic symmetries to construct the unit cell # Note that the list of crystallographic symmetries includes the # identity transformation, so the unmodified asu is not added # to the unit cell. cell = Collection() for so in conf.cs_transformations:
pos1 = atom_data[i] tot_rad = pos1[3] (points1, points_unit) = atom_surf(nbors, i, atom_data, pos1, tot_rad, point_density, tess1, ret_fmt >= 2) surf_points[a1] = _xlate_results(points1, points_unit, point_density, tot_rad, pos1, ret_fmt, cent) return surf_points if __name__ == '__main__': from MMTK.PDB import PDBConfiguration from MMTK import Units import profile # Load the PDB sequences target_filename = sys.argv[1] pdb_conf1 = PDBConfiguration(target_filename) molecule_names = [] if 0: # enable to include all molecules in file for (key, mol) in pdb_conf1.molecules.items(): for o in mol: molecule_names.append(o.name) target = pdb_conf1.createAll(molecule_names = molecule_names) atoms = [] for a in target.atomList(): if a.symbol != 'H': atoms.append(a) if a.index is None: a.index = len(atoms) point_density = 1026 s = surface_atoms(atoms, solvent_radius = 1.4*Units.Ang, point_density = point_density, ret_fmt = 4)
# Retrieve atomic fluctuation information from a PDBConfiguration object. # from MMTK import * from MMTK.PDB import PDBConfiguration from Scientific import N # Read a PDB file containing ANISOU records. conf = PDBConfiguration('1G66.pdb') # By passing the applyTo methods of MMTK.PDB a dictionary argument # atom_map, one obtains a dictionary from MMTK atom objects to the # corresponding PDB atom objects. This dictionary can be used to # retrieve additional atom data from the PDB file. atom_map = {} for c in conf.peptide_chains: # Create a PeptideChain object chain = c.createPeptideChain() # Retrieve the atom_map dictionary # Note: this also redefines the configuration, but in this # application that makes no difference since it is the same # that was defined in the previous line. c.applyTo(chain, atom_map=atom_map) # Print the B factor and the trace of the anisotropic displacement # tensor for each atom for which both are available. They should # be equal, but due to the limited precision in PDB files # there can be small differences. for atom in chain.atomList(): try: pdb_atom = atom_map[atom]
from MMTK import * from MMTK.PDB import PDBConfiguration from MMTK.Proteins import PeptideChain, Protein # # First problem: construct an all-atom model from a structure without # hydrogens. This is the standard problem when using an all-atom force # field with crystallographic structures. # # Note: the simple solution in this case is just # insulin = Protein('insulin.pdb') # but the explicit form shown below is necessary when any kind of # modification is required. # # Load the PDB file. configuration = PDBConfiguration('insulin.pdb') # Construct the peptide chain objects. This also constructs positions # for any missing hydrogens, using geometrical criteria. chains = configuration.createPeptideChains() # Make the protein object. insulin = Protein(chains) # Write out the structure with hydrogens to a new file - we will use # it as an input example later on. insulin.writeToFile('insulin_with_h.pdb') # # Second problem: read a file with hydrogens and create a structure # without them. This is useful for analysis; if you don't need the
def normalmodes_mmtk(selection, cutoff=12.0, ff='Deformation', first=7, last=10,
                     prefix='mmtk', states=7, factor=-1, quiet=1):
    '''
DESCRIPTION

    Fast normal modes for large proteins using an elastic network model (CA only)

    Based on:
    http://dirac.cnrs-orleans.fr/MMTK/using-mmtk/mmtk-example-scripts/normal-modes/

ARGUMENTS

    selection = string: atom selection

    cutoff = float: force field cutoff, divided by 10 before it is passed
    to MMTK {default: 12.0}

    ff = string: force field name or unique prefix of it (Deformation,
    Calpha, Amber94); Amber94 switches to an all-atom model
    {default: Deformation}

    first, last = int: range of modes to create objects for {default: 7, 10}

    prefix = string: name prefix of the created objects {default: mmtk}

    states = int: number of states per mode object {default: 7}

    factor = float: displacement scaling, log(number of atoms) if negative
    {default: -1}
    '''
    try:
        import MMTK
    except ImportError:
        print('Failed to import MMTK, please add to PYTHONPATH')
        raise CmdException

    selection = selector.process(selection)
    cutoff = float(cutoff)
    first, last = int(first), int(last)
    states, factor, quiet = int(states), float(factor), int(quiet)

    import os
    import tempfile
    from math import log
    from chempy import cpv

    from MMTK import DCD
    from MMTK import InfiniteUniverse
    from MMTK.PDB import PDBConfiguration
    from MMTK.Proteins import Protein
    from MMTK.ForceFields import DeformationForceField, CalphaForceField
    from MMTK.FourierBasis import FourierBasis, estimateCutoff
    from MMTK.NormalModes import NormalModes, SubspaceNormalModes

    model = 'calpha'
    ff = ff.lower()
    if 'deformationforcefield'.startswith(ff):
        forcefield = DeformationForceField(cutoff=cutoff / 10.)
    elif 'calphaforcefield'.startswith(ff):
        forcefield = CalphaForceField(cutoff=cutoff / 10.)
    elif 'amber94forcefield'.startswith(ff):
        from MMTK.ForceFields import Amber94ForceField
        forcefield = Amber94ForceField()
        model = 'all'
    else:
        raise NotImplementedError('unknown ff = ' + str(ff))
    if not quiet:
        print(' Forcefield:', forcefield.__class__.__name__)

    if model == 'calpha':
        selection = '(%s) and polymer and name CA' % (selection)

    f = StringIO(cmd.get_pdbstr(selection))
    conf = PDBConfiguration(f)
    items = conf.createPeptideChains(model)

    universe = InfiniteUniverse(forcefield)
    universe.protein = Protein(*items)

    # Floor division keeps the basis size an integer on Python 3 as well.
    nbasis = max(10, universe.numberOfAtoms() // 5)
    cutoff, nbasis = estimateCutoff(universe, nbasis)
    if not quiet:
        print(" Calculating %d low-frequency modes." % nbasis)

    if cutoff is None:
        modes = NormalModes(universe)
    else:
        subspace = FourierBasis(universe, cutoff)
        modes = SubspaceNormalModes(universe, subspace)

    natoms = modes.array.shape[1]
    frequencies = modes.frequencies

    if factor < 0:
        factor = log(natoms)
        if not quiet:
            print(' set factor to %.2f' % (factor))

    # Round-trip the universe through a temporary PDB file; the atom
    # sequence returned by writePDB is used below to pick the mode vectors
    # (by a.index) in the order in which the atoms were written.
    # mkstemp creates the file atomically (mktemp is race-prone), and the
    # file is removed even when writing or loading fails.
    fd, filename = tempfile.mkstemp(suffix='.pdb')
    os.close(fd)
    try:
        sequence = DCD.writePDB(universe, None, filename)
        z = [a.index for a in sequence]
        selection = cmd.get_unused_name('_')
        cmd.load(filename, selection, zoom=0)
    finally:
        os.remove(filename)

    if cmd.count_atoms(selection) != natoms:
        print('hmm... still wrong number of atoms')

    def eigenfacs_iter(mode):
        # displacement vectors of one mode, one per written atom
        x = modes[mode - 1].array
        return iter(x.take(z, 0))

    for mode in range(first, min(last, len(modes)) + 1):
        name = prefix + '%d' % mode
        cmd.delete(name)

        if not quiet:
            print(' normalmodes: object "%s" for mode %d with freq. %.6f' % \
                    (name, mode, frequencies[mode-1]))

        # NOTE(review): states == 1 divides by zero in 'myfac' below.
        for state in range(1, states + 1):
            cmd.create(name, selection, 1, state, zoom=0)
            cmd.alter_state(
                state,
                name,
                '(x,y,z) = cpv.add([x,y,z], cpv.scale(next(myit), myfac))',
                space={
                    'cpv': cpv,
                    'myit': eigenfacs_iter(mode),
                    'next': next,
                    # the states sweep the displacement from -0.5 to +0.5
                    'myfac':
                    1e2 * factor * ((state - 1.0) / (states - 1.0) - 0.5)
                })

    cmd.delete(selection)
    if model == 'calpha':
        cmd.set('ribbon_trace_atoms', 1, prefix + '*')
        cmd.show_as('ribbon', prefix + '*')
    else:
        cmd.show_as('lines', prefix + '*')
# This example shows how a universe can be built from a PDB file in such
# a way that all objects in the PDB file are represented as well as
# possible, using AtomCluster objects when nothing more specific can
# be constructed.
#
# This procedure is the only way to construct a universe that uses the
# same internal atom order as the PDB file, which is important for
# data exchange with other programs.

from MMTK import *
from MMTK.PDB import PDBConfiguration

# Read the file, then add everything createAll() builds from it to an
# empty infinite universe.
configuration = PDBConfiguration('some_file.pdb')

universe = InfiniteUniverse()
everything = configuration.createAll(None, 1)
universe.addObject(everything)
def _get_molecules(pdb_file):
    """Return what PDBConfiguration(pdb_file).createAll() builds."""
    return PDBConfiguration(pdb_file).createAll()
def generate_ramachandran(pdb_id):
    """Draw a Ramachandran plot for a PDB entry and return an <img> tag.

    The structure is read from "../data/<pdb_id>.pdb".  The phi/psi angles
    of all residues are sorted into the four Ramachandran classes (General,
    Glycine, Proline, Pre-Pro).  The "General" residues are then plotted
    through R on top of the "General" top500 contour levels into a PNG file
    below "../tmp/".

    Returns an HTML <img> element that points to the generated PNG.

    NOTE(review): pdb_id goes unescaped into file paths and into the
    returned HTML -- validate it in the caller if it is user input.
    """
    rama_GENERAL = "General"
    rama_GLYCINE = "Glycine"
    rama_PROLINE = "Proline"
    rama_PRE_PRO = "Pre-Pro"
    ramachandran_types = [rama_GENERAL, rama_GLYCINE, rama_PROLINE, rama_PRE_PRO]

    # I have used the same colours as RAMPAGE
    # http://raven.bioc.cam.ac.uk/rampage.php
    # Each entry: (contour levels, fill colours, density data file)
    rama_settings = {
        "General": ([0, 0.0005, 0.02, 1],
                    ['#FFFFFF', '#B3E8FF', '#7FD9FF'],
                    "top500angles/pct/rama/rama500-general.data"),
                    # or rama500-general-nosec.data
        "Glycine": ([0, 0.002, 0.02, 1],
                    ['#FFFFFF', '#FFE8C5', '#FFCC7F'],
                    "top500angles/pct/rama/rama500-gly-sym.data"),
                    # or rama500-gly-sym-nosec.data
        "Proline": ([0, 0.002, 0.02, 1],
                    ['#FFFFFF', '#D0FFC5', '#7FFF8C'],
                    "top500angles/pct/rama/rama500-pro.data"),
        "Pre-Pro": ([0, 0.002, 0.02, 1],
                    ['#FFFFFF', '#B3E8FF', '#7FD9FF'],
                    "top500angles/pct/rama/rama500-prepro.data"),
    }
    # P.S. Also rama500-ala-nosec.data

    def load_data_file(filename):
        """Read a top500 density table into a square numpy array.

        The file has '#' comment lines followed by "phi psi value" lines
        whose coordinates are the bin mid points, e.g.

            # x1: -180.0 180.0 180 true
            # x2: -180.0 180.0 180 true
            -179.0 -179.0 0.0918642445114388
            -179.0 -177.0 0.07105717866463215

        Returns (data, lower_bounds, mid_points, upper_bounds), where
        data[i, j] is the value for phi = mid_points[i], psi = mid_points[j].
        """
        HALF_STEP = 1
        STEP = HALF_STEP * 2
        # Real lists, so that .index() and the hand-over to R behave the
        # same under Python 2 and Python 3.
        lower_bounds = list(range(-180, 180, STEP))
        mid_points = list(range(-180 + HALF_STEP, 180 + HALF_STEP, STEP))
        upper_bounds = list(range(-180 + STEP, 180 + STEP, STEP))

        data = numpy.zeros((len(mid_points), len(mid_points)))

        with open(filename, "r") as input_file:
            for line in input_file:
                # Strip the newline character(s) from the end of the line
                line = line.rstrip("\r\n")
                if not line or line[0] == "#":
                    # blank line (used to raise IndexError) or comment
                    continue
                parts = line.split()
                assert len(parts) == 3
                x1 = float(parts[0])  # phi
                x2 = float(parts[1])  # psi
                value = float(parts[2])
                assert x1 == float(int(x1))
                assert x2 == float(int(x2))
                i1 = mid_points.index(int(x1))
                i2 = mid_points.index(int(x2))
                data[i1, i2] = value
        return (data, lower_bounds, mid_points, upper_bounds)

    r.library("MASS")
    # Define the R plotting function: filled contours of z.grid with the
    # scatter points drawn on top.
    r("""
ramachandran.plot <- function(x.scatter, y.scatter,
    x.grid = seq(0, 1, len = nrow(z)), y.grid = seq(0, 1, len = ncol(z)), z.grid,
    xlim = range(x.grid, finite = TRUE), ylim = range(y.grid, finite = TRUE),
    zlim = range(z.grid, finite = TRUE), levels = pretty(zlim, nlevels),
    nlevels = 20, color.palette = cm.colors, col = color.palette(length(levels) - 1),
    plot.title="", plot.axes, key.title, key.axes, asp = NA,
    xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes, ...)
{
    if (missing(z.grid)) {
        stop("no 'z.grid' matrix specified")
    } else if (is.list(x.grid)) {
        y.grid <- x.grid$y
        x.grid <- x.grid$x
    }
    if (any(diff(x.grid) <= 0) || any(diff(y.grid) <= 0))
        stop("increasing 'x.grid' and 'y.grid' values expected")

    plot.new()
    plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp)

    if (!is.matrix(z.grid) || nrow(z.grid) <= 1 || ncol(z.grid) <= 1)
        stop("no proper 'z.grid' matrix specified")
    if (!is.double(z.grid))
        storage.mode(z.grid) <- "double"
    .filled.contour(as.double(x.grid), as.double(y.grid), z.grid, as.double(levels), col = col)

    if (!(missing(x.scatter)) && !(missing(y.scatter))) {
        plot.xy(xy.coords(x.scatter,y.scatter,NULL,NULL,NULL,NULL),
                xlim=xlim, ylim=ylim, xlab="", ylab="", asp=asp,
                type="p", pch=20, cex=0.1)
    }

    if (missing(plot.axes)) {
        if (axes) {
            title(main=plot.title, xlab=expression(phi), ylab=expression(psi))
            axis(1, at=c(-180,-90,0,90,180))
            axis(2, at=c(-180,-90,0,90,180))
        }
    } else plot.axes

    if (frame.plot)
        box()
    if (missing(plot.title)) {
        title(...)
    } else plot.title
    invisible()
}
""")

    def degrees(rad_angle):
        """Converts and angle in radians to degrees, mapped to the range [-180,180]"""
        angle = rad_angle * 180 / math.pi
        # Note this assume the radians angle is positive as that's what MMTK does
        while angle > 180:
            angle = angle - 360
        return angle

    def next_residue(residue):
        """Expects an MMTK residue, returns the next residue in the chain, or None"""
        # Proteins go N terminal --> C terminal
        # The next reside is bonded to the C of this atom...
        for a in residue.peptide.C.bondedTo():
            if a.parent.parent != residue:
                return a.parent.parent
        return None

    def residue_amino(residue):
        """Expects an MMTK residue, returns the three letter amino acid code in upper case"""
        if residue:
            return residue.name[0:3].upper()
        else:
            return None

    def residue_ramachandran_type(residue):
        """Expects an MMTK residue, returns ramachandran 'type' (General, Glycine, Proline or Pre-Pro)"""
        if residue_amino(residue) == "GLY":
            return rama_GLYCINE
        elif residue_amino(residue) == "PRO":
            return rama_PROLINE
        elif residue_amino(next_residue(residue)) == "PRO":
            # excludes those that are Pro or Gly
            return rama_PRE_PRO
        else:
            return rama_GENERAL

    # One list of phi and one of psi angles (in degrees) per class.
    scatter_phi = dict()
    scatter_psi = dict()
    for ramachandran_type in ramachandran_types:
        scatter_phi[ramachandran_type] = []
        scatter_psi[ramachandran_type] = []

    pdb_filename = "../data/%s.pdb" % pdb_id

    # Load the PDB file, ignore the hydrogens, and then build a model of
    # the peptides:
    configuration = PDBConfiguration(pdb_filename)
    configuration.deleteHydrogens()
    protein = Protein(configuration.createPeptideChains(model="no_hydrogens"))
    for chain in protein:
        for residue in chain:
            phi, psi = residue.phiPsi()
            # Skip residues with an undefined angle (presumably None at
            # the chain ends -- confirm); an angle of exactly 0.0 is valid
            # and must not be dropped by a truthiness test.
            if phi is not None and psi is not None:
                residue_type = residue_ramachandran_type(residue)
                assert residue_type in ramachandran_types
                scatter_phi[residue_type].append(degrees(phi))
                scatter_psi[residue_type].append(degrees(psi))
    assert len(scatter_phi) == len(scatter_psi)

    png_filename = "ppii%d%s.png" % (random.randint(0, 1000000), pdb_id)
    png_filepath = "../tmp/" + png_filename
    png_command = 'png("' + png_filepath + '")'
    # Open the PNG device, so that the plot below ends up in the file the
    # returned <img> tag refers to.
    r(png_command)

    # A single plot is drawn.  The scatter points and the contour levels
    # must belong to the same class; previously the points were taken from
    # whatever class a left-over loop variable happened to name.
    plot_type = rama_GENERAL
    (rama_levels, rama_colors, rama_filename) = rama_settings[plot_type]
    data, lower_bounds, mid_points, upper_bounds = load_data_file(rama_filename)

    r.ramachandran_plot(x_scatter=scatter_phi[plot_type],
                        y_scatter=scatter_psi[plot_type],
                        x_grid=mid_points, y_grid=mid_points, z_grid=data,
                        xlim=[-180, 180], ylim=[-180, 180], asp=1.0,
                        plot_title="Ramachandran plot of " + pdb_id,
                        drawlabels=False,
                        levels=rama_levels, col=rama_colors)
    r("dev.off()")
    return '<img src="/~jean/projet/tmp/' + png_filename + '"/>'
elif 'calphaforcefield'.startswith(ff): forcefield = CalphaForceField(cutoff=cutoff / 10.) elif 'amber94forcefield'.startswith(ff): from MMTK.ForceFields import Amber94ForceField forcefield = Amber94ForceField() model = 'all' else: raise NotImplementedError('unknown ff = ' + str(ff)) if not quiet: print(' Forcefield:', forcefield.__class__.__name__) if model == 'calpha': selection = '(%s) and polymer and name CA' % (selection) f = StringIO(cmd.get_pdbstr(selection)) conf = PDBConfiguration(f) items = conf.createPeptideChains(model) universe = InfiniteUniverse(forcefield) universe.protein = Protein(*items) nbasis = max(10, universe.numberOfAtoms() / 5) cutoff, nbasis = estimateCutoff(universe, nbasis) if not quiet: print(" Calculating %d low-frequency modes." % nbasis) if cutoff is None: modes = NormalModes(universe) else: subspace = FourierBasis(universe, cutoff) modes = SubspaceNormalModes(universe, subspace)
import time

# Output handle and wall-clock start time. Neither is used in the statements
# below; presumably the rest of the script writes results and reports the
# elapsed time -- confirm against the full file.
# NOTE(review): the name 'file' shadows the Python 2 builtin of the same name.
file = open("output.txt", 'w')
start = time.time()

#
# First problem: construct an all-atom model from a structure without
# hydrogens. This is the standard problem when using an all-atom force
# field with crystallographic structures.
#

# Load the PDB file.
configuration = PDBConfiguration('insulin.pdb')

# Construct the peptide chain objects. This also constructs positions
# for any missing hydrogens, using geometrical criteria.
chains = configuration.createPeptideChains()

# Make the protein object.
#insulin = Protein(chains)

# Define system: an infinite (non-periodic) universe using the Amber99
# force field with the 'frcmod.ff99SB' modification file, holding the
# protein built from the chains above.
universe = InfiniteUniverse(Amber99ForceField(mod_files=['frcmod.ff99SB']))
universe.protein = Protein(chains)

# Initialize velocities for a temperature of 50 K and print the
# temperature the universe reports afterwards.
universe.initializeVelocitiesToTemperature(50. * Units.K)
print 'Temperature: ', universe.temperature()
#!/usr/bin/python
"""
The Python script below will calculate the radius of gyration for the
assembly of all molecules specified in a PDB file (typically the
asymmetric unit).

To run it, you need the Molecular Modelling Toolkit, available from
http://dirac.cnrs-orleans.fr/MMTK/
"""

from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.PDBMoleculeFactory import PDBMoleculeFactory
from Scientific import N
import sys

# The PDB file name is the first command-line argument; every molecule the
# factory retrieves from it is gathered into a single collection.
conf = PDBConfiguration(sys.argv[1])
factory = PDBMoleculeFactory(conf)
molecules = Collection(factory.retrieveMolecules())


def radiusOfGyration(m):
    """Return the radius of gyration of *m*.

    *m* must provide numberOfAtoms() and atomList().  Every atom counts
    equally (no mass weighting): the centre is the plain average of the
    atom positions, and the result is the square root of the mean squared
    distance of the atoms from that centre.
    """
    natoms = m.numberOfAtoms()
    atoms = m.atomList()

    # Average position of all atoms.
    position_total = Vector(0., 0., 0.)
    for atom in atoms:
        position_total = position_total + atom.position()
    center = position_total / natoms

    # Accumulate the squared distances from the centre.
    squared_total = 0
    for atom in atoms:
        offset = atom.position() - center
        squared_total = squared_total + offset.length()**2

    return N.sqrt(squared_total / natoms)


# Report the result divided by Units.Ang.
print(radiusOfGyration(molecules) / Units.Ang)
# A utility function that creates an image of an object by making # a copy and applying a transformation to the copy. def makeImage(object, transformation): image = deepcopy(object) for atom in image.atomList(): atom.setPosition(transformation(atom.position())) return image # Read PDB configuration and create MMTK objects for all peptide chains. # A C-alpha model is used to reduce the system size. You can remove # 'model="calpha"' to get an all-atom model, but for insulin this will # create more than 380000 atoms for the 27-unit-cell crystal! conf = PDBConfiguration('insulin.pdb') chains = Collection(conf.createPeptideChains(model="calpha")) # Apply non-crystallographic symmetries to construct the asymmetric unit asu = Collection(chains) for so in conf.ncs_transformations: if not so.given: image = makeImage(chains, so) asu.addObject(image) # Apply crystallographic symmetries to construct the unit cell # Note that the list of crystallographic symmetries includes the # identity transformation, so the unmodified asu is not added # to the unit cell. cell = Collection() for so in conf.cs_transformations:
# Build a nucleotide chain together with its ligand from a PDB file
# and display the result.
#

from MMTK import *
from MMTK.PDB import PDBConfiguration
from MMTK.NucleicAcids import NucleotideChain
from MMTK.Visualization import view

# PDB entry 110d holds a single DNA strand with a ligand (daunomycin).
configuration = PDBConfiguration('110d.pdb')

# Building the nucleotide chains also generates positions for the
# missing hydrogens from geometrical criteria; only the first chain
# is used.
nucleotide_chains = configuration.createNucleotideChains()
chain = nucleotide_chains[0]

# The database has no definition for the ligand, so it can only be built
# as a collection of atoms. Passing one as the second argument of
# createMolecules() permits this use of an unknown residue.
ligand = configuration.createMolecules(['DM1'], 1)

# Put everything in a universe and show it graphically.
universe = InfiniteUniverse()
for component in (chain, ligand):
    universe.addObject(component)

view(universe)