def build_bdna(sequence, **kwargs):
    """ Build a 3D double-helix B-DNA structure with Ambertools' Nucleic Acid Builder.

    A short NAB program is generated that calls ``bdna`` on the lower-cased sequence
    and writes the result to ``helix.pdb``; that file is read back when the job finishes.

    Args:
        sequence (str): DNA sequence for one of the strands (a complementary sequence will
            automatically be created)
        **kwargs: arguments for :meth:`compute.run_job`

    Returns:
        moldesign.Molecule: B-DNA double helix
    """
    nab_program = 'molecule m;\nm = bdna( "%s" );\nputpdb( "helix.pdb", m, "-wwpdb");\n' % (
        sequence.lower())

    def _read_helix(finished_job):
        # Parse the PDB file written by the NAB program and label the result
        helix = mdt.read(finished_job.get_output('helix.pdb'), format='pdb')
        helix.name = 'BDNA: %s' % sequence
        return helix

    nab_job = pyccc.Job(
        command='nab -o buildbdna build.nab && ./buildbdna',
        image=mdt.compute.get_image_path(IMAGE),
        inputs={'build.nab': nab_program},
        name='NAB_build_bdna',
        when_finished=_read_helix,
    )
    return mdt.compute.run_job(nab_job, _return_result=True, **kwargs)
def _antechamber_calc_charges(mol, ambname, chargename, kwargs):
    """Compute atomic partial charges for ``mol`` by running antechamber as a job.

    The molecule is written in mol2 format, antechamber is run with the requested charge
    method, and the charges are read back from the ATOM section of the output mol2 file.

    Args:
        mol: molecule to compute charges for (must support ``write(format='mol2')``)
        ambname (str): charge method name passed to antechamber's ``-c`` option
        chargename (str): key under which the charges are stored in ``mol.properties``
        kwargs (dict): keyword arguments forwarded to ``compute.run_job``

    Returns:
        the value returned by ``compute.run_job`` (invoked with ``_return_result=True``)

    Raises:
        ValueError: (from the job callback) if the output has no ``@<TRIPOS>ATOM`` section
    """
    charge = utils.if_not_none(mol.charge, 0)
    command = 'antechamber -fi mol2 -i mol.mol2 -fo mol2 -o out.mol2 -c %s -an n' % ambname
    if charge != 0:
        command += ' -nc %d' % charge.value_in(u.q_e)

    def finish_job(job):
        """Callback to complete the job: parse per-atom charges from out.mol2"""
        lines = iter(job.get_output('out.mol2').read().split('\n'))
        charges = utils.DotDict(type='atomic')

        # Skip ahead to the atom records. Iterating with ``for`` (rather than calling
        # the Python-2-only ``lines.next()``) works on both Python 2 and 3.
        for line in lines:
            if line.strip().startswith('@<TRIPOS>ATOM'):
                break
        else:
            raise ValueError('No @<TRIPOS>ATOM section found in antechamber output')

        for line in lines:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines inside the section
            if fields[0].startswith('@<TRIPOS>'):
                break  # next section header (normally @<TRIPOS>BOND) ends the atom records
            idx = int(fields[0]) - 1
            # Atom order in the output must match the molecule's own atom order
            assert mol.atoms[idx].name == fields[1]
            charges[mol.atoms[idx]] = u.q_e * float(fields[-1])

        mol.properties[chargename] = charges
        return charges

    job = pyccc.Job(image=mdt.compute.get_image_path(IMAGE),
                    command=command,
                    name="%s, %s" % (chargename, mol.name),
                    inputs={'mol.mol2': mol.write(format='mol2')},
                    when_finished=finish_job)
    return compute.run_job(job, _return_result=True, **kwargs)
def _make_calculation_job(self, requests=None):
    """Construct the pyccc job that runs this model's calculation.

    Args:
        requests: requested properties; passed unchanged to ``self._prep_calculation``

    Returns:
        pyccc.Job: job whose command chains ``self.RUNNER`` and ``self.PARSER`` and whose
            completion callback is ``self.finish``
    """
    settings, job_files = self._prep_calculation(requests)
    # The calculation settings travel to the job as a JSON input file
    job_files['params.json'] = mdt.utils.json_dumps(dict(settings))

    image_path = mdt.compute.get_image_path(self.IMAGE)
    shell_command = '%s && %s' % (self.RUNNER, self.PARSER)
    return pyccc.Job(image=image_path,
                     command=shell_command,
                     inputs=job_files,
                     when_finished=self.finish,
                     name='%s/%s' % (self.MODELNAME, self.mol.name))
def build_dna_helix(sequence, helix_type='B', **kwargs):
    """ Uses Ambertools' Nucleic Acid Builder to build a 3D DNA double-helix.

    Args:
        sequence (str): DNA sequence for one of the strands (a complementary sequence will
            automatically be created)
        helix_type (str): Type of helix (case-insensitive) -
            'A'=Arnott A-DNA,
            'B'=B-DNA (from standard templates and helical params),
            'LB'=Langridge B-DNA,
            'AB'=Arnott B-DNA,
            'SB'=Sasisekharan left-handed B-DNA
        **kwargs: arguments for :meth:`compute.run_job`

    All helix types except 'B' are taken from fiber diffraction data (see the reference for
    details)

    Returns:
        moldesign.Molecule: DNA double helix of the requested type

    Raises:
        ValueError: if ``helix_type`` is not one of the supported types listed above

    References:
        See NAB / AmberTools documentation: http://ambermd.org/doc12/Amber16.pdf, pg 771-2
    """
    supported_types = ('a', 'b', 'lb', 'ab', 'sb')
    helix_key = helix_type.lower()
    if helix_key not in supported_types:
        # Fail here rather than deep inside the remote NAB run
        raise ValueError('Unknown helix_type "%s"; expected one of: %s'
                         % (helix_type, ', '.join(t.upper() for t in supported_types)))

    infile = ['molecule m;']
    if helix_key == 'b':
        infile.append('m = bdna( "%s" );' % sequence.lower())
    else:
        # fd_helix builds the helix from fiber-diffraction data, e.g. "abdna"
        infile.append('m = fd_helix( "%sdna", "%s", "dna" );'
                      % (helix_key, sequence.lower()))
    infile.append('putpdb( "helix.pdb", m, "-wwpdb");\n')

    def finish_job(job):
        mol = mdt.fileio.read_pdb(job.get_output('helix.pdb').open(), assign_ccd_bonds=False)
        if mol.num_chains == 1:
            # Both strands came back in a single chain: move the second half of the
            # residues onto a new chain 'B'
            assert mol.num_residues % 2 == 0
            oldchain = mol.chains[0]
            oldchain.name = oldchain.pdbindex = oldchain.pdbname = 'A'
            newchain = mdt.Chain('B')
            for residue in mol.residues[mol.num_residues // 2:]:
                residue.chain = newchain
                for atom in residue:
                    atom.chain = newchain
            # presumably rebuilds the molecule so it reflects the new chain assignment
            mol = mdt.Molecule(mol)
        mdt.helpers.assign_biopolymer_bonds(mol)

        mol.name = '%s-DNA Helix: %s' % (helix_type.upper(), sequence)
        return mol

    job = pyccc.Job(command='nab -o buildbdna build.nab && ./buildbdna',
                    image=mdt.compute.get_image_path(IMAGE),
                    inputs={'build.nab': '\n'.join(infile)},
                    name='NAB_build_dna',
                    when_finished=finish_job)

    return mdt.compute.run_job(job, _return_result=True, **kwargs)
def _make_minimization_job(self, nsteps):
    """Construct the pyccc job that runs a minimization with this model.

    Args:
        nsteps (int): number of minimization steps to request; if None,
            ``'minimization_steps'`` is not set in the job parameters

    Returns:
        pyccc.Job: job whose command chains ``self.RUNNER`` and ``self.PARSER`` and whose
            completion callback is ``self.finish_min``
    """
    params, inputfiles = self._prep_calculation([self.DEFAULT_PROPERTIES])
    params['runType'] = 'minimization'
    if nsteps is not None:
        # Forward the caller's step count instead of a fixed value
        params['minimization_steps'] = nsteps
    inputfiles['params.json'] = mdt.utils.json_dumps(dict(params))
    job = pyccc.Job(image=mdt.compute.get_image_path(self.IMAGE),
                    command='%s && %s' % (self.RUNNER, self.PARSER),
                    inputs=inputfiles,
                    when_finished=self.finish_min,
                    name='%s/%s' % (self.MODELNAME, self.mol.name))
    return job
def make_job(self, **kwargs):
    """Create a ``pyccc.Job`` from ``kwargs``, choosing where it will run.

    ``submit`` is always forced to False. The execution target is then picked in order:
    a local ``pyccc.Subprocess`` engine if ``self.run_local`` is set, otherwise
    ``self.docker_image`` if it is not None, otherwise the image path looked up from
    ``self.docker_image_label``.

    Args:
        **kwargs: keyword arguments for the ``pyccc.Job`` constructor

    Returns:
        pyccc.Job: the constructed job
    """
    import pyccc
    from . import compute

    if self.run_local:
        target_key, target = 'engine', pyccc.Subprocess()
    elif self.docker_image is not None:
        target_key, target = 'image', self.docker_image
    else:
        target_key, target = 'image', compute.get_image_path(self.docker_image_label)

    kwargs.update({'submit': False, target_key: target})
    return pyccc.Job(**kwargs)
def _make_minimization_job(self, nsteps):
    """Construct the pyccc job that runs a minimization.

    Args:
        nsteps (int): number of minimization steps to request; if None,
            ``'minimization_steps'`` is not set in the job parameters

    Returns:
        pyccc.Job: job that takes the molecule (xyz format) and the JSON-encoded parameters
            as inputs and calls ``self.finish_min`` when finished
    """
    self.prep()
    parameters = self._jobparams.copy()
    parameters['runType'] = 'minimization'
    if nsteps is not None:
        # Forward the caller's step count instead of a fixed value
        parameters['minimization_steps'] = nsteps
    # NOTE: IMAGE is passed as-is here, not resolved through mdt.compute.get_image_path
    job = pyccc.Job(image=IMAGE,
                    command='run.py && getresults.py',
                    inputs={'input.xyz': self.mol.write(format='xyz'),
                            'params.json': json.dumps(parameters)},
                    when_finished=self.finish_min,
                    name='nwchem/%s' % self.mol.name)
    return job
def run_tleap(mol, forcefields=None, parameters=None, **kwargs):
    """ Drive tleap to create prmtop, inpcrd and pdb files for a molecule.

    Specifically uses the AmberTools 16 tleap distribution.

    Defaults are as recommended in the ambertools manual.

    Args:
        mol (moldesign.Molecule): Molecule to set up
        forcefields (List[str]): names of the forcefields to use (see AmberTools manual for
            descriptions); defaults to the values of ``mdt.forcefields.ffdefaults``
        parameters (List[ExtraAmberParameters]): (optional) amber parameters for
            non-standard residues; a single object with a ``lib`` or ``frcmod`` attribute
            is also accepted
        **kwargs: keyword arguments to :meth:`compute.run_job`

    Returns:
        the value returned by :meth:`compute.run_job` for the tleap job

    References:
        Ambertools Manual, http://ambermd.org/doc12/Amber16.pdf. See page 33 for forcefield
        recommendations.
    """
    if forcefields is None:
        forcefields = mdt.forcefields.ffdefaults.values()

    # One "source" line per requested forcefield
    leap_lines = []
    for ffname in forcefields:
        leap_lines.append('source %s' % LEAPRCFILES[ffname])

    job_files = {'input.pdb': mol.write(format='pdb')}

    if parameters:
        # Allow a single parameter object to be passed without wrapping it in a list
        is_single = hasattr(parameters, 'lib') or hasattr(parameters, 'frcmod')
        param_sets = [parameters] if is_single else parameters
        for index, extra in enumerate(param_sets):
            frcmod_name = 'res%d.frcmod' % index
            lib_name = 'res%d.lib' % index
            job_files[frcmod_name] = extra.frcmod
            job_files[lib_name] = extra.lib
            leap_lines += ['loadAmberParams %s' % frcmod_name,
                           'loadoff %s' % lib_name]

    leap_lines.append('mol = loadpdb input.pdb\n'
                      "check mol\n"
                      "saveamberparm mol output.prmtop output.inpcrd\n"
                      "savepdb mol output.pdb\n"
                      "quit\n")
    job_files['input.leap'] = '\n'.join(leap_lines)

    tleap_job = pyccc.Job(image=compute.get_image_path(IMAGE),
                          command='tleap -f input.leap',
                          inputs=job_files,
                          name="tleap, %s" % mol.name)
    return compute.run_job(tleap_job, **kwargs)
def _make_calculation_job(self, requests=None):
    """Construct the pyccc job that runs a single-point calculation.

    Args:
        requests (Iterable): properties to request; copied into the job parameters under
            ``'properties'``. None (the default) is sent as an empty list.

    Returns:
        pyccc.Job: job that takes the molecule (xyz format) and the JSON-encoded parameters
            as inputs and calls ``self.finish`` when finished
    """
    self.prep()
    parameters = self._jobparams.copy()
    parameters['runType'] = 'singlePoint'
    # list(None) raises TypeError, so the default is treated as "nothing extra requested"
    # NOTE(review): confirm an empty list is the right fallback for this backend
    parameters['properties'] = list(requests) if requests is not None else []
    if self.mol.constraints:
        self.write_constraints(parameters)
    # NOTE: IMAGE is passed as-is here, not resolved through mdt.compute.get_image_path
    job = pyccc.Job(image=IMAGE,
                    command='run.py && getresults.py',
                    inputs={'input.xyz': self.mol.write(format='xyz'),
                            'params.json': json.dumps(parameters)},
                    when_finished=self.finish,
                    name='nwchem/%s' % self.mol.name)
    return job
def name_to_smiles(name, **kwargs):
    """Convert a chemical name to a SMILES string by running ``opsin`` as a job.

    Args:
        name (str): chemical name; written (plus a trailing newline) to the job's input.txt
        **kwargs: keyword arguments for ``mdt.compute.run_job``

    Returns:
        the value returned by ``mdt.compute.run_job`` (invoked with ``_return_result=True``)

    Raises:
        ValueError: (from the job callback) if opsin's output file is empty
    """
    def _read_smiles(finished_job):
        # An empty output file means opsin produced no SMILES for this name
        smiles = finished_job.get_output('output.txt').read().strip()
        if smiles:
            return smiles
        raise ValueError('Could not parse chemical name "%s"' % name)

    opsin_job = pyccc.Job(
        image=mdt.compute.get_image_path(IMAGE),
        command='opsin -osmi input.txt output.txt',
        name="opsin, %s" % name,
        inputs={'input.txt': name + '\n'},
        when_finished=_read_smiles,
    )
    return mdt.compute.run_job(opsin_job, _return_result=True, **kwargs)
def run_tleap(mol, protein='ff14SB', dna='OL15', rna='OL3', carbohydrate='GLYCAM_06j-1',
              lipid='lipid14', water='tip3p', organic='gaff2', off_files=(), frcmod_files=(),
              **kwargs):
    """ Drive tleap to create a prmtop and inpcrd file.

    Specifically uses the AmberTools 16 tleap distribution.

    Defaults are as recommended in the ambertools manual.

    Args:
        mol (moldesign.Molecule): Molecule to set up
        protein (str): protein forcefield name (default: ff14SB)
        dna (str): dna forcefield name (default: OL15)
        rna (str): rna forcefield name (default: OL3)
        carbohydrate (str): carbohydrate forcefield name (default: GLYCAM_06j-1)
        lipid (str): lipid forcefield name (default: lipid14)
        water (str): water forcefield name (default: tip3p)
        organic (str): organic forcefield name (default: gaff2)
        off_files (List[batch.FileContainer]): files loaded in tleap with ``loadoff``
        frcmod_files (List[batch.FileContainer]): files loaded in tleap with
            ``loadamberparam``
        **kwargs: keyword arguments to :meth:`compute.run_job`

    Returns:
        the value returned by :meth:`compute.run_job` for the tleap job

    References:
        Ambertools Manual, http://ambermd.org/doc12/Amber16.pdf. See page 33 for forcefield
        recommendations.
    """
    # Prepare input for tleap: one "source" line per forcefield class
    selected = (protein, dna, rna, carbohydrate, lipid, water, organic)
    leap_lines = []
    for ffname in selected:
        leap_lines.append('source %s' % LEAPRCFILES[ffname])

    # NOTE(review): dumphere() returns the filename used in the leap script, but these files
    # are not added to the job inputs below - confirm they are visible to the job
    leap_lines.extend('loadamberparam %s' % container.dumphere()
                      for container in frcmod_files)
    leap_lines.extend('loadoff %s' % container.dumphere()
                      for container in off_files)

    leap_lines.append('mol = loadpdb input.pdb\n'
                      "check mol\n"
                      "saveamberparm mol output.prmtop output.inpcrd\n"
                      "quit\n")

    # Launch the job
    job_files = {}
    job_files['input.pdb'] = mol.write(format='pdb')
    job_files['input.leap'] = '\n'.join(leap_lines)

    tleap_job = pyccc.Job(image=compute.get_image_path(IMAGE),
                          command='tleap -f input.leap',
                          inputs=job_files,
                          name="tleap, %s" % mol.name)
    return compute.run_job(tleap_job, **kwargs)
def parameterize(mol, charges='esp', ffname='gaff2', **kwargs):
    """Parameterize ``mol``, typically using GAFF parameters.

    This will both assign a forcefield to the molecule (at ``mol.ff``) and produce the
    parameters so that they can be used in other systems (e.g., so that this molecule can be
    simulated embedded in a larger protein)

    Note:
        'am1-bcc' and 'gasteiger' partial charges will be automatically computed if necessary.
        Other charge types must be precomputed.

    Args:
        mol (moldesign.Molecule): molecule to parameterize; must consist of exactly one
            residue and have uniquely named atoms
        charges (str or dict): what partial charges to use? Can be a dict (``{atom:charge}``)
            OR a string, in which case charges will be read from
            ``mol.properties.[charges name]``; typical values will be 'esp', 'mulliken',
            'am1-bcc', etc. Use 'zero' to set all charges to 0 (for QM/MM and testing)
        ffname (str): Name of the gaff-like forcefield file (default: gaff2)
        **kwargs: keyword arguments for ``mdt.compute.run_job``

    Returns:
        ExtraAmberParameters: Parameters for the molecule; this object can be used to create
            forcefield parameters for other systems that contain this molecule

    Raises:
        ValueError: if the molecule's atoms are not uniquely named
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Check that there's only 1 residue, give it a name
    assert mol.num_residues == 1, 'parameterize requires a molecule with exactly 1 residue'
    if mol.residues[0].resname is None:
        mol.residues[0].resname = 'UNL'
        # Parenthesized single argument prints identically on Python 2 and 3
        print('Assigned residue name "UNL" to %s' % mol)
    resname = mol.residues[0].resname

    # check that atoms have unique names
    if len(set(atom.name for atom in mol.atoms)) != mol.num_atoms:
        raise ValueError('This molecule does not have uniquely named atoms, cannot assign FF')

    # Compute the charge types that can be generated on demand
    if charges == 'am1-bcc' and 'am1-bcc' not in mol.properties:
        calc_am1_bcc_charges(mol)
    elif charges == 'gasteiger' and 'gasteiger' not in mol.properties:
        calc_gasteiger_charges(mol)

    if charges == 'zero':
        charge_array = [0.0 for _ in mol.atoms]
    elif isinstance(charges, string_types):
        charge_array = u.array([mol.properties[charges][atom] for atom in mol.atoms])
        if not charge_array.dimensionless:
            # strip units: express in fundamental charge units and keep the bare numbers
            charge_array = charge_array.to(u.q_e).magnitude
    else:
        charge_array = [charges[atom] for atom in mol.atoms]

    inputs = {'mol.mol2': mol.write(format='mol2'),
              'mol.charges': '\n'.join(map(str, charge_array))}

    # The library is saved under a placeholder residue name, then renamed with sed
    cmds = ['antechamber -i mol.mol2 -fi mol2 -o mol_charged.mol2 '
            ' -fo mol2 -c rc -cf mol.charges -rn %s' % resname,
            'parmchk -i mol_charged.mol2 -f mol2 -o mol.frcmod',
            'tleap -f leap.in',
            'sed -e "s/tempresname/%s/g" mol_rename.lib > mol.lib' % resname]

    inputs['leap.in'] = '\n'.join(["source leaprc.%s" % ffname,
                                   "tempresname = loadmol2 mol_charged.mol2",
                                   "fmod = loadamberparams mol.frcmod",
                                   "check tempresname",
                                   "saveoff tempresname mol_rename.lib",
                                   "saveamberparm tempresname mol.prmtop mol.inpcrd",
                                   "quit\n"])

    def finish_job(j):
        # Wrap the generated files, then use them to assign a forcefield to ``mol``
        param = ExtraAmberParameters(j.get_output('mol.lib'),
                                     j.get_output('mol.frcmod'),
                                     j)
        tempmol = mdt.assign_forcefield(mol, parameters=param)
        mol.ff = tempmol.ff
        return param

    job = pyccc.Job(image=mdt.compute.get_image_path(IMAGE),
                    command=' && '.join(cmds),
                    inputs=inputs,
                    when_finished=finish_job,
                    name="GAFF assignment: %s" % mol.name)

    return mdt.compute.run_job(job, _return_result=True, **kwargs)