def test_partial_bondorder(verbose=False):
    """Check benzene energies for a force field that activates partial bond orders.

    Benzene is built from its IUPAC name and parameterized twice with the
    force field defined by ``ffxml_contents``: once requesting the
    'OECharges_AM1BCCSym' charge method and once without requesting any
    charge method. Each energy must fall inside a fixed window.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``ForceField.createSystem``.

    Raises
    ------
    Exception
        If either energy lies outside its expected window.
    """
    from openeye import oechem
    from openeye import oeiupac

    def _outside(value, low, high):
        # Same comparisons as two explicit bounds checks joined with `or`
        return value < low or value > high

    # Build benzene and derive everything the force field needs from it
    mol = oechem.OEMol()
    oeiupac.OEParseIUPACName(mol, 'benzene')
    positions = positions_from_oemol(mol)
    oechem.OETriposAtomNames(mol)
    topology = generateTopologyFromOEMol(mol)

    # Force field comes from the ffxml text defined in this module
    ff = ForceField(StringIO(ffxml_contents))

    # First pass: AM1-BCC charges. The partial bond order for benzene makes
    # the energy a bit higher than it would be without it.
    charged_system = ff.createSystem(
        topology, [mol], chargeMethod='OECharges_AM1BCCSym', verbose=verbose)
    charged_energy = get_energy(charged_system, positions)
    if _outside(charged_energy, 7.50, 7.60):
        raise Exception(
            "Partial bond order code seems to have issues, as energy for benzene is outside of tolerance in tests."
        )

    # Second pass: no charge method requested, so the user-supplied charges
    # (zero in this case) are used and the energy is lower.
    plain_system = ff.createSystem(topology, [mol], verbose=verbose)
    plain_energy = get_energy(plain_system, positions)
    if _outside(plain_energy, 4.00, 6.0):
        raise Exception(
            "Partial bond order code seems to have issues when run with user-provided charges, as energy for benzene is out of tolerance in tests."
        )
def test_gromacs_roundtrip():
    """Save a System (a mixture) to GROMACS, read back in, verify yields same energy and force terms.

    A cyclohexane/ethanol box is parameterized with the Frosst_AlkEtOH force
    field, written to temporary GROMACS ``.top``/``.gro`` files, reloaded via
    ParmEd, and compared against the original system with
    ``compare_system_energies``.

    The temporary files are removed even when the comparison fails.
    """
    from simtk.openmm.app import PDBFile

    forcefield = ForceField(
        get_data_filename('forcefield/Frosst_AlkEtOH.ffxml'))
    filename = get_data_filename(
        os.path.join('systems', 'packmol_boxes',
                     'cyclohexane_ethanol_0.4_0.6.pdb'))
    pdbfile = PDBFile(filename)
    mol2files = [
        get_data_filename(os.path.join('systems', 'monomers', 'ethanol.mol2')),
        get_data_filename(
            os.path.join('systems', 'monomers', 'cyclohexane.mol2')),
    ]

    # Read every molecule from every monomer file
    flavor = (oechem.OEIFlavor_Generic_Default
              | oechem.OEIFlavor_MOL2_Default
              | oechem.OEIFlavor_MOL2_Forcefield)
    mols = []
    for mol2file in mol2files:
        ifs = oechem.oemolistream(mol2file)
        try:
            ifs.SetFlavor(oechem.OEFormat_MOL2, flavor)
            mol = oechem.OEGraphMol()
            while oechem.OEReadMolecule(ifs, mol):
                oechem.OETriposAtomNames(mol)
                # Store a copy: `mol` is reused as the read buffer
                mols.append(oechem.OEGraphMol(mol))
        finally:
            ifs.close()

    # Set up system
    system = forcefield.createSystem(pdbfile.topology, mols)

    tmpfiles = []
    try:
        # mkstemp hands back an *open* OS-level descriptor along with the
        # path; only the path is needed, so close the descriptor right away
        # instead of leaking it.
        for suffix in ('.top', '.gro'):
            fd, path = tempfile.mkstemp(suffix=suffix)
            os.close(fd)
            tmpfiles.append(path)
        topfile, grofile = tmpfiles

        # Save to GROMACS
        save_system_to_gromacs(pdbfile.topology, system, pdbfile.positions,
                               topfile, grofile)

        # Read back in and cross-check energies
        top = parmed.load_file(topfile)
        # Loaded only to confirm the .gro file can be parsed
        parmed.load_file(grofile)
        gromacssys = top.createSystem(nonbondedMethod=app.NoCutoff,
                                      constraints=None,
                                      implicitSolvent=None)
        # Return values are not inspected here; presumably the helper raises
        # on a mismatch -- TODO confirm against compare_system_energies.
        compare_system_energies(pdbfile.topology, pdbfile.topology,
                                gromacssys, system, pdbfile.positions,
                                verbose=False)
    finally:
        # Remove temp files, whether or not the comparison succeeded
        for path in tmpfiles:
            os.remove(path)
def test_smirff_energies_vs_parmatfrosst(verbose=False):
    """Compare SMIRFF energies with parm@frosst reference systems.

    For each of a fixed list of AlkEthOH molecules, the mol2 file is loaded as
    an OEMol and handed, together with the matching ``.top``/``.crd`` files and
    the parm@frosst-compatible force field, to ``compare_molecule_energies``.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``compare_molecule_energies``.
    """
    from openeye import oechem

    mol2_flavor = (oechem.OEIFlavor_Generic_Default
                   | oechem.OEIFlavor_MOL2_Default
                   | oechem.OEIFlavor_MOL2_Forcefield)

    for name in ['r118', 'r12', 'c1161', 'r0', 'c100', 'c38', 'c1266']:
        base = os.path.join('molecules', 'AlkEthOH_' + name)
        mol2file = get_data_filename(base + '.mol2')
        prmtop = get_data_filename(base + '.top')
        crd = get_data_filename(base + '.crd')

        # Special parm@frosst with parm99/parm@frosst bugs re-added for
        # testing; a fresh instance is loaded for every molecule.
        forcefield = ForceField(
            get_data_filename('forcefield/Frosst_AlkEtOH_parmAtFrosst.ffxml'))

        # Read the molecule with force-field flavored MOL2 parsing
        mol = oechem.OEGraphMol()
        istream = oechem.oemolistream(mol2file)
        istream.SetFlavor(oechem.OEFormat_MOL2, mol2_flavor)
        oechem.OEReadMolecule(istream, mol)
        oechem.OETriposAtomNames(mol)

        # The comparison helper's return value is not used here
        compare_molecule_energies(prmtop, crd, forcefield, mol,
                                  verbose=verbose)
def test_merge_system():
    """Merge an AMBER-derived system with a SMIRFF-derived one.

    The first system comes from the cyclohexane/ethanol prmtop/inpcrd pair; the
    second is built for AlkEthOH_c100 with the Frosst_AlkEtOH force field. Both
    are passed to ``merge_system`` with verbose output enabled.
    """
    from openeye import oechem

    # System 0: straight from AMBER input files
    amber_base = os.path.join('systems', 'amber', 'cyclohexane_ethanol_0.4_0.6')
    prmtop = get_data_filename(amber_base + '.prmtop')
    incrd = get_data_filename(amber_base + '.inpcrd')
    topology0, system0, positions0 = create_system_from_amber(prmtop, incrd)

    # System 1: a single small molecule parameterized with SMIRFF
    istream = oechem.oemolistream(
        get_data_filename('molecules/AlkEthOH_c100.mol2'))
    mol = oechem.OEMol()
    istream.SetFlavor(
        oechem.OEFormat_MOL2,
        oechem.OEIFlavor_Generic_Default
        | oechem.OEIFlavor_MOL2_Default
        | oechem.OEIFlavor_MOL2_Forcefield)
    oechem.OEReadMolecule(istream, mol)
    oechem.OETriposAtomNames(mol)

    forcefield = ForceField(
        get_data_filename('forcefield/Frosst_AlkEtOH.ffxml'))
    topology1, system1, positions1 = create_system_from_molecule(
        forcefield, mol)

    merge_system(topology0, topology1,
                 system0, system1,
                 positions0, positions1,
                 verbose=True)
def test_improper(verbose=False):
    """Test implementation of impropers on benzene.

    Benzene is evaluated with the minimal benzene force field and with the
    reference AMBER ``.top``/``.crd`` files; the 'torsion' energy groups of the
    two must agree to within a relative error of 2e-5.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``compare_molecule_energies``.

    Raises
    ------
    Exception
        If the relative difference in torsion energy exceeds 2e-5.
    """
    from openeye import oechem

    # Load benzene
    ifs = oechem.oemolistream(get_data_filename('molecules/benzene.mol2'))
    try:
        mol = oechem.OEMol()
        flavor = (oechem.OEIFlavor_Generic_Default
                  | oechem.OEIFlavor_MOL2_Default
                  | oechem.OEIFlavor_MOL2_Forcefield)
        ifs.SetFlavor(oechem.OEFormat_MOL2, flavor)
        oechem.OEReadMolecule(ifs, mol)
    finally:
        # Release the stream even if reading fails
        ifs.close()

    # Load forcefield
    ff = ForceField(get_data_filename('forcefield/benzene_minimal.ffxml'))

    # Load AMBER files and compare. skip_assert=True is passed so that the
    # tolerance check below is the one that decides the outcome.
    crd = get_data_filename('molecules/benzene.crd')
    top = get_data_filename('molecules/benzene.top')
    g0, g1, e0, e1 = compare_molecule_energies(top, crd, ff, mol,
                                               verbose=verbose,
                                               skip_assert=True)

    # Torsional energies must agree to a relative error of 2e-5. This will not
    # be tiny because we use six-fold impropers and they use a single improper.
    rel_error = np.abs((g0['torsion'] - g1['torsion']) / g0['torsion'])
    if rel_error > 2e-5:
        raise Exception(
            "Improper torsion energy for benzene differs too much (relative error %.4g) between AMBER and SMIRFF."
            % rel_error)
def test_MDL_aromaticity(verbose=False):
    """Test support for alternate aromaticity models.

    Labels benzofuran ('c12c(cccc1)occ2') with the force field defined by
    ``ffxml_MDL_contents`` and requires that the bond between atoms 6 and 7 is
    assigned parameter 'b16'.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``ForceField.labelMolecules``.

    Raises
    ------
    Exception
        If no harmonic-bond label with ID 'b16' on atoms [6, 7] is found.
    """
    from openeye import oechem

    ff = ForceField(StringIO(ffxml_MDL_contents))

    mol = oechem.OEMol()
    oechem.OEParseSmiles(mol, 'c12c(cccc1)occ2')
    oechem.OEAddExplicitHydrogens(mol)

    # Honor the caller's verbosity instead of always printing
    labels = ff.labelMolecules([mol], verbose=verbose)

    # The bond 6-7 should get the b16 parameter iff the MDL model is working,
    # otherwise it will pick up just the generic
    details = labels[0]['HarmonicBondGenerator']
    found = any(pid == 'b16' and atom_indices == [6, 7]
                for (atom_indices, pid, smirks) in details)
    if not found:
        raise Exception("Didn't find right param.")
def test_create_system_boxes_parmatfrosst(verbose=False):
    """Generate System-creation checks for mixed-solvent boxes using the parm@frosst force field.

    This is a generator: it re-yields every item produced by ``check_boxes``
    for the Frosst_AlkEtOH force field.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``check_boxes``.
    """
    ffxml_path = get_data_filename('forcefield/Frosst_AlkEtOH.ffxml')
    forcefield = ForceField(ffxml_path)
    box_checks = check_boxes(forcefield,
                             description="to test Parm@frosst parameters",
                             verbose=verbose)
    for check in box_checks:
        yield check
def test_create_system_molecules_parmatfrosst_gbsa(verbose=False):
    """Generate System-creation checks for small molecules using the GBSA-enabled parm@frosst force field.

    This is a generator: it re-yields every item produced by ``check_AlkEtOH``
    for the Frosst_AlkEtOH_GBSA force field.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``check_AlkEtOH``.
    """
    ffxml_path = get_data_filename('forcefield/Frosst_AlkEtOH_GBSA.ffxml')
    forcefield = ForceField(ffxml_path)
    molecule_checks = check_AlkEtOH(forcefield,
                                    "to test Parm@Frosst parameters",
                                    verbose=verbose)
    for check in molecule_checks:
        yield check
def test_molecule_labeling(verbose=False):
    """Verify that labelMolecules reports every expected force type for propane.

    Propane ('CCC' with explicit hydrogens) is labeled with the Frosst_AlkEtOH
    force field; the label dictionary of the first (only) molecule must contain
    entries for bonds, angles, torsions and nonbonded terms.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``ForceField.labelMolecules``.

    Raises
    ------
    Exception
        If any of the four expected force types is missing from the labels.
    """
    from openeye import oechem

    propane = oechem.OEMol()
    oechem.OEParseSmiles(propane, 'CCC')
    oechem.OEAddExplicitHydrogens(propane)

    forcefield = ForceField(get_data_filename('forcefield/Frosst_AlkEtOH.ffxml'))
    mol_labels = forcefield.labelMolecules([propane], verbose=verbose)[0]

    # Show which force types were labeled, then require each expected one
    print(mol_labels.keys())
    required = (
        ('HarmonicBondGenerator', "No force term assigned for harmonic bonds."),
        ('HarmonicAngleGenerator', "No force term assigned for harmonic angles."),
        ('PeriodicTorsionGenerator', "No force term assigned for periodic torsions."),
        ('NonbondedGenerator', "No nonbonded force term assigned."),
    )
    for generator, message in required:
        if generator not in mol_labels.keys():
            raise Exception(message)
def test_molecule_labeling(verbose=False):
    """Check that labeling propane yields labels for all four force types.

    Labels 'CCC' (explicit hydrogens added) using the Frosst_AlkEtOH force
    field and confirms that bond, angle, torsion and nonbonded generators all
    appear among the labels of the first molecule.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``ForceField.labelMolecules``.

    Raises
    ------
    Exception
        On the first expected force type that is absent.
    """
    from openeye import oechem

    mol = oechem.OEMol()
    oechem.OEParseSmiles(mol, 'CCC')
    oechem.OEAddExplicitHydrogens(mol)

    ffxml_path = get_data_filename('forcefield/Frosst_AlkEtOH.ffxml')
    labels = ForceField(ffxml_path).labelMolecules([mol], verbose=verbose)
    first = labels[0]

    def _require(force_type, complaint):
        # Fail with the given message when the force type was not labeled
        if force_type not in first.keys():
            raise Exception(complaint)

    # Check that force terms aren't empty
    print(first.keys())
    _require('HarmonicBondGenerator',
             "No force term assigned for harmonic bonds.")
    _require('HarmonicAngleGenerator',
             "No force term assigned for harmonic angles.")
    _require('PeriodicTorsionGenerator',
             "No force term assigned for periodic torsions.")
    _require('NonbondedGenerator',
             "No nonbonded force term assigned.")
def test_create_system_molecules_features(verbose=False):
    """Generate System-creation checks for small molecules exercising various ffxml features.

    This is a generator: for each charge method (``None``, 'BCC',
    'OECharges_AM1BCCSym') it re-yields every item produced by
    ``check_AlkEtOH`` using the force field defined by ``ffxml_contents``.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``check_AlkEtOH``.
    """
    forcefield = ForceField(StringIO(ffxml_contents))

    for method in (None, 'BCC', 'OECharges_AM1BCCSym'):
        label = "to test ffxml features with charge method %s" % str(method)
        molecule_checks = check_AlkEtOH(forcefield,
                                        description=label,
                                        chargeMethod=method,
                                        verbose=verbose)
        for check in molecule_checks:
            yield check
def test_create_system_boxes_features(verbose=False):
    """Generate System-creation checks for mixed-solvent boxes exercising ffxml features.

    This is a generator: for each charge method (``None``, 'BCC',
    'OECharges_AM1BCCSym') it re-yields every item produced by ``check_boxes``
    using the force field defined by ``ffxml_contents``.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``check_boxes``.
    """
    forcefield = ForceField(StringIO(ffxml_contents))

    for method in (None, 'BCC', 'OECharges_AM1BCCSym'):
        label = ("to test Parm@frosst parameters with charge method %s"
                 % str(method))
        box_checks = check_boxes(forcefield,
                                 description=label,
                                 chargeMethod=method,
                                 verbose=verbose)
        for check in box_checks:
            yield check
def test_change_parameters(verbose=False):
    """Test modification of forcefield parameters.

    Builds a system for AlkEthOH_c100, sets the ``k`` value of one angle
    parameter to zero, rebuilds the system from the same ``ForceField`` object
    and requires the energy to change.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``ForceField.createSystem``; also enables printing of the
        new and old energies.

    Raises
    ------
    Exception
        If the energy changes by less than 0.1 after the modification.
    """
    from openeye import oechem
    from smarty.forcefield import generateTopologyFromOEMol

    angle_smirks = '[a,A:1]-[#6X4:2]-[a,A:3]'

    # Load simple OEMol
    ifs = oechem.oemolistream(
        get_data_filename('molecules/AlkEthOH_c100.mol2'))
    try:
        mol = oechem.OEMol()
        flavor = (oechem.OEIFlavor_Generic_Default
                  | oechem.OEIFlavor_MOL2_Default
                  | oechem.OEIFlavor_MOL2_Forcefield)
        ifs.SetFlavor(oechem.OEFormat_MOL2, flavor)
        oechem.OEReadMolecule(ifs, mol)
    finally:
        # Release the stream once the molecule has been read
        ifs.close()
    oechem.OETriposAtomNames(mol)

    # Load forcefield file
    ff = ForceField(get_data_filename('forcefield/Frosst_AlkEtOH.ffxml'))
    topology = generateTopologyFromOEMol(mol)

    # Reference energy before parameter modification
    system = ff.createSystem(topology, [mol], verbose=verbose)
    positions = positions_from_oemol(mol)
    old_energy = get_energy(system, positions)

    # Zero out the force constant of one angle parameter
    params = ff.getParameter(smirks=angle_smirks)
    params['k'] = '0.0'
    ff.setParameter(params, smirks=angle_smirks)

    # Exercise writing the modified parameters; the context manager closes
    # (and thereby discards) the temporary file deterministically.
    with tempfile.TemporaryFile(suffix='.ffxml') as ffxml_file:
        ff.writeFile(ffxml_file)

    # Make sure params changed energy! (Tests whether they get rebuilt on
    # system creation)
    system = ff.createSystem(topology, [mol], verbose=verbose)
    energy = get_energy(system, positions)
    if verbose:
        print("New energy/old energy:", energy, old_energy)
    if np.abs(energy - old_energy) < 0.1:
        raise Exception("Error: Parameter modification did not change energy.")
def test_change_parameters(verbose=False):
    """Test modification of forcefield parameters.

    The energy of AlkEthOH_c100 is computed before and after the ``k`` value of
    the angle parameter '[a,A:1]-[#6X4:2]-[a,A:3]' is set to zero; the two
    energies must differ.

    Parameters
    ----------
    verbose : bool, optional
        Forwarded to ``ForceField.createSystem``; when true the new and old
        energies are also printed.

    Raises
    ------
    Exception
        If the absolute energy difference is below 0.1.
    """
    from openeye import oechem
    from smarty.forcefield import generateTopologyFromOEMol

    target_smirks = '[a,A:1]-[#6X4:2]-[a,A:3]'
    mol2_flavor = (oechem.OEIFlavor_Generic_Default
                   | oechem.OEIFlavor_MOL2_Default
                   | oechem.OEIFlavor_MOL2_Forcefield)

    # Load simple OEMol, closing the input stream when done
    mol = oechem.OEMol()
    ifs = oechem.oemolistream(
        get_data_filename('molecules/AlkEthOH_c100.mol2'))
    try:
        ifs.SetFlavor(oechem.OEFormat_MOL2, mol2_flavor)
        oechem.OEReadMolecule(ifs, mol)
    finally:
        ifs.close()
    oechem.OETriposAtomNames(mol)

    # Load forcefield file and build the topology
    ffxml = get_data_filename('forcefield/Frosst_AlkEtOH.ffxml')
    ff = ForceField(ffxml)
    topology = generateTopologyFromOEMol(mol)

    # Initial energy before parameter modification
    positions = positions_from_oemol(mol)
    old_energy = get_energy(
        ff.createSystem(topology, [mol], verbose=verbose), positions)

    # Get params for an angle and modify them
    params = ff.getParameter(smirks=target_smirks)
    params['k'] = '0.0'
    ff.setParameter(params, smirks=target_smirks)

    # Write params to a scratch file that is closed as soon as writing is
    # done, rather than whenever the object happens to be collected
    with tempfile.TemporaryFile(suffix='.ffxml') as scratch:
        ff.writeFile(scratch)

    # Make sure params changed energy! (Test whether they get rebuilt on
    # system creation)
    energy = get_energy(
        ff.createSystem(topology, [mol], verbose=verbose), positions)
    if verbose:
        print("New energy/old energy:", energy, old_energy)
    if np.abs(energy - old_energy) < 0.1:
        raise Exception("Error: Parameter modification did not change energy.")
def test_parameter_completeness_check(self):
    """Check that createSystem raises when a required parameter has been wiped out.

    For each force type (nonbonded, bond, angle, torsion) one parameter of the
    Frosst_AlkEtOH force field is read, its SMIRKS is replaced with a pattern
    on atomic number 136, and the result is written over one or two parameter
    IDs. ``createSystem`` for propane must then raise. A fresh force field is
    loaded after every scenario.
    """
    from openeye import oechem

    def _load_forcefield():
        # Fresh, unmodified copy of the force field
        return ForceField(get_data_filename('forcefield/Frosst_AlkEtOH.ffxml'))

    mol = oechem.OEMol()
    oechem.OEParseSmiles(mol, 'CCC')
    oechem.OEAddExplicitHydrogens(mol)
    oechem.OETriposAtomNames(mol)
    ff = _load_forcefield()
    topology = generateTopologyFromOEMol(mol)

    # (parameter ID read, replacement SMIRKS, parameter IDs overwritten)
    scenarios = (
        # nonbonded: wipe out required LJ parameters
        ('n0001', '[#136:1]', ('n0001', 'n0002')),
        # bond
        ('b0001', '[#136:1]~[*:2]', ('b0001',)),
        # angle
        ('a0001', '[#136:1]~[*:2]~[*:3]', ('a0001',)),
        # torsion
        ('t0001', '[#136:1]~[*:2]~[*:3]~[*:4]', ('t0001', 't0004')),
    )

    for source_id, dead_smirks, target_ids in scenarios:
        params = ff.getParameter(paramID=source_id)
        params['smirks'] = dead_smirks
        for target_id in target_ids:
            ff.setParameter(paramID=target_id, params=params)
        with self.assertRaises(Exception):
            ff.createSystem(topology, [mol])
        # Start the next scenario from an unmodified force field
        ff = _load_forcefield()
def get_molecule_parameterIDs(oemols, ffxml):
    """Process a list of oemols with a specified SMIRFF ffxml file and determine which parameters are used by
    which molecules, returning collated results.

    Parameters
    ----------
    oemols : list
        List of OpenEye OEChem molecules to parse; must have explicit hydrogens.
    ffxml : str
        SMIRFF ffxml source; passed unchanged to the ``ForceField`` constructor.

    Returns
    -------
    parameters_by_molecule : dict
        Parameter IDs used in each molecule, keyed by isomeric SMILES
        generated from provided OEMols. Each entry in the dict is a list
        which does not necessarily have unique entries; i.e. parameter IDs
        which are used more than once will occur multiple times.
    parameters_by_ID : dict
        Molecules in which each parameter ID occur, keyed by parameter ID.
        Each entry in the dict is a set of isomeric SMILES for molecules
        in which that parameter occurs. No frequency information is stored.

    Raises
    ------
    ValueError
        If two of the provided molecules have the same isomeric SMILES.
    """
    # Generate isomeric SMILES, in input order. A set tracks what has been
    # seen so the duplicate check stays O(1) per molecule.
    isosmiles = []
    seen = set()
    for mol in oemols:
        smi = oechem.OECreateIsoSmiString(mol)
        if smi in seen:
            raise ValueError("Error: get_molecule_parameterIDs has been provided a list of oemols which contains the same molecule, having isomeric smiles %s, more than once." % smi)
        seen.add(smi)
        isosmiles.append(smi)

    # Label molecules
    ff = ForceField(ffxml)
    labels = ff.labelMolecules(oemols)

    # Organize labels into the two output dictionaries
    parameters_by_molecule = {}
    parameters_by_ID = {}
    for idx, smi in enumerate(isosmiles):
        pids = parameters_by_molecule[smi] = []
        # labels[idx] maps each force type to (atom_indices, pid, smirks) tuples
        for entries in labels[idx].values():
            for (atom_indices, pid, smirks) in entries:
                # Store pid to molecule (repeats kept on purpose)
                pids.append(pid)
                # Store which molecule this pid occurred in
                parameters_by_ID.setdefault(pid, set()).add(smi)

    return parameters_by_molecule, parameters_by_ID
def test_read_ffxml_gbsa():
    """Check that the GBSA-enabled Frosst_AlkEtOH ffxml file can be loaded into a ForceField.
    """
    ffxml_path = get_data_filename('forcefield/Frosst_AlkEtOH_GBSA.ffxml')
    # Constructing the object is the whole test; the instance is not used
    ForceField(ffxml_path)
def test_component_combination():
    """Rebuild a system from its components and check that the energy is unchanged.

    We've had issues where subsequent instances of a molecule might have zero
    charges. This test tries to catch that (and also explicitly checks the
    charges) by splitting a ParmEd structure of a cyclohexane/ethanol box into
    its components, re-composing it, and comparing energies.

    Raises
    ------
    Exception
        If the structure does not split into exactly two components, or if
        the absolute atomic charges of any residue sum to zero.
    """
    from simtk.openmm.app import PDBFile

    forcefield = ForceField(
        get_data_filename('forcefield/Frosst_AlkEtOH.ffxml'))
    box_path = get_data_filename(
        os.path.join('systems', 'packmol_boxes',
                     'cyclohexane_ethanol_0.4_0.6.pdb'))
    pdbfile = PDBFile(box_path)

    monomer_files = [
        get_data_filename(os.path.join('systems', 'monomers', 'ethanol.mol2')),
        get_data_filename(
            os.path.join('systems', 'monomers', 'cyclohexane.mol2')),
    ]

    mol2_flavor = (oechem.OEIFlavor_Generic_Default
                   | oechem.OEIFlavor_MOL2_Default
                   | oechem.OEIFlavor_MOL2_Forcefield)
    mols = []
    mol = oechem.OEMol()
    for monomer_file in monomer_files:
        istream = oechem.oemolistream(monomer_file)
        istream.SetFlavor(oechem.OEFormat_MOL2, mol2_flavor)
        mol = oechem.OEGraphMol()
        while oechem.OEReadMolecule(istream, mol):
            oechem.OETriposAtomNames(mol)
            # Keep a copy; `mol` is reused as the read buffer
            mols.append(oechem.OEGraphMol(mol))

    # Set up system with AM1-BCC charges
    system = forcefield.createSystem(pdbfile.topology, mols,
                                     chargeMethod='OECharges_AM1BCCSym')

    # Make ParmEd structure
    structure = parmed.openmm.topsystem.load_topology(
        pdbfile.topology, system, pdbfile.positions)

    # Split into (component structure, number of copies) pairs
    pieces = [(component, len(members))
              for component, members in structure.split()]

    # Re-compose the system from its components
    first_component, first_count = pieces[0]
    new_structure = first_component * first_count
    for component, count in pieces[1:]:
        new_structure += component * count

    # Swap in coordinates again
    new_structure.positions = structure.positions

    # Create System from the re-composed structure
    newsys = new_structure.createSystem(nonbondedMethod=app.NoCutoff,
                                        constraints=None,
                                        implicitSolvent=None)

    # Cross check energies
    groups0, groups1, energy0, energy1 = compare_system_energies(
        pdbfile.topology, pdbfile.topology, system, newsys,
        pdbfile.positions, verbose=False)

    # The number of components must be what we expect
    if len(pieces) != 2:
        print("Error: Test system has incorrect number of components.")
        raise Exception(
            'Incorrect number of components in cyclohexane/ethanol test system.'
        )

    # No residue may have all-zero charges
    for resnr, residue in enumerate(structure.residues):
        abscharges = [abs(atom.charge) for atom in residue.atoms]
        if sum(abscharges) == 0:
            raise Exception(
                'Error: Residue %s in cyclohexane-ethanol test system has a charge of zero, which is incorrect.'
                % resnr)
def test_read_ffxml():
    """Check that the Frosst_AlkEtOH ffxml file can be loaded into a ForceField.
    """
    ffxml_path = get_data_filename('forcefield/Frosst_AlkEtOH.ffxml')
    # Constructing the object is the whole test; the instance is not used
    ForceField(ffxml_path)
def convert_frcmod_to_ffxml(infile, inxml, outxml):
    """Convert a modified AMBER frcmod (with SMIRKS replacing atom types) to SMIRFF ffxml format by inserting
    parameters into a template ffxml file.

    Parameters
    ----------
    infile : str
        File name of input SMIRKS-ified frcmod file containing parameters
    inxml : str
        File name of template SMIRFF FFXML file into which to insert these parameters.
    outxml : str
        File name of resulting output SMIRFF FFXML

    Raises
    ------
    ValueError
        If a non-torsion parameter with the same SMIRKS is already present in
        the force field, or if a data line appears before any section header.

    Notes
    -----
    Input XML file will normally be the template of a SMIRFF XML file without
    any parameters present (but with requisite force types already specified).

    Sections absent from the frcmod file are treated as empty. Repeated
    torsion lines for the same SMIRKS are merged into one parameter as
    additional numbered terms (``k2``, ``periodicity2``, ...).
    """
    # One entry per frcmod section, in processing order:
    # (section name, FFXML tag, FFXML force name, parameter ID prefix, line parser)
    section_specs = (
        ('NONBON', 'Atom', 'NonbondedForce', 'n', _parse_nonbon_line),
        ('BOND', 'Bond', 'HarmonicBondForce', 'b', _parse_bond_line),
        ('ANGL', 'Angle', 'HarmonicAngleForce', 'a', _parse_angl_line),
        ('IMPR', 'Improper', 'PeriodicTorsionForce', 'i', _parse_impr_line),
        ('DIHE', 'Proper', 'PeriodicTorsionForce', 't', _parse_dihe_line),
    )
    torsion_sections = ('IMPR', 'DIHE')

    # Obtain sections from target file
    with open(infile, 'r') as handle:
        text = handle.readlines()
    sections = _split_frcmod_sections(
        text, [spec[0] for spec in section_specs])

    # Read template forcefield file
    ff = ForceField(inxml)

    # Parse each section and add its parameters to the force field
    for name, tag, force_name, prefix, parse_line in section_specs:
        next_id = 1
        for line in sections.get(name, []):
            params = parse_line(line)
            params['id'] = prefix + str(next_id)
            smirks = params['smirks']

            # Constructed only so that an unparsable SMIRKS is caught here;
            # presumably ChemicalEnvironment raises on invalid input -- TODO confirm
            environment.ChemicalEnvironment(smirks)

            existing = ff.getParameter(smirks, force_type=force_name)

            if name not in torsion_sections:
                # Non-torsions: a SMIRKS may only be defined once
                if existing:
                    raise ValueError("Error: parameter for %s is already present in forcefield." % smirks)
                ff.addParameter(params, smirks, force_name, tag)
                next_id += 1
            elif existing:
                # Torsion already present: add this line as a new term.
                # Look for the first unused term number in the *existing*
                # parameter so earlier terms (k2 etc.) are not overwritten.
                idnr = 1
                while 'k%s' % idnr in existing:
                    idnr += 1
                # Renumber the freshly parsed term-1 fields to the free slot
                for key in ('periodicity1', 'phase1', 'idivf1', 'k1'):
                    if key in params:
                        existing[key[:-1] + str(idnr)] = params.pop(key)
                ff.setParameter(existing, smirks=smirks, force_type=force_name)
            else:
                # New torsion: store it; only a newly added parameter consumes an ID
                ff.addParameter(params, smirks, force_name, tag)
                next_id += 1

    # Write SMIRFF XML file
    ff.writeFile(outxml)

    # Roundtrip to fix formatting (for some reason etree won't format it
    # properly on first write after modification)
    ForceField(outxml).writeFile(outxml)


def _split_frcmod_sections(lines, secnames):
    """Group the data lines of a frcmod file by the section header they follow.

    Blank lines and lines whose first character is '#' are skipped. A line
    whose first whitespace-separated token is in `secnames` starts (or
    restarts) that section; every other line is stored, unmodified, under the
    current section. Returns a dict mapping section name to a list of lines.
    """
    sections = {}
    current = None
    for line in lines:
        fields = line.split()
        if not fields or line.startswith('#'):
            continue
        if fields[0] in secnames:
            current = fields[0]
            sections[current] = []
        elif current is None:
            raise ValueError(
                "Error: found data line %r before any section header (expected one of %s)."
                % (line, ', '.join(secnames)))
        else:
            sections[current].append(line)
    return sections
def get_molecule_parameterIDs(oemols, ffxml):
    """Process a list of oemols with a specified SMIRFF ffxml file and determine which parameters are used by
    which molecules, returning collated results.

    Parameters
    ----------
    oemols : list
        List of OpenEye OEChem molecules to parse; must have explicit hydrogens.
    ffxml : str
        SMIRFF ffxml source; passed unchanged to the ``ForceField`` constructor.

    Returns
    -------
    parameters_by_molecule : dict
        Parameter IDs used in each molecule, keyed by isomeric SMILES
        generated from provided OEMols. Each entry in the dict is a list
        which does not necessarily have unique entries; i.e. parameter IDs
        which are used more than once will occur multiple times.
    parameters_by_ID : dict
        Molecules in which each parameter ID occur, keyed by parameter ID.
        Each entry in the dict is a set of isomeric SMILES for molecules
        in which that parameter occurs. No frequency information is stored.

    Raises
    ------
    ValueError
        If two of the provided molecules have the same isomeric SMILES.
    """
    # Create storage
    parameters_by_molecule = {}
    parameters_by_ID = {}

    # Generate isomeric SMILES; the list keeps input order while the set
    # gives a constant-time "already provided?" check.
    isosmiles = []
    already_seen = set()
    for mol in oemols:
        smi = oechem.OECreateIsoSmiString(mol)
        # If the molecule is already here, raise exception
        if smi in already_seen:
            raise ValueError(
                "Error: get_molecule_parameterIDs has been provided a list of oemols which contains the same molecule, having isomeric smiles %s, more than once."
                % smi)
        already_seen.add(smi)
        isosmiles.append(smi)

    # Label molecules
    labels = ForceField(ffxml).labelMolecules(oemols)

    # Organize labels into output dictionaries, one molecule at a time
    for idx, smi in enumerate(isosmiles):
        used_pids = []
        parameters_by_molecule[smi] = used_pids
        data = labels[idx]
        for force_type in data:
            for (atom_indices, pid, smirks) in data[force_type]:
                # Every occurrence is recorded per molecule ...
                used_pids.append(pid)
                # ... while per parameter only membership is recorded
                parameters_by_ID.setdefault(pid, set()).add(smi)

    return parameters_by_molecule, parameters_by_ID