Ejemplo n.º 1
0
def gen_canonical_isomeric_smiles(oemol):
    """Build a canonically ordered OpenFF molecule and its isomeric SMILES.

    Parameters
    ----------
    oemol
        OpenEye molecule. It is copied with ``oechem.OEMol`` first; the copy
        is what gets converted and written out as SMILES.

    Returns
    -------
    molecule
        The OpenFF molecule returned by ``canonical_order_atoms()``, with
        ``properties["atom_map"]`` filled in.
    canonical_isomeric_smiles
        ``molecule.to_smiles(isomeric=True, explicit_hydrogens=True,
        mapped=False)``.
    center_bond
        Set of the atom indices used as keys of ``atom_map``.
    """
    # 1. Create an OpenFF molecule from the OpenEye molecule, guessing the
    #    stereochemistry if needed.
    oe_molecule = oechem.OEMol(oemol)
    try:
        molecule = Molecule.from_openeye(oe_molecule)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit must propagate instead of being treated as a
        # stereochemistry problem.
        molecule = Molecule.from_openeye(oe_molecule,
                                         allow_undefined_stereo=True)
        stereoisomers = molecule.enumerate_stereoisomers(undefined_only=True,
                                                         max_isomers=1)
        # Keep the stereo-undefined molecule when nothing was enumerated.
        if len(stereoisomers) > 0:
            molecule = stereoisomers[0]

    # 2. Canonically order the molecule
    molecule = molecule.canonical_order_atoms()

    # 3. Figure out which atoms in the canonical molecule should be tagged.
    #    Only the first match is used; indexing with [0] fails if the query
    #    produces no match at all.
    mapped_smiles = oechem.OEMolToSmiles(oe_molecule)
    torsion_match = molecule.chemical_environment_matches(mapped_smiles)[0]

    # 4. Record a 1-based map index for every matched atom, keyed by the
    #    atom's index in the canonical molecule.
    molecule.properties["atom_map"] = {
        atom_index: position + 1
        for position, atom_index in enumerate(torsion_match)
    }
    center_bond = set(molecule.properties["atom_map"].keys())

    # NOTE(review): the SMILES is requested with mapped=False even though an
    # atom map was just stored — confirm this is intentional.
    canonical_isomeric_smiles = molecule.to_smiles(isomeric=True,
                                                   explicit_hydrogens=True,
                                                   mapped=False)
    return molecule, canonical_isomeric_smiles, center_bond
Ejemplo n.º 2
0
def test_mapping_strength_levels(pairs_of_smiles=(('Cc1ccccc1', 'c1ccc(cc1)N'),
                                                  ('CC(c1ccccc1)', 'O=C(c1ccccc1)'),
                                                  ('Oc1ccccc1', 'Sc1ccccc1')),
                                 test=True):
    """
    Propose a transformation for each ligand pair at every map strength and
    compare the number of unique old/new atoms with tabulated values.

    Parameters
    ----------
    pairs_of_smiles : sequence of (str, str)
        Ligand pairs as SMILES. The default is an immutable tuple so the
        default value cannot be shared and mutated between calls.
        ``correct_results`` is keyed by position in this sequence, so it only
        describes the three default pairs.
    test : bool, default True
        When True, render the atom mapping to ``'{index}-{strength}.png'`` and
        assert the unique-atom counts against ``correct_results``.
    """
    # (number of unique old atoms, number of unique new atoms), keyed by the
    # pair's index and then by map strength.
    correct_results = {0: {'default': (3, 2), 'weak': (3, 2), 'strong': (4, 3)},
                       1: {'default': (7, 3), 'weak': (6, 2), 'strong': (7, 3)},
                       2: {'default': (1, 1), 'weak': (1, 1), 'strong': (2, 2)}}

    mapping = ['weak', 'default', 'strong']

    for example in mapping:
        for index, (lig_a, lig_b) in enumerate(pairs_of_smiles):
            print(f"conducting {example} mapping with ligands {lig_a}, {lig_b}")
            initial_molecule = smiles_to_oemol(lig_a)
            proposed_molecule = smiles_to_oemol(lig_b)
            molecules = [Molecule.from_openeye(mol) for mol in [initial_molecule, proposed_molecule]]
            system_generator = SystemGenerator(forcefields=forcefield_files,
                                               barostat=barostat,
                                               forcefield_kwargs=forcefield_kwargs,
                                               nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                                               small_molecule_forcefield='gaff-1.81',
                                               molecules=molecules,
                                               cache=None)
            proposal_engine = SmallMoleculeSetProposalEngine([initial_molecule, proposed_molecule], system_generator)
            initial_system, initial_positions, initial_topology = OEMol_to_omm_ff(initial_molecule, system_generator)
            print(f"running now with map strength {example}")
            proposal = proposal_engine.propose(initial_system, initial_topology, map_strength=example)
            unique_counts = (len(proposal.unique_old_atoms), len(proposal.unique_new_atoms))
            print(lig_a, lig_b, 'length OLD and NEW atoms', unique_counts[0], unique_counts[1])
            if test:
                # Render first so the picture exists even when the assertion fails.
                render_atom_mapping(f'{index}-{example}.png', initial_molecule, proposed_molecule, proposal._new_to_old_atom_map)
                expected = correct_results[index][example]
                assert unique_counts == expected, f"the mapping failed, correct results are {expected}"
                print("the mapping worked!!!")
            print()
Ejemplo n.º 3
0
def test_small_molecule_proposals():
    """
    Check that the small molecule proposal engine generates the molecules it
    claims to.

    Fifty proposals are drawn in sequence. For each one, the 'MOL' residue of
    the current proposal's new topology is converted back to SMILES and
    compared with that proposal's ``new_chemical_state_key``.
    """
    list_of_smiles = ['CCCC', 'CCCCC', 'CCCCCC']
    list_of_mols = [smiles_to_oemol(smi) for smi in list_of_smiles]
    molecules = [Molecule.from_openeye(oemol) for oemol in list_of_mols]

    # Tally of how often each chemical state key was proposed.
    stats_dict = defaultdict(int)

    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=molecules,
        cache=None)
    proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(list_of_mols, system_generator)
    initial_system, initial_positions, initial_topology = OEMol_to_omm_ff(list_of_mols[0], system_generator)

    proposal = proposal_engine.propose(initial_system, initial_topology)

    for _ in range(50):
        # Positions are ignored here, and we don't want to run the geometry engine.
        new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology)
        stats_dict[new_proposal.new_chemical_state_key] += 1

        # Check that the molecule it generated is actually the smiles we expect.
        matching_molecules = [
            residue for residue in proposal.new_topology.residues()
            if residue.name == 'MOL'
        ]
        if len(matching_molecules) != 1:
            raise ValueError("More than one residue with the same name!")

        residue_oemol = generateOEMolFromTopologyResidue(matching_molecules[0])
        residue_smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(
            oechem.OEMolToSmiles(residue_oemol))
        assert residue_smiles == proposal.new_chemical_state_key

        proposal = new_proposal
Ejemplo n.º 4
0
def molecule_from_record(record: MoleculeESPRecord) -> Molecule:
    """Converts an ``openff-recharge`` ESP record to an Open Force Field
    molecule.

    The molecule is built from ``record.tagged_smiles``, given a single
    conformer taken from ``record.conformer`` (after ``reorder_conformer``),
    and then written to and re-read from a temporary MOL2 file.
    """

    oe_mol = oechem.OEMol()
    oechem.OESmilesToMol(oe_mol, record.tagged_smiles)

    # Must run before the map indices are cleared below, because it is handed
    # the still-tagged molecule.
    coordinates = reorder_conformer(oe_mol, record.conformer)

    # Clear the records index map.
    for oe_atom in oe_mol.GetAtoms():
        oe_atom.SetMapIdx(0)

    # Replace whatever conformers exist with the record's single conformer.
    oe_mol.DeleteConfs()
    oe_mol.NewConf(oechem.OEFloatArray(coordinates.flatten()))

    with NamedTemporaryFile(suffix=".mol2") as mol2_file:

        # Workaround for stereochemistry being incorrectly perceived: convert
        # permissively, then round-trip through a MOL2 file.
        off_molecule = Molecule.from_openeye(oe_mol,
                                             allow_undefined_stereo=True)

        off_molecule.to_file(mol2_file.name, "mol2")
        off_molecule = off_molecule.from_file(mol2_file.name)

    return off_molecule
Ejemplo n.º 5
0
def min_ffxml(mol, ffxml):
    """
    Report whether an OpenMM system can be created for ``mol`` with ``ffxml``.

    Parameters
    ----------
    mol : OpenEye molecule
        Copied into an ``oechem.OEGraphMol`` before use.
    ffxml : string
        Passed to ``ForceField``.

    Nothing is returned; the outcome is only printed.
    """

    # Work on a copy of the input mol.
    oe_mol = oechem.OEGraphMol(mol)

    try:
        # Conversion to an openforcefield molecule is prone to raising.
        off_mol = Molecule.from_openeye(oe_mol)
        force_field = ForceField(ffxml)
        topology = Topology.from_molecules(molecules=[off_mol])
        # System creation is prone to raising as well; the system itself is
        # not needed, only the fact that it could be built.
        force_field.create_openmm_system(topology)
    except Exception as e:
        smilabel = oechem.OEGetSDData(oe_mol, "SMILES QCArchive")
        print(' >>> openforcefield failed to create OpenMM system: '
              f"'{oe_mol.GetTitle()}' '{smilabel}'")
        print(f"{e}\n")
    else:
        print(" >>> successful OpenMM system creation for openforcefield "
              f"mol \"{oe_mol.GetTitle()}\"")
Ejemplo n.º 6
0
def process_molecule(smiles: str) -> Tuple[Optional[Molecule], Optional[str]]:
    """Build a charged, multi-conformer OpenFF molecule with WBOs from SMILES.

    Parameters
    ----------
    smiles
        The SMILES pattern to process.

    Returns
    -------
        ``(molecule, None)`` on success, or ``(None, message)`` when any step
        raises an ordinary exception. ``message`` starts with
        ``"Failed to process <smiles>: "``.

    Notes
    -----
    Only ``Exception`` subclasses are converted into an error message.
    ``KeyboardInterrupt`` and ``SystemExit`` propagate, so a batch run can
    still be interrupted.
    """

    molecule = None
    error = None

    try:
        oe_molecule = smiles_to_molecule(smiles, guess_stereochemistry=True)

        # Generate a set of conformers and charges for the molecule.
        conformers = ConformerGenerator.generate(oe_molecule,
                                                 ConformerSettings())
        charges = ChargeGenerator.generate(oe_molecule, conformers,
                                           ChargeSettings())

        # Add the charges and conformers to the OE object.
        for oe_atom in oe_molecule.GetAtoms():
            oe_atom.SetPartialCharge(charges[oe_atom.GetIdx()].item())

        # Drop any existing conformers before attaching the generated ones.
        oe_molecule.DeleteConfs()

        for conformer in conformers:
            oe_molecule.NewConf(oechem.OEFloatArray(conformer.flatten()))

        # Map to an OpenFF molecule object.
        molecule = Molecule.from_openeye(oe_molecule)

        # Compute the WBOs
        molecule.assign_fractional_bond_orders(
            "am1-wiberg", use_conformers=molecule.conformers)

    except Exception as e:
        # Discard a partially processed molecule (e.g. WBO assignment failed
        # after the conversion succeeded).
        molecule = None
        error = f"Failed to process {smiles}: {str(e)}"

    return molecule, error
Ejemplo n.º 7
0
def data_generator():
    """Yield ``(xyz, qm_force, system)`` for snapshots of ten random records.

    Ten record names are sampled from ``ds_qc.data.records``. For each record
    that has a trajectory, every snapshot yields a tuple of:

    * ``xyz`` - float32 tensor of ``mol.geometry * BOHR_TO_NM``
    * ``qm_force`` - float32 tensor of
      ``snapshot.return_result * HARTREE_PER_BOHR_TO_KJ_PER_MOL_PER_NM``
    * ``system`` - the result of ``FF.create_openmm_system`` for the snapshot

    A record that raises an ordinary exception is reported and skipped; the
    generator then moves on to the next record.
    """
    for record_name in random.sample(list(ds_qc.data.records), 10):
        try:
            print(record_name, flush=True)
            record = ds_qc.get_record(record_name, specification='default')
            if record is None:
                continue

            trajectory = record.get_trajectory()
            if trajectory is None:
                continue

            for snapshot in trajectory:
                qc_molecule = snapshot.get_molecule()

                xyz = tf.convert_to_tensor(
                    qc_molecule.geometry * BOHR_TO_NM,
                    dtype=tf.float32)

                qm_force = tf.convert_to_tensor(
                    snapshot.return_result
                    * HARTREE_PER_BOHR_TO_KJ_PER_MOL_PER_NM,
                    dtype=tf.float32)

                oe_molecule = cmiles.utils.load_molecule(
                    qc_molecule.dict(encoding='json'))

                topology = Topology.from_molecules(
                    Molecule.from_openeye(oe_molecule))
                system = FF.create_openmm_system(topology)

                yield (xyz, qm_force, system)

        except Exception as error:
            # Deliberately not a bare ``except``: GeneratorExit (raised at the
            # ``yield`` when the generator is closed) must propagate, otherwise
            # the loop would continue and closing would fail with
            # "generator ignored GeneratorExit".
            print(f"skipping record {record_name}: {error!r}", flush=True)
def select_torsions(molecules_list_dict, molecule_attributes, forcefield, target_coverage=3):
    """Pick non-ring torsions until each SMIRKS of interest is covered enough.

    Only torsion parameters whose id is in the module-level ``list_of_tids``
    are considered. For each molecule, a torsion is selected when its SMIRKS
    has been selected fewer than ``target_coverage`` times so far, its two
    central atoms do not share a ring, and that central bond has not already
    been used for this molecule.

    Returns a dict keyed by the canonical torsion label, whose values hold
    'initial_molecules', 'atom_indices', 'attributes' and 'tid'. Progress and
    a coverage summary are printed.
    """
    torsions_dict = {}
    smirks_torsions_counter = Counter()

    for i_mol, (mol_index, mol_attr) in enumerate(molecule_attributes.items()):
        # Central bonds already selected for this molecule.
        central = []
        print(f'{i_mol:<7d}: {mol_index}')
        mapped_smiles = mol_attr['canonical_isomeric_explicit_hydrogen_mapped_smiles']

        # Round trip from QCFractal molecule to OpenEye molecule then to Off
        # Molecule; this is needed for now to ensure atom indices are
        # consistent.
        qcjson_mol = molecules_list_dict[mol_index][0]
        oemol = cmiles.utils.load_molecule(qcjson_mol)

        # Map each atom index to the set of ring ids it belongs to.
        bonds = [(bond.GetBgnIdx(), bond.GetEndIdx()) for bond in oemol.GetBonds()]
        rings = BondGraph(bonds).get_rings()
        d_rings = defaultdict(set)
        for i_ring, ring in enumerate(rings):
            for atom_idx in ring:
                d_rings[atom_idx].add(i_ring)

        off_mol = Off_Molecule.from_openeye(oemol, allow_undefined_stereo=True)
        torsions_coverage = smirnoff_analyze_torsions(forcefield, off_mol)

        for torsion_param, torsion_idx_list in torsions_coverage.items():
            smirks = torsion_param.smirks
            for atom_indices in torsion_idx_list:
                # Parameters outside the list of interest are ignored silently.
                if torsion_param.id not in list_of_tids:
                    continue

                if smirks_torsions_counter[smirks] >= target_coverage:
                    print(f"  - torsion {atom_indices} skipped because {smirks} have {smirks_torsions_counter[smirks]} already")
                    continue

                _, j, k, _ = atom_indices

                # Skip torsions whose central atoms sit in a common ring.
                if d_rings[j] & d_rings[k]:
                    continue

                center = {j, k}
                if center in central:
                    continue

                smirks_torsions_counter[smirks] += 1
                canonical_torsion_index = cmiles.utils.to_canonical_label(mapped_smiles, atom_indices)
                torsions_dict[canonical_torsion_index] = {
                    'initial_molecules': molecules_list_dict[mol_index],
                    'atom_indices': [atom_indices],
                    'attributes': mol_attr,
                    'tid': torsion_param.id,
                }
                central.append(center)
                print(f"  - torsion {atom_indices} added for smirks {smirks}")

    print("\n## Selected Torsion Coverage ##\n" + '-'*90)
    ff_torsion_param_list = forcefield.get_parameter_handler('ProperTorsions').parameters
    n_covered = 0
    for param in ff_torsion_param_list:
        if param.id not in list_of_tids:
            continue
        count = smirks_torsions_counter[param.smirks]
        print(f"{param.id:5s}{param.smirks:80s} : {count:7d}")
        if count > 0:
            n_covered += 1
    print('-'*90)
    print(f'{n_covered} / {len(list_of_tids)} torsion SMIRKs covered')
    return torsions_dict
Ejemplo n.º 9
0
def create_openmm_system(conversion, molecules):
    """
    Create an OpenMM system for the first molecule in ``molecules``.

    Only ``molecules[0]`` is used. It is converted with
    ``Molecule.from_openeye`` and parameterized with the force field loaded
    from ``conversion.ff``.

    Returns the ``(topology, system)`` pair.
    """

    off_molecule = Molecule.from_openeye(molecules[0])
    off_topology = Topology.from_molecules([off_molecule])
    force_field = ForceField(conversion.ff)

    return off_topology, force_field.create_openmm_system(off_topology)
def gen_tid_molecules_list_of_interest(molecule_attributes,
                                       molecules_list_dict, forcefield,
                                       tid_list):
    """Collect, per torsion id of interest, the molecules and torsions that match it.

    Returns a dict mapping each id in ``tid_list`` that exists among the force
    field's 'ProperTorsions' parameters to a list of dicts with keys
    'mol_index', 'indices' and 'covered_tids'. A summary table is printed.
    """
    # Seed the result with every parameter of interest found in the force
    # field, preserving the force field's parameter order.
    ff_torsion_param_list = forcefield.get_parameter_handler(
        'ProperTorsions').parameters
    ff_torsion_param_list_of_interest = [
        param for param in ff_torsion_param_list if param.id in tid_list
    ]
    tid_molecules_list = {
        param.id: [] for param in ff_torsion_param_list_of_interest
    }

    for mol_index, mol_attr in molecule_attributes.items():
        # Not used below; the lookup is kept so a missing key still raises.
        mapped_smiles = mol_attr[
            'canonical_isomeric_explicit_hydrogen_mapped_smiles']
        qcjson_mol = molecules_list_dict[mol_index][0]
        oemol = cmiles.utils.load_molecule(qcjson_mol)
        off_mol = Off_Molecule.from_openeye(oemol, allow_undefined_stereo=True)

        torsions_coverage, center_tids = smirnoff_analysis_torsions(
            forcefield, off_mol)
        filtered_torsions_coverage = filter_torsions_coverage(
            torsions_coverage, oemol)

        for tid, indices_list in filtered_torsions_coverage.items():
            if tid not in tid_list:
                continue

            for indices in indices_list:
                _, j, k, _ = indices

                # Ids recorded for the central bond (j, k), de-duplicated
                # while keeping first-seen order.
                covered_tids = []
                for center_tid in center_tids[(j, k)]:
                    if center_tid not in covered_tids:
                        covered_tids.append(center_tid)

                tid_molecules_list[tid].append({
                    'mol_index': mol_index,
                    'indices': indices,
                    'covered_tids': covered_tids
                })

    print("\n## Torsion parameter: matched molecules ##\n" + '-' * 90)
    print(
        f"{'idx':<7} {'ID':7s} {'SMIRKS Pattern':70s} {'Number of molecules matched'}"
    )
    for row, (tid, molecules_list) in enumerate(tid_molecules_list.items()):
        torsion_param = get_torsion_definition(
            ff_torsion_param_list_of_interest, tid)
        print(
            f'{row:<7} {torsion_param.id:7s} {torsion_param.smirks:70s} {len(molecules_list)}'
        )
    print('-' * 90)

    return tid_molecules_list
Ejemplo n.º 11
0
def generate_selected_torsions(input_json):
    """Enumerate the torsions of every molecule in a generated dataset file.

    Parameters
    ----------
    input_json: str,
        JSON file name to the output json of generate.py (prepared as if for an OptimizationDataset)
        The data in the json file should be a list of {'initial_molecules': [..], 'cmiles_identifiers':{}}.

    Returns
    -------
    torsions_dict: dict
        Dictionary keyed by a running integer (0, 1, 2, ...), one entry per torsion:
        {
            0: {
                'initial_molecules': [ Molecule1a ],
                'atom_indices': [ (0,1,2,3) ],
                'attributes': {'canonical_explicit_hydrogen_smiles': .., 'canonical_isomeric_smiles': .., ..}
            },
            ..
        }

    Note
    ----
    'atom_indices' always holds exactly one item, because only 1-D torsions are selected for now.
    Only the first entry of 'initial_molecules' of each input record is used.

    """
    torsions_dict = {}

    for mol_index, json_mol in enumerate(read_molecules(input_json)):
        identifiers = json_mol['cmiles_identifiers']
        mapped_smiles = identifiers['canonical_isomeric_explicit_hydrogen_mapped_smiles']
        print(f'{mol_index} : {mapped_smiles}')

        # Round trip from QCFractal molecule to OpenEye molecule then to Off
        # Molecule; this is needed for now to ensure atom indices are consistent.
        qcjson_mol = json_mol['initial_molecules'][0]
        oemol = cmiles.utils.load_molecule(qcjson_mol)
        # The converted molecule is not used afterwards; the call is kept so
        # that a conversion failure still surfaces here.
        Molecule.from_openeye(oemol, allow_undefined_stereo=True)

        for atom_indices in enumerate_torsions(oemol):
            # Keys are consecutive integers, so the next key is the current size.
            torsions_dict[len(torsions_dict)] = {
                'initial_molecules': [qcjson_mol],
                'atom_indices': [atom_indices],
                'attributes': json_mol['cmiles_identifiers'],
            }
            print(f"  - torsion {atom_indices} added")

    print(f'{len(torsions_dict)} torsions added')
    return torsions_dict
Ejemplo n.º 12
0
    def _convert_to_off(mol):
        """Return an OpenFF molecule for ``mol``, or None if its type is not recognised.

        Handles ``esp.Graph`` (its ``.mol`` attribute is returned), OpenFF
        molecules (returned unchanged), RDKit molecules and OpenEye molecules.
        """
        import openforcefield

        if isinstance(mol, esp.Graph):
            return mol.mol

        if isinstance(mol, openforcefield.topology.molecule.Molecule):
            return mol

        if isinstance(mol, rdkit.Chem.rdchem.Mol):
            return Molecule.from_rdkit(mol)

        # OpenEye objects are recognised by type name because we don't want
        # to depend on OE.
        type_name = str(type(mol))
        if "openeye" in type_name:
            return Molecule.from_openeye(mol)

        return None
Ejemplo n.º 13
0
def smiles_to_svg(smiles: str, torsion_indices: (int, int), image_width: int = 200, image_height: int = 200) -> str:
    """Renders a 2D representation of a molecule based on its SMILES representation as
    an SVG string.

    The molecule is built through OpenEye and OpenFF when that route works,
    and the bonds between consecutive ``torsion_indices`` are highlighted.
    If that route raises, the SMILES is parsed directly with RDKit and nothing
    is highlighted.

    Parameters
    ----------
    smiles
        The SMILES pattern.
    torsion_indices
        The torsion indices for the molecule.
    image_width
        The width to make the final SVG.
    image_height
        The height to make the final SVG.

    Returns
    -------
        The 2D SVG representation.
    """

    # Parser settings for the RDKit fallback: keep explicit hydrogens.
    smiles_parser = Chem.rdmolfiles.SmilesParserParams()
    smiles_parser.removeHs = False

    oe_conformed = False
    try:
        oe_molecule, _ = smiles2oemol(smiles)
        openff_molecule = Molecule.from_openeye(oe_molecule)
        rdkit_molecule = openff_molecule.to_rdkit()
        oe_conformed = True
    except Exception:
        # Fall back to plain RDKit on any ordinary failure, but let
        # KeyboardInterrupt / SystemExit propagate.
        rdkit_molecule = Chem.MolFromSmiles(smiles, smiles_parser)

    # Generate a set of 2D coordinates.
    Chem.rdDepictor.Compute2DCoords(rdkit_molecule)

    drawer = rdMolDraw2D.MolDraw2DSVG(image_width, image_height)

    # Highlighting is skipped for the fallback molecule.
    # NOTE(review): presumably because the indices refer to the
    # OpenEye-derived atom ordering — confirm.
    torsion_bonds = []
    if oe_conformed:
        for index in range(len(torsion_indices) - 1):
            bond = rdkit_molecule.GetBondBetweenAtoms(
                torsion_indices[index], torsion_indices[index + 1])
            if bond:
                torsion_bonds.append(bond.GetIdx())

    rdMolDraw2D.PrepareAndDrawMolecule(drawer, rdkit_molecule, highlightBonds=torsion_bonds)

    drawer.FinishDrawing()

    return drawer.GetDrawingText()
Ejemplo n.º 14
0
def test_OEMol_to_omm_ff(molecule=None):
    """
    Generating openmm objects for simulation from an OEMol object

    Parameters
    ----------
    molecule : openeye.oechem.OEMol, optional
        Molecule to convert. When omitted, ``smiles_to_oemol('CC')`` is built
        at call time, so importing this module does not construct a molecule
        and no default object is shared between calls.

    Returns
    -------
    system : openmm.System
        openmm system object
    positions : unit.quantity
        positions of the system
    topology : app.topology.Topology
        openmm compatible topology object
    """
    import simtk.openmm.app as app
    import simtk.unit as unit
    from perses.utils.openeye import OEMol_to_omm_ff
    from simtk import openmm
    from openmmforcefields.generators import SystemGenerator
    from openforcefield.topology import Molecule

    if molecule is None:
        molecule = smiles_to_oemol('CC')

    #default arguments for SystemGenerators
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }
    small_molecule_forcefield = 'gaff-2.11'
    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=[Molecule.from_openeye(molecule)],
        cache=None)

    system, positions, topology = OEMol_to_omm_ff(molecule, system_generator)

    # isinstance avoids constructing a throwaway System just to compare types.
    assert isinstance(system, openmm.System), \
        "An openmm.System has not been generated from OEMol_to_omm_ff()"

    return system, positions, topology
def get_smirnoff_params(mol: oechem.OEMol) -> {"id": ["atom_indices"]}:
    """Map each SMIRNOFF parameter id found for ``mol`` to the atom indices it applies to.

    Returns an empty dict when a topology cannot be built from the molecule.
    Errors from the OpenEye-to-OpenFF conversion itself are not caught.
    """
    off_mol = Molecule.from_openeye(mol, allow_undefined_stereo=True)

    try:
        topology = Topology.from_molecules(off_mol)
    except Exception:
        return {}

    # Only the labels of the first (and only) molecule in the topology are used.
    labels = utilize_params_util.SMIRNOFF.label_molecules(topology)[0]

    params = defaultdict(list)
    for _, force_dict in labels.items():
        for atom_index, parameter in force_dict.items():
            params[parameter.id].append(atom_index)

    return params
Ejemplo n.º 16
0
def select_torsions(molecules_list_dict,
                    molecule_attributes,
                    forcefield,
                    target_coverage=3):
    """Pick torsions until each SMIRKS pattern has been selected enough times.

    For every molecule, each torsion whose SMIRKS has been selected fewer than
    ``target_coverage`` times so far is added; all others are reported as
    skipped.

    Returns a dict keyed by the canonical torsion label, whose values hold
    'initial_molecules', 'atom_indices' and 'attributes'. Progress and a
    coverage summary are printed.
    """
    torsions_dict = {}
    smirks_torsions_counter = Counter()

    for i_mol, (mol_index, mol_attr) in enumerate(molecule_attributes.items()):
        print(f'{i_mol:<7d}: {mol_index}')
        mapped_smiles = mol_attr[
            'canonical_isomeric_explicit_hydrogen_mapped_smiles']

        # Round trip from QCFractal molecule to OpenEye molecule then to Off
        # Molecule; this is needed for now to ensure atom indices are
        # consistent.
        qcjson_mol = molecules_list_dict[mol_index][0]
        oemol = cmiles.utils.load_molecule(qcjson_mol)
        off_mol = Off_Molecule.from_openeye(oemol, allow_undefined_stereo=True)

        torsions_coverage = smirnoff_analyze_torsions(forcefield, off_mol)
        for smirks, torsion_idx_list in torsions_coverage.items():
            for atom_indices in torsion_idx_list:
                if smirks_torsions_counter[smirks] >= target_coverage:
                    print(
                        f"  - torsion {atom_indices} skipped because {smirks} have {smirks_torsions_counter[smirks]} already"
                    )
                    continue

                smirks_torsions_counter[smirks] += 1
                label = cmiles.utils.to_canonical_label(
                    mapped_smiles, atom_indices)
                torsions_dict[label] = {
                    'initial_molecules': molecules_list_dict[mol_index],
                    'atom_indices': [atom_indices],
                    'attributes': mol_attr,
                }
                print(
                    f"  - torsion {atom_indices} added for smirks {smirks}"
                )

    print("\n## Selected Torsion Coverage ##\n" + '-' * 90)
    ff_torsion_param_list = forcefield.get_parameter_handler(
        'ProperTorsions').parameters
    n_covered = 0
    for param in ff_torsion_param_list:
        count = smirks_torsions_counter[param.smirks]
        print(f"{param.smirks:80s} : {count:7d}")
        n_covered += 1 if count > 0 else 0
    print('-' * 90)
    print(f'{n_covered} / {len(ff_torsion_param_list)} torsion SMIRKs covered')
    return torsions_dict
Ejemplo n.º 17
0
def min_ffxml(mol, ofs, ffxml):
    """
    Minimize the mol with force field input from FFXML file and write it out.

    If the OpenMM system cannot be created, a message is printed and nothing
    is written.

    Parameters
    ----------
    mol : OpenEye single-conformer molecule
    ofs : OpenEye output filestream
    ffxml : string
        name of FFXML file

    """

    # Work on a copy of the input mol.
    oe_mol = oechem.OEGraphMol(mol)

    try:
        # Conversion to an openforcefield molecule is prone to raising.
        off_mol = Molecule.from_openeye(oe_mol)
        force_field = ForceField(ffxml)
        topology = Topology.from_molecules(molecules=[off_mol])
        # System creation is prone to raising as well.
        system = force_field.create_openmm_system(topology)
    except Exception as e:
        smilabel = oechem.OEGetSDData(oe_mol, "SMILES QCArchive")
        print(' >>> openforcefield failed to create OpenMM system: '
              f'{oe_mol.GetTitle()} {smilabel}: {e}')
        return

    # Minimize the structure with the ffxml parameters.
    start_positions = structure.extractPositionsFromOEMol(oe_mol)
    minimized_positions, energy = run_openmm(topology, system, start_positions)

    # Store the new geometry, tag the energy, and write the mol to file.
    oe_mol.SetCoords(oechem.OEFloatArray(minimized_positions))
    oechem.OESetSDData(oe_mol, "Energy FFXML", str(energy))
    oechem.OEWriteConstMolecule(ofs, oe_mol)
def calculate_mol_params(mol: oechem.OEMol) -> Dict[str, List[List[int]]]:
    """Calculates the force field parameters that apply to the given molecule.

    The result maps each parameter id to the list of places it occurs. Every
    entry of that list is itself a group of atom indices, because a parameter
    involves multiple atoms.

    Note that explicit hydrogens are added to ``mol`` in place before it is
    converted.
    """
    oechem.OEAddExplicitHydrogens(mol)
    off_molecule = Molecule.from_openeye(mol, allow_undefined_stereo=True)
    off_topology = Topology.from_molecules(off_molecule)

    # Only the labels of the first (and only) molecule in the topology are used.
    labels = FORCE_FIELD.label_molecules(off_topology)[0]

    params = defaultdict(list)
    for _, force_dict in labels.items():
        for atom_indices, parameter in force_dict.items():
            params[parameter.id].append(atom_indices)

    return params
Ejemplo n.º 19
0
    def __init__(self, config_: Config):
        """Load the ligand, optionally repair the protein PDB, and write a combined PDB.

        Steps, in order:

        1. Read one molecule from ``config.ligand_file_name``; keep its SMILES
           and an OpenFF copy of it on the instance.
        2. Load ``config.pdb_file_name`` into PDBFixer and, when
           ``config.use_pdbfixer`` is set, run the repair steps.
        3. Write the fixer's structure to ``inital_fixed.pdb`` in the config's
           temp dir and point ``config.pdb_file_name`` at it.
        4. Load that file and the ligand (renamed to residue 'UNL') through
           ``cmd`` and save the result over the same file.
        """
        self.config = config_
        self.logger = make_message_writer(self.config.verbose, self.__class__.__name__)
        with self.logger("__init__") as logger:
            self.boxvec = None
            self.explicit = self.config.explicit
            self.system = None

            # Read a single molecule from the ligand file.
            ligand_stream = oechem.oemolistream(self.config.ligand_file_name)
            ligand_oemol = oechem.OEMol()
            oechem.OEReadMolecule(ligand_stream, ligand_oemol)
            ligand_stream.close()

            self.inital_ligand_smiles = oechem.OEMolToSmiles(ligand_oemol)
            self.params_written = 0
            self.mol = Molecule.from_openeye(ligand_oemol, allow_undefined_stereo=True)

            # The fixer is always constructed because its topology and
            # positions are written out below even when no fixing is requested.
            fixer = PDBFixer(self.config.pdb_file_name)

            if self.config.use_pdbfixer:
                logger.log("Fixing with PDBFixer")

                fixer.findMissingResidues()
                fixer.findNonstandardResidues()
                fixer.replaceNonstandardResidues()
                fixer.removeHeterogens(keepWater=False)
                fixer.findMissingAtoms()
                fixer.addMissingAtoms()
                fixer.addMissingHydrogens(7.0)

                logger.log("Found missing residues: ", fixer.missingResidues)
                logger.log("Found missing terminals residues: ", fixer.missingTerminals)
                logger.log("Found missing atoms:", fixer.missingAtoms)
                logger.log("Found nonstandard residues:", fixer.nonstandardResidues)

            # From here on the config points at the rewritten PDB file.
            self.config.pdb_file_name = f"{self.config.tempdir(main_context=True)}/inital_fixed.pdb"
            with open(self.config.pdb_file_name, 'w') as pdb_handle:
                app.PDBFile.writeFile(fixer.topology, fixer.positions, pdb_handle)

            # Merge protein and ligand and overwrite the PDB with the result.
            cmd.reinitialize()
            cmd.load(self.config.pdb_file_name)
            cmd.load(self.config.ligand_file_name, "UNL")
            cmd.alter("UNL", "resn='UNL'")
            cmd.save(f"{self.config.pdb_file_name}")
Ejemplo n.º 20
0
def system_generator_wrapper(
        oemols,
        barostat=None,
        forcefield_files=None,
        forcefield_kwargs=None,
        small_molecule_forcefield='gaff-2.11',
        **kwargs):
    """
    make a system generator (vacuum) for a small molecule

    Arguments
    ---------
    oemols : list of openeye.oechem.OEMol
        oemols
    barostat : openmm.MonteCarloBarostat, default None
        barostat
    forcefield_files : list of str, default None
        pointers to protein forcefields and solvent; when None,
        ``['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']`` is used
    forcefield_kwargs : dict, default None
        dict of forcefield_kwargs; when None, the vacuum settings built below
        are used (no CM motion removal, NoCutoff, HBonds constraints,
        hydrogen mass of 4 amu)
    small_molecule_forcefield : str
        pointer to small molecule forcefield to use
    **kwargs
        accepted for call compatibility; not used

    Returns
    -------
    system_generator : openmmforcefields.generators.SystemGenerator

    Notes
    -----
    The defaults are created on every call rather than in the signature, so
    each generator receives its own list/dict and nothing is shared or
    mutated across calls.
    """
    from openforcefield.topology import Molecule
    from openmmforcefields.generators import SystemGenerator

    if forcefield_files is None:
        forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    if forcefield_kwargs is None:
        forcefield_kwargs = {
            'removeCMMotion': False,
            'ewaldErrorTolerance': 1e-4,
            'nonbondedMethod': app.NoCutoff,
            'constraints': app.HBonds,
            'hydrogenMass': 4 * unit.amus
        }

    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=[Molecule.from_openeye(oemol) for oemol in oemols],
        cache=None)
    return system_generator
Ejemplo n.º 21
0
def test_merge_system():
    """Test merging of a system created from AMBER and another created from SMIRNOFF."""
    from .utils import create_system_from_amber, get_amber_file_path, get_alkethoh_file_path

    # Create System from AMBER
    prmtop_filename, inpcrd_filename = get_amber_file_path(
        'cyclohexane_ethanol_0.4_0.6')
    system0, topology0, positions0 = create_system_from_amber(
        prmtop_filename, inpcrd_filename)

    # NOTE(review): the original carried an empty "TODO:" here; its intent is
    # unknown.
    from openeye import oechem
    # Load simple OEMol
    alkethoh_mol2_filepath = get_alkethoh_file_path('AlkEthOH_c100')[0]
    mol = oechem.OEMol()
    flavor = oechem.OEIFlavor_Generic_Default | oechem.OEIFlavor_MOL2_Default | oechem.OEIFlavor_MOL2_Forcefield
    ifs = oechem.oemolistream(alkethoh_mol2_filepath)
    try:
        ifs.SetFlavor(oechem.OEFormat_MOL2, flavor)
        oechem.OEReadMolecule(ifs, mol)
    finally:
        # Release the input stream even if reading fails.
        ifs.close()
    oechem.OETriposAtomNames(mol)

    # Load forcefield file.
    AlkEthOH_offxml_filename = utils.get_data_file_path(
        'test_forcefields/Frosst_AlkEthOH.offxml')
    forcefield = ForceField(AlkEthOH_offxml_filename)

    # Create OpenMM System and Topology.
    off_mol = Molecule.from_openeye(mol, allow_undefined_stereo=True)
    off_top = Topology.from_molecules([off_mol])
    system1 = forcefield.create_openmm_system(off_top)
    topology1 = structure.generateTopologyFromOEMol(mol)
    positions1 = structure.extractPositionsFromOEMol(mol)

    structure.merge_system(topology0,
                           topology1,
                           system0,
                           system1,
                           positions0,
                           positions1,
                           verbose=True)
Ejemplo n.º 22
0
def generateSMIRNOFFStructure(oemol):
    """
    Build a ParmEd structure for an OpenEye molecule using SMIRNOFF parameters.

    The molecule is parameterised with the
    'test_forcefields/smirnoff99Frosst.offxml' force field, converted to an
    OpenMM System, and combined with an OpenMM Topology and the molecule's
    positions into a ParmEd structure.  A PendingDeprecationWarning is
    emitted on every call.

    Parameters
    ----------
    oemol : openeye.oechem.OEMol
        OpenEye molecule

    Returns
    -------
    molecule_structure : parmed.Structure
        The resulting Structure

    """
    warnings.warn(DEPRECATION_WARNING_TEXT, PendingDeprecationWarning)
    from openforcefield.topology import Molecule, Topology
    from openforcefield.typing.engines.smirnoff import ForceField

    smirnoff_molecule = Molecule.from_openeye(oemol)
    smirnoff_topology = Topology.from_molecules([smirnoff_molecule])
    force_field = ForceField('test_forcefields/smirnoff99Frosst.offxml')

    # OpenMM topology built directly from the OpenEye molecule.
    openmm_topology = generateTopologyFromOEMol(oemol)

    # Without periodic box vectors the default (PME) electrostatics cannot
    # be used, so switch the handler to plain Coulomb.
    has_box = openmm_topology.getPeriodicBoxVectors() is not None
    if not has_box:
        electrostatics = force_field.get_parameter_handler(
            "Electrostatics", {})
        electrostatics._method = 'Coulomb'

    openmm_system = force_field.create_openmm_system(smirnoff_topology)

    # Combine topology, system and coordinates into a ParmEd structure.
    import parmed
    coordinates = extractPositionsFromOEMol(oemol)
    return parmed.openmm.load_topology(openmm_topology,
                                       openmm_system,
                                       xyz=coordinates)
Ejemplo n.º 23
0
    def _openeye_parameteriser(cls, mol, **kwargs):
        """
        Create a parameterised system from an OpenEye molecule.

        The molecule is assigned AM1-BCC charges with the OpenEye toolkit,
        parameterised with 'test_forcefields/smirnoff99Frosst.offxml' and
        converted to a ParmEd structure.  The results are stored on ``cls``
        as ``ligand_pmd`` and ``pdb_filename`` (a PDB copy of ``mol`` written
        into a freshly created temporary directory); nothing is returned.

        Parameters
        ----------
        mol : oechem.OEMol
            Molecule to parameterise.
        **kwargs
            Accepted but not used by this method.

        Raises
        ------
        ValueError
            If any step of the parameterisation fails; the original
            exception is attached as the cause.
        """
        try:
            forcefield = ForceField('test_forcefields/smirnoff99Frosst.offxml')
            molecule = Molecule.from_openeye(
                mol, allow_undefined_stereo=cls.allow_undefined_stereo)
            from openforcefield.utils.toolkits import OpenEyeToolkitWrapper
            molecule.compute_partial_charges_am1bcc(
                toolkit_registry=OpenEyeToolkitWrapper())

            topology = Topology.from_molecules(molecule)
            openmm_system = forcefield.create_openmm_system(
                topology, charge_from_molecules=[molecule])

            ligand_pmd = parmed.openmm.topsystem.load_topology(
                topology.to_openmm(), openmm_system, molecule._conformers[0])
        except Exception as e:
            # Chain explicitly so the underlying failure stays visible as
            # the cause of the ValueError.
            raise ValueError(
                "Parameterisation Failed : {}".format(e)) from e  #TODO

        ligand_pmd.title = cls.smiles

        for residue in ligand_pmd.residues:
            residue.name = 'LIG'

        tmp_dir = tempfile.mkdtemp()
        # We need all molecules as both pdb files (as packmol input)
        # and mdtraj.Trajectory for restoring bonds later.
        # The name is generated inside the directory mkdtemp() just created.
        pdb_filename = tempfile.mktemp(suffix=".pdb", dir=tmp_dir)
        from openeye import oechem  # OpenEye Python toolkits
        # Close the stream explicitly so the PDB is flushed to disk before
        # anything else reads it, instead of relying on garbage collection.
        ofs = oechem.oemolostream(pdb_filename)
        try:
            oechem.OEWriteMolecule(ofs, mol)
        finally:
            ofs.close()
        cls.pdb_filename = pdb_filename
        cls.ligand_pmd = ligand_pmd
Ejemplo n.º 24
0
def generate_testsystem(smiles = 'CCCC',
                        forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                        forcefield_kwargs = {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : None, 'hydrogenMass' : 4 * unit.amus},
                        nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff},
                        periodic_forcefield_kwargs = {'nonbondedMethod': app.PME},
                        small_molecule_forcefield = 'gaff-2.11',
                        padding=9*unit.angstroms,
                        ionicStrength=0.0*unit.molar,
                        water_model = 'tip3p',
                        pressure = 1.0 * unit.atmosphere,
                        temperature = 300 * unit.kelvin,
                        barostat_period = 50,
                        **kwargs
                        ):
    """
    Build a vacuum and a solvated test system for one small molecule.

    The molecule is created from ``smiles``, parameterised in vacuum with a
    non-periodic SystemGenerator, then solvated with ``app.Modeller`` and
    parameterised again with a periodic SystemGenerator that carries a
    Monte Carlo barostat.

    arguments
        smiles : str, default 'CCCC'
            smiles string of the small molecule
        forcefield_files = list, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
            forcefield file names
        forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : None, 'hydrogenMass' : 4 * unit.amus}
            forcefield kwargs shared by both generators
        nonperiodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.NoCutoff}
            forcefield kwargs passed only to the vacuum generator
        periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
            forcefield kwargs passed only to the solvent generator
        small_molecule_forcefield :  str, default 'gaff-2.11'
            small molecule forcefield to parameterize smiles
        padding : simtk.unit.Quantity (compatible with unit.angstroms),default 9.0 * unit.angstroms
            solvent padding
        ionicStrength : simtk.unit.Quantity, default 0.0*unit.molar
            ionic strength of solvent
        water_model : str, default 'tip3p'
            water model for solvation
        pressure : simtk.unit.Quantity, default 1.0 * unit.atmosphere
            pressure of the barostat
        temperature : simtk.unit.Quantity, default 300 * unit.kelvin
            temperature of barostat
        barostat_period : int, default 50
            integer of the barostat period
        **kwargs
            accepted but not used

    returns
        vac_sys_pos_top : tuple
            (system, positions, topology) of the vacuum phase, as returned by
            OEMol_to_omm_ff
        sol_sys_pos_top : tuple
            (system, positions, topology) of the solvated phase; positions
            are a unit.Quantity in nanometers wrapping a numpy array
    """
    from openforcefield.topology import Molecule
    from perses.utils.openeye import smiles_to_oemol
    from openmmforcefields.generators.system_generators import SystemGenerator
    from perses.utils.openeye import OEMol_to_omm_ff
    from simtk import openmm
    from qmlify.utils import pull_force_by_name

    oemol = smiles_to_oemol(smiles)
    off_molecules = [Molecule.from_openeye(oemol)]

    # Settings common to the vacuum and the solvent generator.
    shared_settings = dict(
        forcefields=forcefield_files,
        small_molecule_forcefield=small_molecule_forcefield,
        forcefield_kwargs=forcefield_kwargs,
        molecules=off_molecules,
    )
    vac_system_generator = SystemGenerator(
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        **shared_settings)
    barostat = openmm.MonteCarloBarostat(pressure, temperature,
                                         barostat_period)
    sol_system_generator = SystemGenerator(
        periodic_forcefield_kwargs=periodic_forcefield_kwargs,
        barostat=barostat,
        **shared_settings)

    # Vacuum phase.
    vac_sys_pos_top = OEMol_to_omm_ff(oemol, vac_system_generator)
    vac_system, vac_positions, vac_topology = vac_sys_pos_top
    vac_sys_pos_top = (vac_system, vac_positions, vac_topology)

    # Solvent phase: solvate the vacuum structure.
    modeller = app.Modeller(vac_topology, vac_positions)
    modeller.addSolvent(sol_system_generator.forcefield,
                        model=water_model,
                        padding=padding,
                        ionicStrength=ionicStrength)
    raw_positions = modeller.getPositions()
    sol_topology = modeller.getTopology()
    # Re-wrap the positions as a numpy-backed Quantity in nanometers.
    coordinates = np.array([
        list(atom_pos) for atom_pos in
        raw_positions.value_in_unit_system(unit.md_unit_system)
    ])
    sol_positions = unit.quantity.Quantity(value=coordinates,
                                           unit=unit.nanometers)
    sol_system = sol_system_generator.create_system(sol_topology)
    sol_sys_pos_top = (sol_system, sol_positions, sol_topology)

    # Sanity check: only the solvated system may use periodic boundaries.
    vac_nbf = pull_force_by_name(vac_system, 'NonbondedForce')
    sol_nbf = pull_force_by_name(sol_system, 'NonbondedForce')
    assert not vac_nbf.usesPeriodicBoundaryConditions()
    assert sol_nbf.usesPeriodicBoundaryConditions()

    return vac_sys_pos_top, sol_sys_pos_top
Ejemplo n.º 25
0
def find_smirks_parameters(smiles_list, molecule_paths):
    """Collect the force field parameters assigned to a list of molecules.

    Each molecule is read from its file (first record only), labelled with
    the 'smirnoff99Frosst-1.0.9.offxml' force field, and every parameter
    match is printed and accumulated.

    Parameters
    ----------
    smiles_list: list of str
        The SMILES patterns of the target molecules. Only the length of this
        list is used; the molecules themselves are read from
        ``molecule_paths``.
    molecule_paths: list of Path
        The molecule files that correspond, index by index, to the SMILES
        strings.

    Returns
    -------
    dict of str and dict
        Keyed by force name ("Bonds", "Angles", ...).  Each value maps a
        parameter ID to a dict with:
          "atom_indices" - list of atom-index tuples that matched,
          "atom_names"   - list of lists of the matching atoms' names,
          "smirks"       - the SMIRKS pattern of the parameter.
        Matches from all molecules are accumulated in the same lists.
    """

    force_field = smirnoff.ForceField('smirnoff99Frosst-1.0.9.offxml')

    smiles_by_smirks = {
        "Bonds": {},
        "Angles": {},
        "ProperTorsions": {},
        "vdW": {},
        "ImproperTorsions": {},
        "Electrostatics": {},
    }

    # Populate the dictionary using the open force field toolkit.
    for index, _smiles in enumerate(smiles_list):
        molecule_path = molecule_paths[index]

        ifs = oechem.oemolistream()
        if not ifs.open(str(molecule_path)):
            # Nothing can be read from this file; report it and move on
            # rather than failing later with an IndexError.
            logging.error(f'Unable to open {molecule_path} for reading...')
            continue

        # Only the first molecule in the file is used.  It is copied because
        # the object handed out by GetOEMols() is owned by the stream.
        try:
            oe_mol = next(
                (oechem.OEMol(mol) for mol in ifs.GetOEMols()), None)
        finally:
            ifs.close()
        if oe_mol is None:
            logging.error(f'No molecules could be read from {molecule_path}')
            continue

        oechem.OE3DToAtomStereo(oe_mol)
        molecule = Molecule.from_openeye(oe_mol)

        # molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True)
        topology = Topology.from_molecules([molecule])

        molecule_force_list = force_field.label_molecules(topology)

        for molecule_index, molecule_forces in enumerate(molecule_force_list):
            print(f'Forces for molecule {molecule_index}')
            for force_name, force_dict in molecule_forces.items():
                print(f"\n{force_name}:")
                for atom_indices, parameter in force_dict.items():
                    atom_names = [
                        oe_mol.GetAtom(oechem.OEHasAtomIdx(i)).GetName()
                        for i in atom_indices
                    ]
                    print("atoms: %s  parameter_id: %s  smirks %s" %
                          (atom_names, parameter.id, parameter.smirks))

                    # Create the per-parameter record only the first time the
                    # parameter is seen, so later matches are appended
                    # instead of replacing the lists.
                    record = smiles_by_smirks.setdefault(
                        force_name, {}).setdefault(
                            parameter.id,
                            {"atom_indices": [], "atom_names": []})
                    record["atom_indices"].append(atom_indices)
                    record["atom_names"].append(atom_names)
                    record["smirks"] = parameter.smirks

    return smiles_by_smirks
Ejemplo n.º 26
0
def get_parameters(mols_dict, ffxml):
    """
    Identify the force field parameters assigned to a group of structures.

    Every structure is converted to an Open Force Field molecule, duplicate
    molecules (same SMILES) are dropped keeping the first occurrence, and
    get_molecule_parameterIDs is called on the remainder to obtain the
    assignment grouped by molecule and grouped by parameter.

    Parameters
    ----------
    mols_dict : dict of dicts
        the first level key is the SMILES string and the value of that key is
        a dict with the following key/value pairs--
            metric      geometric measurement
            structure   OEGraphMol of the structure
    ffxml : string
        name of FFXML force field file

    Returns
    -------
    parameters_by_molecule : dict
        key is isosmiles generated by Open Force Field internal code;
        value is a list of parameter IDs associated with this molecule
    parameters_by_ID : dict
        key is parameter ID;
        value is a list of isosmiles for all the molecules that have this ID
    smi_dict : dict
        key is isosmiles;
        value is the molecular identifier from the input SDF file
        (for duplicated isosmiles, the last identifier encountered)
    """

    force_field = ForceField(ffxml)

    smi_dict = {}
    unique_mols = []
    seen_smiles = set()

    for key, entry in mols_dict.items():
        # note: stereo error raised even though coordinates present (todo?)
        off_mol = Molecule.from_openeye(entry['structure'],
                                        allow_undefined_stereo=True)
        iso_smiles = off_mol.to_smiles()

        # Lets callers trace an iso_smiles back to the original molecule.
        smi_dict[iso_smiles] = key

        # get_molecule_parameterIDs gives an error on duplicate molecules,
        # so only the first molecule with a given SMILES is kept.
        if iso_smiles in seen_smiles:
            continue
        seen_smiles.add(iso_smiles)
        unique_mols.append(off_mol)

    # Parameter assignment, grouped both by molecule and by parameter.
    parameters_by_molecule, parameters_by_ID = get_molecule_parameterIDs(
        unique_mols, force_field)

    return parameters_by_molecule, parameters_by_ID, smi_dict
Ejemplo n.º 27
0
def prepare_simulation(molecule, basedir, save_openmm=False):
    """
    Prepare simulation systems

    For each of the 'complex' and 'ligand' phases this reads the docked PDB
    from ``<basedir>/docking``, adds solvent, minimizes and equilibrates the
    system, and writes GROMACS ``.gro``/``.top`` files to
    ``<basedir>/gromacs``.  A phase is skipped when its output files already
    exist.  When warheads are found on the molecule, the complex phase also
    monitors distances to the CYS145 SG atom during equilibration and stores
    summary statistics as SD data on ``molecule``.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
       The molecule to set up
    basedir : str
       The base directory for docking/ and fah/ directories
    save_openmm : bool, optional, default=False
       If True, save gzipped OpenMM System, State, Integrator

    Notes
    -----
    Relies on ``oechem`` and ``find_warheads`` being available at module
    scope; neither is imported inside this function.
    """
    # Parameters
    from simtk import unit, openmm
    # NOTE(review): water_model is assigned but never read; addSolvent below
    # hard-codes model='tip3p'.
    water_model = 'tip3p'
    solvent_padding = 10.0 * unit.angstrom
    # Fixed box used for the ligand phase (the complex phase uses padding).
    box_size = openmm.vec3.Vec3(3.4,3.4,3.4)*unit.nanometers
    ionic_strength = 100 * unit.millimolar # 100
    pressure = 1.0 * unit.atmospheres
    collision_rate = 1.0 / unit.picoseconds
    temperature = 300.0 * unit.kelvin
    timestep = 4.0 * unit.femtoseconds
    nsteps_per_iteration = 250
    # 10000 iterations x 250 steps x 4 fs = 10 ns
    iterations = 10000 # 10 ns (covalent score)

    protein_forcefield = 'amber14/protein.ff14SB.xml'
    small_molecule_forcefield = 'openff-1.1.0'
    #small_molecule_forcefield = 'gaff-2.11' # only if you really like atomtypes
    solvation_forcefield = 'amber14/tip3p.xml'

    # Create SystemGenerators
    import os
    from simtk.openmm import app
    from openforcefield.topology import Molecule
    off_molecule = Molecule.from_openeye(molecule, allow_undefined_stereo=True)
    print(off_molecule)
    barostat = openmm.MonteCarloBarostat(pressure, temperature)

    # docking directory (input; expected to exist already)
    docking_basedir = os.path.join(basedir, 'docking')

    # gromacs directory
    gromacs_basedir = os.path.join(basedir, 'gromacs')
    os.makedirs(gromacs_basedir, exist_ok=True)

    # openmm directory
    openmm_basedir = os.path.join(basedir, 'openmm')
    os.makedirs(openmm_basedir, exist_ok=True)

    # Cache file (JSON, named after the molecule title), passed to both generators
    cache = os.path.join(openmm_basedir, f'{molecule.GetTitle()}.json')

    common_kwargs = {'removeCMMotion': False, 'ewaldErrorTolerance': 5e-04,
                     'nonbondedMethod': app.PME, 'hydrogenMass': 3.0*unit.amu}
    unconstrained_kwargs = {'constraints': None, 'rigidWater': False}
    constrained_kwargs = {'constraints': app.HBonds, 'rigidWater': True}
    forcefields = [protein_forcefield, solvation_forcefield]
    from openmmforcefields.generators import SystemGenerator
    # Two generators that differ only in constraints: the unconstrained one
    # feeds the ParmEd/GROMACS export, the constrained one is simulated.
    parmed_system_generator = SystemGenerator(forcefields=forcefields,
                                              molecules=[off_molecule], small_molecule_forcefield=small_molecule_forcefield, cache=cache,
                                              barostat=barostat,
                                              forcefield_kwargs={**common_kwargs, **unconstrained_kwargs})
    openmm_system_generator = SystemGenerator(forcefields=forcefields,
                                              molecules=[off_molecule], small_molecule_forcefield=small_molecule_forcefield, cache=cache,
                                              barostat=barostat,
                                              forcefield_kwargs={**common_kwargs, **constrained_kwargs})

    # Prepare phases
    # (os was already imported above; this second import is redundant)
    import os
    print(f'Setting up simulation for {molecule.GetTitle()}...')
    for phase in ['complex', 'ligand']:
        phase_name = f'{molecule.GetTitle()} - {phase}'
        print(phase_name)

        pdb_filename = os.path.join(docking_basedir, phase_name + '.pdb')
        gro_filename = os.path.join(gromacs_basedir, phase_name + '.gro')
        top_filename = os.path.join(gromacs_basedir, phase_name + '.top')

        system_xml_filename = os.path.join(openmm_basedir, phase_name+'.system.xml.gz')
        integrator_xml_filename = os.path.join(openmm_basedir, phase_name+'.integrator.xml.gz')
        state_xml_filename = os.path.join(openmm_basedir, phase_name+'.state.xml.gz')

        # Check if we can skip setup: GROMACS outputs must exist, and the
        # OpenMM outputs too when they were requested.
        gromacs_files_exist = os.path.exists(gro_filename) and os.path.exists(top_filename)
        openmm_files_exist = os.path.exists(system_xml_filename) and os.path.exists(state_xml_filename) and os.path.exists(integrator_xml_filename)
        if gromacs_files_exist and (not save_openmm or openmm_files_exist):
            continue

        # Filter out UNK atoms by spruce
        # (drops every PDB line that contains the text 'UNK')
        with open(pdb_filename, 'r') as infile:
            lines = [ line for line in infile if 'UNK' not in line ]
        from io import StringIO
        pdbfile_stringio = StringIO(''.join(lines))

        # Read the unsolvated system into an OpenMM Topology
        pdbfile = app.PDBFile(pdbfile_stringio)
        topology, positions = pdbfile.topology, pdbfile.positions

        # Add solvent: fixed box for the ligand phase, padding for the complex
        print('Adding solvent...')
        modeller = app.Modeller(topology, positions)
        if phase == 'ligand':
            kwargs = {'boxSize' : box_size}
        else:
            kwargs = {'padding' : solvent_padding}
        modeller.addSolvent(openmm_system_generator.forcefield, model='tip3p', ionicStrength=ionic_strength, **kwargs)

        # Create an OpenMM system
        system = openmm_system_generator.create_system(modeller.topology)

        # If monitoring covalent distance, add an unused force
        warheads_found = find_warheads(molecule)
        covalent = (len(warheads_found) > 0)
        if covalent and phase=='complex':

            # Find warhead atom indices
            # (the sulfur is looked up in the pre-solvation topology)
            sulfur_atom_index = None
            for atom in topology.atoms():
                if (atom.residue.name == 'CYS') and (atom.residue.id == '145') and (atom.name == 'SG'):
                    sulfur_atom_index = atom.index
                    break
            if sulfur_atom_index is None:
                raise Exception('CYS145 SG atom cannot be found')

            print('Adding CustomCVForces...')
            # Energy expression '0': the force only tracks the collective
            # variables added to it below.
            custom_cv_force = openmm.CustomCVForce('0')
            # NOTE(review): warhead_atom_index is used directly as a system
            # particle index; confirm find_warheads returns indices in the
            # complex topology rather than within the ligand alone.
            for warhead_type, warhead_atom_index in warheads_found.items():
                distance_force = openmm.CustomBondForce('r')
                distance_force.setUsesPeriodicBoundaryConditions(True)
                distance_force.addBond(sulfur_atom_index, warhead_atom_index, [])
                custom_cv_force.addCollectiveVariable(warhead_type, distance_force)
            force_index = system.addForce(custom_cv_force)

        # Create OpenMM Context (the CUDA platform is hard-coded)
        platform = openmm.Platform.getPlatformByName('CUDA')
        platform.setPropertyDefaultValue('Precision', 'mixed')
        integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep)
        context = openmm.Context(system, integrator, platform)
        context.setPositions(modeller.positions)

        # Report initial potential energy
        state = context.getState(getEnergy=True)
        print(f'{molecule.GetTitle()} {phase} : Initial potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol')

        # Minimize
        print('Minimizing...')
        openmm.LocalEnergyMinimizer.minimize(context)

        # Equilibrate
        print('Equilibrating...')
        from tqdm import tqdm
        import numpy as np
        # One entry per iteration; stays all zeros unless covalent distances
        # are being monitored.
        distances = np.zeros([iterations], np.float32)
        for iteration in tqdm(range(iterations)):
            integrator.step(nsteps_per_iteration)
            if covalent and phase=='complex':
                # Get distance in Angstroms
                # (smallest collective variable value, times 10 -- presumably nm -> Angstrom)
                distances[iteration] = min(custom_cv_force.getCollectiveVariableValues(context)[:]) * 10

        # Retrieve state and copy the final box vectors onto system and topology
        state = context.getState(getPositions=True, getVelocities=True, getEnergy=True, getForces=True)
        system.setDefaultPeriodicBoxVectors(*state.getPeriodicBoxVectors())
        modeller.topology.setPeriodicBoxVectors(state.getPeriodicBoxVectors())
        print(f'{molecule.GetTitle()} {phase} : Final potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol')

        # Remove CustomCVForce
        if covalent and phase=='complex':
            print('Removing CustomCVForce...')
            system.removeForce(force_index)
            from pymbar.timeseries import detectEquilibration
            # Discard the samples before the detected equilibration index t0,
            # then store summary statistics as SD data on the molecule.
            t0, g, Neff = detectEquilibration(distances)
            distances = distances[t0:]
            distance_min = distances.min()
            distance_mean = distances.mean()
            distance_stddev = distances.std()
            oechem.OESetSDData(molecule, 'covalent_distance_min', str(distance_min))
            oechem.OESetSDData(molecule, 'covalent_distance_mean', str(distance_mean))
            oechem.OESetSDData(molecule, 'covalent_distance_stddev', str(distance_stddev))
            print(f'Covalent distance: mean {distance_mean:.3f} A : stddev {distance_stddev:.3f} A')

        # Save as OpenMM
        if save_openmm:
            print('Saving as OpenMM...')
            import gzip
            with gzip.open(integrator_xml_filename, 'wt') as f:
                f.write(openmm.XmlSerializer.serialize(integrator))
            with gzip.open(state_xml_filename,'wt') as f:
                f.write(openmm.XmlSerializer.serialize(state))
            with gzip.open(system_xml_filename,'wt') as f:
                f.write(openmm.XmlSerializer.serialize(system))
            # Full solvated structure
            with gzip.open(os.path.join(openmm_basedir, phase_name+'-explicit.pdb.gz'), 'wt') as f:
                app.PDBFile.writeFile(modeller.topology, state.getPositions(), f)
            # Same structure with water atoms removed
            with gzip.open(os.path.join(openmm_basedir, phase_name+'-solute.pdb.gz'), 'wt') as f:
                import mdtraj
                mdtraj_topology = mdtraj.Topology.from_openmm(modeller.topology)
                mdtraj_trajectory = mdtraj.Trajectory([state.getPositions(asNumpy=True) / unit.nanometers], mdtraj_topology)
                selection = mdtraj_topology.select('not water')
                mdtraj_trajectory = mdtraj_trajectory.atom_slice(selection)
                app.PDBFile.writeFile(mdtraj_trajectory.topology.to_openmm(), mdtraj_trajectory.openmm_positions(0), f)

        # Convert to gromacs via ParmEd
        # A fresh system from the unconstrained generator is used for the
        # export, with the equilibrated positions.
        print('Saving as gromacs...')
        import parmed
        parmed_system = parmed_system_generator.create_system(modeller.topology)
        #parmed_system.setDefaultPeriodicBoxVectors(*state.getPeriodicBoxVectors())
        structure = parmed.openmm.load_topology(modeller.topology, parmed_system, xyz=state.getPositions(asNumpy=True))
        structure.save(gro_filename, overwrite=True)
        structure.save(top_filename, overwrite=True)
Ejemplo n.º 28
0
def compare_energies(mol_name="naphthalene",
                     ref_mol_name="benzene",
                     atom_expression=['Hybridization'],
                     bond_expression=['Hybridization']):
    """
    Make an atom map where the molecule at either lambda endpoint is identical, and check that the energies are also the same.

    A proposal from ``ref_mol_name`` to ``mol_name`` is generated in vacuum
    (gaff-2.11, no cutoff), new positions are proposed by the geometry
    engine, a HybridTopologyFactory is built and its endstate energies are
    validated on the Reference platform.

    Parameters
    ----------
    mol_name : str, default "naphthalene"
        IUPAC name of the new molecule.
    ref_mol_name : str, default "benzene"
        IUPAC name of the reference (old) molecule.
    atom_expression : list of str, default ['Hybridization']
        Passed to generate_expression to build the atom matching expression.
    bond_expression : list of str, default ['Hybridization']
        Passed to generate_expression to build the bond matching expression.

    Returns
    -------
    factory : HybridTopologyFactory
        The hybrid factory built from the proposal.
    """
    from openmmtools.constants import kB
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from perses.annihilation.relative import HybridTopologyFactory
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import simtk.openmm as openmm
    from perses.utils.openeye import iupac_to_oemol, extractPositionsFromOEMol, generate_conformers
    from perses.utils.openeye import generate_expression
    from openmmforcefields.generators import SystemGenerator
    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    from perses.tests.utils import validate_endstate_energies
    temperature = 300 * unit.kelvin
    # Compute kT and inverse temperature.
    kT = kB * temperature
    beta = 1.0 / kT
    ENERGY_THRESHOLD = 1e-6

    atom_expr, bond_expr = generate_expression(
        atom_expression), generate_expression(bond_expression)

    mol = iupac_to_oemol(mol_name)
    mol = generate_conformers(mol, max_confs=1)

    refmol = iupac_to_oemol(ref_mol_name)
    refmol = generate_conformers(refmol, max_confs=1)

    from openforcefield.topology import Molecule
    molecules = [Molecule.from_openeye(oemol) for oemol in [refmol, mol]]
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }

    system_generator = SystemGenerator(forcefields=forcefield_files,
                                       barostat=barostat,
                                       forcefield_kwargs=forcefield_kwargs,
                                       small_molecule_forcefield='gaff-2.11',
                                       molecules=molecules,
                                       cache=None)

    topology = generateTopologyFromOEMol(refmol)
    system = system_generator.create_system(topology)
    positions = extractPositionsFromOEMol(refmol)

    proposal_engine = SmallMoleculeSetProposalEngine([refmol, mol],
                                                     system_generator)
    proposal = proposal_engine.propose(system,
                                       topology,
                                       atom_expr=atom_expr,
                                       bond_expr=bond_expr)
    geometry_engine = FFAllAngleGeometryEngine()
    new_positions, _ = geometry_engine.propose(
        proposal, positions, beta=beta, validate_energy_bookkeeping=False)
    _ = geometry_engine.logp_reverse(proposal, new_positions, positions, beta)
    #make a topology proposal with the appropriate data:

    factory = HybridTopologyFactory(proposal, positions, new_positions)

    # Valence energy added by the forward proposal; zero when no atoms are
    # unique to the new molecule.
    if not proposal.unique_new_atoms:
        assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
        assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.forward_atoms_with_positions_reduced_potential})"
        added_valence_energy = 0.0
    else:
        added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

    # Valence energy removed by the reverse proposal; zero when no atoms are
    # unique to the old molecule.
    if not proposal.unique_old_atoms:
        assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
        assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.reverse_atoms_with_positions_reduced_potential})"
        subtracted_valence_energy = 0.0
    else:
        subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

    zero_state_error, one_state_error = validate_endstate_energies(
        factory._topology_proposal,
        factory,
        added_valence_energy,
        subtracted_valence_energy,
        beta=beta,
        ENERGY_THRESHOLD=ENERGY_THRESHOLD,
        platform=openmm.Platform.getPlatformByName('Reference'))
    return factory
Ejemplo n.º 29
0
def data_generator():
    """Yield per-molecule feature tensors for every molecule in ``mols``.

    Each molecule is converted to an Open Force Field molecule and
    parameterised with ``FF``; the parameters are then read back from the
    resulting OpenMM system.  Molecules for which any step fails are
    skipped.

    Yields
    ------
    tuple
        ``(atoms, adjacency_map, angles, bonds, torsions, particle_params)``
        where
          atoms           - per-atom values looked up in ``TRANSLATION``,
          adjacency_map   - (n_atoms, n_atoms) tensor holding the bond order
                            at [atom1_index, atom2_index] (upper triangle),
          angles          - rows of [i, j, k, p3, p4] per angle,
          bonds           - rows of [i, j, p2, p3] per bond,
          torsions        - rows of [i, j, k, l, x4, x5, x6] per torsion,
          particle_params - rows of three values per particle.
        Numeric parameters other than the torsion ones are shifted and
        scaled by hard-coded constants (presumably dataset statistics).
    """
    for mol in mols:
        try:
            mol = Molecule.from_openeye(mol)
            topology = Topology.from_molecules(mol)
            system = FF.create_openmm_system(topology)
            n_atoms = topology.n_topology_atoms

            atoms = tf.convert_to_tensor(
                [TRANSLATION[atom._atomic_number] for atom in mol.atoms],
                dtype=tf.float32)

            adjacency_map = np.zeros((n_atoms, n_atoms), dtype=np.float32)
            for bond in mol.bonds:
                assert bond.atom1_index < bond.atom2_index
                adjacency_map[bond.atom1_index, bond.atom2_index] = \
                    bond.bond_order
            adjacency_map = tf.convert_to_tensor(
                adjacency_map,
                dtype=tf.float32)

            # The forces are addressed by position; this assumes the system
            # lists them as angles, bonds, nonbonded, torsions.
            forces = system.getForces()
            angle_force = forces[0]
            bond_force = forces[1]
            nonbonded_force = forces[2]
            torsion_force = forces[3]

            angles = tf.convert_to_tensor(
                [[x[0], x[1], x[2],
                  (x[3]._value - 1.965) / 0.237,
                  (x[4]._value - 507.28) / 396.80]
                 for x in (angle_force.getAngleParameters(idx)
                           for idx in range(angle_force.getNumAngles()))],
                dtype=tf.float32)

            bonds = tf.convert_to_tensor(
                [[x[0], x[1],
                  (x[2]._value - 0.126) / 0.0212,
                  (x[3]._value - 274856) / 12213.203]
                 for x in (bond_force.getBondParameters(idx)
                           for idx in range(bond_force.getNumBonds()))],
                dtype=tf.float32)

            torsions = tf.convert_to_tensor(
                [[x[0], x[1], x[2], x[3], x[4], x[5]._value, x[6]._value]
                 for x in (torsion_force.getTorsionParameters(idx)
                           for idx in range(torsion_force.getNumTorsions()))],
                dtype=tf.float32)

            particle_params = tf.convert_to_tensor(
                [[(x[0]._value - 0.00195) / 0.269,
                  (x[1]._value - 0.276) / 0.0654,
                  (x[2]._value - 0.280) / 0.284]
                 for x in (nonbonded_force.getParticleParameters(idx)
                           for idx in range(
                               nonbonded_force.getNumParticles()))])

        except Exception:
            # Best effort: a molecule that cannot be converted or
            # parameterised is skipped.  Only Exception is caught so that
            # KeyboardInterrupt and GeneratorExit still propagate.
            continue

        # Yield outside the try block so that closing the generator, or an
        # exception thrown in by the consumer, is not swallowed.
        yield atoms, adjacency_map, angles, bonds, torsions, particle_params
Ejemplo n.º 30
0
def setup_fah_run(destination_path,
                  protein_pdb_filename,
                  oemol=None,
                  cache=None,
                  restrain_rmsd=False):
    """
    Prepare and equilibrate a solvated system and export it as a FAH RUN.

    The protein (plus the optional small molecule) is solvated, minimized and
    equilibrated, and the resulting system / integrator / state are written
    as bz2-compressed OpenMM XML files into ``destination_path``. If all
    three XML files already exist the function returns immediately without
    doing any work.

    Parameters
    ----------
    destination_path : str
        The path to the RUN to be created
    protein_pdb_filename : str
        Path to protein PDB file
    oemol : openeye.oechem.OEMol, optional, default=None
        The molecule to parameterize, with SDData attached
        If None, don't include the small molecule
    cache : optional, default=None
        Forwarded unchanged to ``SystemGenerator(cache=...)``
    restrain_rmsd : bool, optional, default=False
        If True, restrain RMSD during first equilibration phase

    Returns
    -------
    None
        All results are written to files under ``destination_path``.
    """
    # Parameters
    from simtk import unit, openmm
    protein_forcefield = 'amber14/protein.ff14SB.xml'
    solvent_forcefield = 'amber14/tip3p.xml'
    small_molecule_forcefield = 'openff-1.2.0'
    water_model = 'tip3p'
    solvent_padding = 10.0 * unit.angstrom
    ionic_strength = 70 * unit.millimolar  # assay buffer: 20 mM HEPES pH 7.3, 1 mM TCEP, 50 mM NaCl, 0.01% Tween-20, 10% glycerol
    pressure = 1.0 * unit.atmospheres
    collision_rate = 1.0 / unit.picoseconds
    temperature = 300.0 * unit.kelvin
    timestep = 4.0 * unit.femtoseconds
    iterations = 1000  # 1 ns equilibration
    nsteps_per_iteration = 250

    # Output files that define a completed RUN
    import os
    system_xml_filename = os.path.join(destination_path, 'system.xml.bz2')
    integrator_xml_filename = os.path.join(destination_path,
                                           'integrator.xml.bz2')
    state_xml_filename = os.path.join(destination_path, 'state.xml.bz2')

    # Check if we can skip setup
    required_outputs = (system_xml_filename, state_xml_filename,
                        integrator_xml_filename)
    if all(os.path.exists(path) for path in required_outputs):
        return

    # Remaining imports are deferred until we know there is work to do
    import bz2
    import gzip
    import time
    import numpy as np
    import mdtraj as md
    from simtk.openmm import app
    from openmmforcefields.generators import SystemGenerator
    from openmmtools import integrators
    from rich.progress import track

    # Create barostat
    barostat = openmm.MonteCarloBarostat(pressure, temperature)

    # Create RUN directory if it does not yet exist
    os.makedirs(destination_path, exist_ok=True)

    # Load any molecule(s)
    molecule = None
    if oemol is not None:
        from openforcefield.topology import Molecule
        molecule = Molecule.from_openeye(oemol, allow_undefined_stereo=True)
        molecule.name = 'MOL'  # Ensure residue is MOL
        print([res for res in molecule.to_topology().to_openmm().residues()])

    # Create SystemGenerator
    forcefield_kwargs = {
        'removeCMMotion': False,
        'hydrogenMass': 3.0 * unit.amu,
        'constraints': app.HBonds,
        'rigidWater': True
    }
    periodic_kwargs = {
        'nonbondedMethod': app.PME,
        'ewaldErrorTolerance': 2.5e-04
    }
    forcefields = [protein_forcefield, solvent_forcefield]
    openmm_system_generator = SystemGenerator(
        forcefields=forcefields,
        molecules=molecule,
        small_molecule_forcefield=small_molecule_forcefield,
        cache=cache,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        periodic_forcefield_kwargs=periodic_kwargs)

    # Read protein
    print(f'Reading protein from {protein_pdb_filename}...')
    pdbfile = app.PDBFile(protein_pdb_filename)
    modeller = app.Modeller(pdbfile.topology, pdbfile.positions)

    if oemol is not None:
        # Add small molecule to the system
        modeller.add(molecule.to_topology().to_openmm(),
                     molecule.conformers[0])
        # DEBUG : Check residue name
        with open(os.path.join(destination_path, 'initial-complex.pdb'),
                  'wt') as outfile:
            app.PDBFile.writeFile(modeller.topology, modeller.positions,
                                  outfile)

    # Add solvent
    print('Adding solvent...')
    modeller.addSolvent(openmm_system_generator.forcefield,
                        model=water_model,
                        ionicStrength=ionic_strength,
                        padding=solvent_padding)

    # Create an OpenMM system
    print('Creating OpenMM system...')
    system = openmm_system_generator.create_system(modeller.topology)

    # MDTraj view of the full solvated topology. Its atom indices are the
    # same as those of `system`, so selections made on it can be handed
    # directly to OpenMM forces.
    full_mdtop = None
    if oemol is not None or restrain_rmsd:
        full_mdtop = md.Topology.from_openmm(
            modeller.topology)  # includes ligand, solvent and ions

    # Add a virtual bond between protein and ligand to make sure they are not imaged separately
    if oemol is not None:
        for res in full_mdtop.residues:
            print(res)
        protein_atom_indices = full_mdtop.select(
            '(protein and name CA)')  # protein CA atoms
        ligand_atom_indices = full_mdtop.select(
            '((resname MOL) and (mass > 1))')  # ligand heavy atoms
        protein_atom_index = int(protein_atom_indices[0])
        ligand_atom_index = int(ligand_atom_indices[0])
        # Zero-energy bond: contributes no force, only marks the pair as bonded
        force = openmm.CustomBondForce('0')
        force.addBond(protein_atom_index, ligand_atom_index, [])
        system.addForce(force)

    # Add RMSD restraints if requested
    if restrain_rmsd:
        print('Adding RMSD restraint...')
        kB = unit.AVOGADRO_CONSTANT_NA * unit.BOLTZMANN_CONSTANT_kB
        kT = kB * temperature
        # Select on the full topology: the protein-only PDB topology has no
        # MOL residue, so selecting there would silently leave the ligand
        # out of the restraint.
        rmsd_atom_indices = full_mdtop.select(
            '(protein and (name CA)) or ((resname MOL) and (mass > 1))'
        )  # CA atoms and ligand heavy atoms
        rmsd_atom_indices = [int(index) for index in rmsd_atom_indices]
        custom_cv_force = openmm.CustomCVForce('(K_RMSD/2)*RMSD^2')
        custom_cv_force.addGlobalParameter('K_RMSD', kT / unit.angstrom**2)
        rmsd_force = openmm.RMSDForce(modeller.positions, rmsd_atom_indices)
        custom_cv_force.addCollectiveVariable('RMSD', rmsd_force)
        force_index = system.addForce(custom_cv_force)

    # Create OpenMM Context
    platform = openmm.Platform.getPlatformByName('OpenCL')
    platform.setPropertyDefaultValue('Precision', 'mixed')
    integrator = integrators.LangevinIntegrator(temperature, collision_rate,
                                                timestep)
    context = openmm.Context(system, integrator, platform)
    context.setPositions(modeller.positions)

    # Report initial potential energy
    state = context.getState(getEnergy=True)
    print(
        f'Initial potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol'
    )

    # Store snapshots in MDTraj trajectory to examine RMSD
    # NOTE(review): the trajectory and the two selections below are only
    # consumed by the disabled RMSD check further down. They are built from
    # the protein-only PDB topology, so the ligand selection is expected to
    # be empty - confirm before re-enabling that check.
    mdtop = md.Topology.from_openmm(pdbfile.topology)
    atom_indices = mdtop.select('all')  # all atoms of the input PDB
    protein_atom_indices = mdtop.select(
        'protein and (mass > 1)')  # heavy solute atoms
    if oemol is not None:
        ligand_atom_indices = mdtop.select(
            '(resname MOL) and (mass > 1)')  # ligand heavy atoms
    trajectory = md.Trajectory(
        np.zeros([iterations + 1, len(atom_indices), 3], np.float32), mdtop)
    trajectory.xyz[0, :, :] = context.getState(getPositions=True).getPositions(
        asNumpy=True)[atom_indices] / unit.nanometers

    # Minimize
    print('Minimizing...')
    openmm.LocalEnergyMinimizer.minimize(context)

    # Equilibrate (with RMSD restraint if needed)
    initial_time = time.time()
    for iteration in track(range(iterations), 'Equilibrating...'):
        integrator.step(nsteps_per_iteration)
        trajectory.xyz[iteration + 1, :, :] = context.getState(
            getPositions=True).getPositions(
                asNumpy=True)[atom_indices] / unit.nanometers
    elapsed_time = (time.time() - initial_time) * unit.seconds
    ns_per_day = (context.getState().getTime() /
                  elapsed_time) / (unit.nanoseconds / unit.day)
    print(f'Performance: {ns_per_day:8.3f} ns/day')

    if restrain_rmsd:
        # Disable RMSD restraint
        context.setParameter('K_RMSD', 0.0)

        print('Minimizing...')
        openmm.LocalEnergyMinimizer.minimize(context)

        for iteration in track(range(iterations),
                               'Equilibrating without RMSD restraint...'):
            integrator.step(nsteps_per_iteration)

    # Retrieve state
    state = context.getState(getPositions=True,
                             getVelocities=True,
                             getEnergy=True,
                             getForces=True)
    system.setDefaultPeriodicBoxVectors(*state.getPeriodicBoxVectors())
    modeller.topology.setPeriodicBoxVectors(state.getPeriodicBoxVectors())
    print(
        f'Final potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol'
    )

    # Drop the restraint force so the exported system does not contain it
    if restrain_rmsd:
        print('Removing RMSD restraint from system...')
        system.removeForce(force_index)

    #if oemol is not None:
    #    # Check final RMSD
    #    print('checking RMSD...')
    #    trajectory.superpose(trajectory, atom_indices=protein_atom_indices)
    #    protein_rmsd = md.rmsd(trajectory, trajectory[-1], atom_indices=protein_atom_indices)[-1] * 10 # Angstroms
    #    oechem.OESetSDData(oemol, 'equil_protein_rmsd', f'{protein_rmsd:.2f} A')
    #    ligand_rmsd = md.rmsd(trajectory, trajectory[-1], atom_indices=ligand_atom_indices)[-1] * 10 # Angstroms
    #    oechem.OESetSDData(oemol, 'equil_ligand_rmsd', f'{ligand_rmsd:.2f} A')
    #    print('RMSD after equilibration: protein {protein_rmsd:8.2f} A | ligand {ligand_rmsd:8.3f} A')

    # Save as OpenMM
    print('Exporting for OpenMM FAH simulation...')
    with bz2.open(integrator_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(integrator))
    with bz2.open(state_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(state))
    with bz2.open(system_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(system))
    # The file name ends in .gz, so it must be gzip-compressed (not bz2) for
    # gzip-aware readers to be able to open it.
    with gzip.open(os.path.join(destination_path, 'equilibrated-all.pdb.gz'),
                   'wt') as f:
        app.PDBFile.writeFile(modeller.topology, state.getPositions(), f)
    with open(os.path.join(destination_path, 'equilibrated-solute.pdb'),
              'wt') as f:
        # Strip waters (ions are kept by the 'not water' selection)
        mdtraj_topology = md.Topology.from_openmm(modeller.topology)
        mdtraj_trajectory = md.Trajectory(
            [state.getPositions(asNumpy=True) / unit.nanometers],
            mdtraj_topology)
        selection = mdtraj_topology.select('not water')
        mdtraj_trajectory = mdtraj_trajectory.atom_slice(selection)
        app.PDBFile.writeFile(mdtraj_trajectory.topology.to_openmm(),
                              mdtraj_trajectory.openmm_positions(0), f)
    if oemol is not None:
        # Write molecule in each format, chosen by file extension
        for extension in ['sdf', 'mol2', 'smi', 'csv']:
            filename = os.path.join(destination_path, f'molecule.{extension}')
            with oechem.oemolostream(filename) as ofs:
                oechem.OEWriteMolecule(ofs, oemol)

    # Clean up
    del context, integrator