Ejemplo n.º 1
0
    def test_from_smiles(self):
        """Check the bookkeeping of a Topology built from the SMILES-derived ethane.

        The topology is first created from a single molecule, then the same
        molecule is added a second time: the per-topology counts must double
        while the number of reference molecules stays at one.
        """

        def check_counts(top, n_copies):
            # The asserted values are 8 atoms/particles and 7 bonds per copy.
            expected = (
                ('n_reference_molecules', 1),
                ('n_topology_molecules', n_copies),
                ('n_topology_atoms', 8 * n_copies),
                ('n_topology_bonds', 7 * n_copies),
                ('n_topology_particles', 8 * n_copies),
                ('n_topology_virtual_sites', 0),
            )
            for attribute, value in expected:
                assert getattr(top, attribute) == value
            assert top.box_vectors is None
            assert len(top.constrained_atom_pairs.items()) == 0

        topology = Topology.from_molecules(self.ethane_from_smiles)
        check_counts(topology, 1)

        topology.add_molecule(self.ethane_from_smiles)
        check_counts(topology, 2)
def smirnoff_analyze_torsions(forcefield, off_mol):
    """
    Group the proper torsions of a molecule by the SMIRKS that matched them

    Parameters
    ----------
    forcefield: openforcefield.typing.engines.smirnoff.ForceField
        The force field used to label the molecule
    off_mol: openforcefield.topology.Molecule
        The molecule whose proper torsions are analyzed

    Returns
    -------
    torsions_coverage: dict
        Maps each torsion SMIRKS to the list of atom-index tuples it matched,
        e.g. {SMIRKs: [(0,1,2,3), (2,4,6,7), ..] }
    """
    topology = Off_Topology.from_molecules(off_mol)
    # label_molecules returns one labelling per molecule; only the first is used.
    proper_torsions = forcefield.label_molecules(topology)[0]['ProperTorsions']

    coverage = defaultdict(list)
    for atom_indices, parameter in proper_torsions.items():
        coverage[parameter.smirks].append(atom_indices)
    return coverage
def check_molecule(molecule, test_ff=None):
    """Run a few sanity checks on a QCElemental Molecule.

    The molecule is converted through cmiles, written to ``test.mol2`` inside a
    temporary directory, and then checked for (1) unchanged connectivity,
    (2) optionally, whether ``test_ff`` can build an OpenMM system for it, and
    (3) the ``check_hbond`` test.

    Parameters
    ----------
    molecule
        Object providing ``dict(encoding='json')`` and ``connectivity``
        (an iterable of 3-item bond entries).
    test_ff : optional
        Object providing ``create_openmm_system``. When given, and the
        connectivity check passed, the molecule is parameterised with it.

    Returns
    -------
    success : bool
        False if any of the checks failed.
    err_msg : str
        Message of the last failing check, or an empty string.
    """
    import tempfile
    qcjson_mol = molecule.dict(encoding='json')
    oemol = cmiles.utils.load_molecule(qcjson_mol)
    success = True
    err_msg = ""
    cwd = os.getcwd()
    # write a test.mol2 file in a temp dir for checking
    with tempfile.TemporaryDirectory() as tmpdirname:
        os.chdir(tmpdirname)
        try:
            # The original used ``ofs`` without creating it in this function;
            # a local output stream makes the block self-contained.
            ofs = oechem.oemolostream()
            ofs.open('test.mol2')
            oechem.OEWriteMolecule(ofs, oemol)
            ofs.close()
            # test if bonds changed
            bond_set = {(a, b) for a, b, v in molecule.connectivity}
            if not check_connectivity(bond_set, 'test.mol2'):
                success = False
                err_msg = "Bonds changed after rebuild"
            # test if can be created by the test_ff
            if success and test_ff is not None:
                from openforcefield.topology import Molecule as Off_Molecule
                from openforcefield.topology import Topology as Off_Topology
                try:
                    off_molecule = Off_Molecule.from_file('test.mol2')
                    off_topology = Off_Topology.from_molecules(off_molecule)
                    test_ff.create_openmm_system(off_topology)
                except Exception as e:
                    success = False
                    err_msg = str(e)
            # test if this molecule has hydrogen bonds
            if not check_hbond('test.mol2'):
                success = False
                err_msg = 'One or more hydrogen bond found'
        finally:
            # Go back to the original directory even when a check raises, and
            # before the temporary directory (the current cwd) is removed.
            os.chdir(cwd)
    return success, err_msg
Ejemplo n.º 4
0
    def _openeye_parameteriser(cls, mol, **kwargs):
        """
        Creates a parameterised system from openeye molecule

        The molecule is converted to an openforcefield Molecule, charged with
        AM1-BCC through the OpenEye toolkit wrapper, parameterised with
        ``test_forcefields/smirnoff99Frosst.offxml`` and loaded into ParmEd.
        The input molecule is also written to a PDB file in a fresh temporary
        directory.

        Parameters
        ----------
        mol : oechem.OEMol
        **kwargs
            Accepted but not used by this implementation.

        Returns
        -------
        pdb_filename : str
            Path of the PDB file written for ``mol``.
        ligand_pmd
            The object returned by ``parmed.openmm.topsystem.load_topology``,
            with every residue renamed to ``'LIG'``.

        Raises
        ------
        ValueError
            If any step of the parameterisation fails; the original exception
            is chained as the cause.
        """
        try:
            forcefield = ForceField('test_forcefields/smirnoff99Frosst.offxml')
            molecule = Molecule.from_openeye(
                mol, allow_undefined_stereo=cls.allow_undefined_stereo)
            from openforcefield.utils.toolkits import OpenEyeToolkitWrapper
            molecule.compute_partial_charges_am1bcc(
                toolkit_registry=OpenEyeToolkitWrapper())

            topology = Topology.from_molecules(molecule)
            openmm_system = forcefield.create_openmm_system(
                topology, charge_from_molecules=[molecule])

            ligand_pmd = parmed.openmm.topsystem.load_topology(
                topology.to_openmm(), openmm_system, molecule._conformers[0])
        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise ValueError(
                "Parameterisation Failed : {}".format(e)) from e  #TODO

        # ligand_pmd.title = cls.smiles

        for residue in ligand_pmd.residues:
            residue.name = 'LIG'

        tmp_dir = tempfile.mkdtemp()
        # We need all molecules as both pdb files (as packmol input)
        # and mdtraj.Trajectory for restoring bonds later.
        # The name is generated inside the directory created just above.
        pdb_filename = tempfile.mktemp(suffix=".pdb", dir=tmp_dir)
        from openeye import oechem  # OpenEye Python toolkits
        ofs = oechem.oemolostream(pdb_filename)
        try:
            oechem.OEWriteMolecule(ofs, mol)
        finally:
            # Close explicitly so the file is complete before its path is
            # handed back to the caller.
            ofs.close()
        return pdb_filename, ligand_pmd
Ejemplo n.º 5
0
def min_ffxml(mol, ofs, ffxml):
    """Minimize a copy of ``mol`` with the force field ``ffxml`` and write it.

    Parameters
    ----------
    mol
        OpenEye molecule; it is copied, so the input is not modified here.
    ofs
        Output stream passed to ``oechem.OEWriteConstMolecule``.
    ffxml
        Argument passed to the ``ForceField`` constructor.

    Returns
    -------
    None
        On success the minimized copy is written to ``ofs`` with the energy
        stored in the "Energy FFXML" SD tag. If the OpenMM system cannot be
        created, a message is printed and nothing is written.
    """
    # work on a copy of the input mol
    work_mol = oechem.OEGraphMol(mol)

    try:
        # Both the molecule conversion and the system creation are prone to
        # raising, so the whole setup is guarded together.
        off_mol = Molecule.from_openeye(work_mol)
        force_field = ForceField(ffxml)
        topology = Topology.from_molecules(molecules=[off_mol])
        #system = ff.create_openmm_system(topology, charge_from_molecules=[off_mol])
        system = force_field.create_openmm_system(topology)
    except Exception:
        sd_smiles = oechem.OEGetSDData(work_mol, "SMILES QCArchive")
        print(' >>> openforcefield failed to create OpenMM system: '
              f'{work_mol.GetTitle()} {sd_smiles}')
        return

    start_positions = structure.extractPositionsFromOEMol(work_mol)

    # minimize structure with ffxml
    minimized_positions, energy = run_openmm(topology, system, start_positions)

    # store the new geometry, tag the energy, and write the molecule out
    work_mol.SetCoords(oechem.OEFloatArray(minimized_positions))
    oechem.OESetSDData(work_mol, "Energy FFXML", str(energy))
    oechem.OEWriteConstMolecule(ofs, work_mol)
Ejemplo n.º 6
0
    def get_parameters_from_forcefield(self, forcefield, molecule):
        """
        It returns the parameters that are obtained with the supplied
        forcefield for a certain offpele's molecule.

        Parameters
        ----------
        forcefield : str or an openforcefield.typing.engines.smirnoff.ForceField
                     object
            The forcefield from which the parameters will be obtained. A str
            is passed to the ForceField constructor.
        molecule : an offpele.topology.Molecule
            The offpele's Molecule object

        Returns
        -------
        openforcefield_parameters : an OpenForceFieldParameters object
            The OpenForceFieldParameters object

        Raises
        ------
        TypeError
            If forcefield is neither a str nor a ForceField. TypeError is a
            subclass of Exception, so callers catching Exception still work.
        AssertionError
            If the labelling does not return exactly one molecule.
        """
        from openforcefield.typing.engines.smirnoff import ForceField
        from openforcefield.topology import Topology

        off_molecule = molecule.off_molecule
        topology = Topology.from_molecules([off_molecule])

        if isinstance(forcefield, str):
            forcefield = ForceField(forcefield)
        elif not isinstance(forcefield, ForceField):
            raise TypeError('Invalid forcefield type')

        molecule_parameters_list = forcefield.label_molecules(topology)

        assert len(molecule_parameters_list) == 1, 'A single molecule is ' \
            'expected'
        return self.OpenForceFieldParameters(molecule_parameters_list[0])
Ejemplo n.º 7
0
 def test_from_smiles_unique_mols(self):
     """A topology built from two different molecules keeps both as references."""
     distinct_molecules = [self.ethane_from_smiles, self.propane_from_smiles]
     topology = Topology.from_molecules(distinct_molecules)
     # Two molecules in the topology, neither collapsed onto the other.
     assert topology.n_topology_molecules == 2
     assert topology.n_reference_molecules == 2
Ejemplo n.º 8
0
def smirnoff_analyze_parameter_coverage(forcefield, targets):
    """Report which SMIRNOFF parameters are assigned to the targets' molecules.

    For every target an OpenFF topology is obtained (built from the target's
    mol2 files, or taken from the target's engine), the force field labels its
    molecules, and each assigned parameter is recorded. The per-target
    assignments are written to ``smirnoff_parameter_assignments.json`` under
    ``forcefield.root`` and a coverage table is logged.

    Parameters
    ----------
    forcefield
        Object exposing ``offxml``, ``openff_forcefield``, ``root`` and
        ``plist`` (per the inline comment below, a forcebalance.forcefield.FF).
    targets : iterable
        Target objects. Remote targets, OptGeo targets and targets whose
        engine is a SMIRNOFF engine with an ``off_topology`` are analyzed;
        for any other target a warning is logged.

    Raises
    ------
    AssertionError
        If ``forcefield`` has no ``offxml`` attribute.
    """
    printcool("SMIRNOFF Parameter Coverage Analysis")
    assert hasattr(forcefield,
                   'offxml'), "Only SMIRNOFF Force Field is supported"
    parameter_assignment_data = defaultdict(list)
    parameter_counter = Counter()
    # The openforcefield.typing.engines.smirnoff.ForceField object should now be contained in forcebalance.forcefield.FF
    ff = forcefield.openff_forcefield
    # analyze each target
    for target in targets:
        off_topology = None
        ## remote targets are not initialized yet, we do a manual setup here
        if isinstance(target, forcebalance.target.RemoteTarget):
            if target.r_tgt_opts['type'].endswith('SMIRNOFF'):
                target_path = os.path.join(target.root, target.tgtdir)
                if target.r_tgt_opts['type'] == 'OPTGEOTARGET_SMIRNOFF':
                    # parse optgeo_options_txt and get the names of the mol2 files
                    optgeo_options_txt = os.path.join(
                        target_path, target.r_tgt_opts['optgeo_options_txt'])
                    sys_opts = forcebalance.opt_geo_target.OptGeoTarget.parse_optgeo_options(
                        optgeo_options_txt)
                    mol2_names = [
                        fnm for sysopt in sys_opts.values()
                        for fnm in sysopt['mol2']
                    ]
                else:
                    mol2_names = target.r_tgt_opts.get('mol2', [])
                openff_mols = [
                    OffMolecule.from_file(os.path.join(target_path, fnm),
                                          allow_undefined_stereo=True)
                    for fnm in mol2_names
                ]
                off_topology = OffTopology.from_molecules(openff_mols)
        elif isinstance(target, forcebalance.opt_geo_target.OptGeoTarget):
            if target.engine_.__name__ == 'SMIRNOFF':
                target_path = os.path.join(target.root, target.tgtdir)
                openff_mols = [
                    OffMolecule.from_file(os.path.join(target_path, fnm),
                                          allow_undefined_stereo=True)
                    for sysopt in target.sys_opts.values()
                    for fnm in sysopt['mol2']
                ]
                off_topology = OffTopology.from_molecules(openff_mols)
        elif hasattr(target, 'engine') and isinstance(
                target.engine, SMIRNOFF) and hasattr(target.engine,
                                                     'off_topology'):
            off_topology = target.engine.off_topology

        if off_topology is None:
            logger.warning(
                "No smirnoff topology or molecule found for target %s\n" %
                target.name)
            continue

        for mol_forces in ff.label_molecules(off_topology):
            for force_tag, force_dict in mol_forces.items():
                # e.g. force_tag = 'Bonds'
                for atom_indices, parameter in force_dict.items():
                    parameter_assignment_data[target.name].append({
                        'id': parameter.id,
                        'smirks': parameter.smirks,
                        'type': force_tag,
                        'atoms': list(atom_indices),
                    })
                    parameter_counter[parameter.smirks] += 1
    # write out parameter assignment data
    out_json_path = os.path.join(forcefield.root,
                                 'smirnoff_parameter_assignments.json')
    with open(out_json_path, 'w') as jsonfile:
        json.dump(parameter_assignment_data, jsonfile, indent=2)
        logger.info("Force field assignment data written to %s\n" %
                    out_json_path)
    # print parameter coverages
    logger.info("%4s %-100s   %10s\n" % ("idx", "Parameter", "Count"))
    logger.info("-" * 118 + '\n')
    n_covered = 0
    for i, p in enumerate(forcefield.plist):
        # the counter is keyed by SMIRKS, taken here as the text after the last '/'
        smirks = p.split('/')[-1]
        logger.info('%4i %-100s : %10d\n' % (i, p, parameter_counter[smirks]))
        if parameter_counter[smirks] > 0:
            n_covered += 1
    # The message previously misspelled the name as "SNIRNOFF".
    logger.info(
        "SMIRNOFF Parameter Coverage Analysis result: %d/%d parameters are covered.\n"
        % (n_covered, len(forcefield.plist)))
    logger.info("-" * 118 + '\n')
Ejemplo n.º 9
0
def find_smirks_parameters(smiles_list, molecule_paths):
    """Finds the force field parameters which would
    be assigned to a list of molecules defined by the provided
    SMILES patterns.

    Each molecule is read from its file (the first molecule in the file is
    used), labelled with ``smirnoff99Frosst-1.0.9.offxml`` and every assigned
    parameter is recorded. The assignments are also printed.

    Parameters
    ----------
    smiles_list: list of str
        The SMILES patterns of the target molecules. Only the length of this
        list is used; the molecules themselves are read from `molecule_paths`.
    molecule_paths: list of Path
        The list of molecules that correspond to the SMILES strings (to make it easier to see which molecules
        utilize which parameters)

    Returns
    -------
    dict of str and dict
        A dictionary keyed by force name (e.g. "Bonds"). Each value maps a
        parameter ID to a dictionary with the keys ``"atom_indices"`` (list of
        atom-index tuples), ``"atom_names"`` (list of lists of atom names) and
        ``"smirks"`` (the SMIRKS pattern of the parameter). Every match is
        accumulated. Files that cannot be opened or contain no molecule are
        logged and skipped.
    """

    force_field = smirnoff.ForceField('smirnoff99Frosst-1.0.9.offxml')

    smiles_by_smirks = {
        "Bonds": {},
        "Angles": {},
        "ProperTorsions": {},
        "vdW": {},
        "ImproperTorsions": {},
        "Electrostatics": {},
    }

    # Populate the dictionary using the open force field toolkit.
    for index, _smiles in enumerate(smiles_list):

        ifs = oechem.oemolistream()

        if not ifs.open(str(molecule_paths[index])):
            logging.error(
                f'Unable to open {molecule_paths[index]} for reading...')
            # Nothing can be read from this file; move on to the next one.
            continue

        oe_mols = [oechem.OEMol(mol) for mol in ifs.GetOEMols()]
        ifs.close()

        if not oe_mols:
            logging.error(
                f'No molecule could be read from {molecule_paths[index]}')
            continue

        oe_mol = oe_mols[0]
        oechem.OE3DToAtomStereo(oe_mol)
        molecule = Molecule.from_openeye(oe_mol)

        # molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True)
        topology = Topology.from_molecules([molecule])

        molecule_force_list = force_field.label_molecules(topology)

        for molecule_index, molecule_forces in enumerate(molecule_force_list):
            print(f'Forces for molecule {molecule_index}')
            for force_name, force_dict in molecule_forces.items():
                print(f"\n{force_name}:")
                # Force names outside the pre-seeded set get their own entry
                # instead of raising KeyError.
                force_entries = smiles_by_smirks.setdefault(force_name, {})
                for (atom_indices, parameter) in force_dict.items():
                    atom_names = [
                        oe_mol.GetAtom(oechem.OEHasAtomIdx(i)).GetName()
                        for i in atom_indices
                    ]
                    print("atoms: %s  parameter_id: %s  smirks %s" %
                          (atom_names, parameter.id, parameter.smirks))

                    # Create the per-parameter record only once. The original
                    # looked the list keys up in the per-force dictionary, so
                    # the lists were reset on every match and never grew
                    # beyond one element.
                    entry = force_entries.setdefault(parameter.id, {})
                    entry.setdefault("atom_indices", []).append(atom_indices)
                    entry.setdefault("atom_names", []).append(atom_names)
                    entry["smirks"] = parameter.smirks

    return smiles_by_smirks
Ejemplo n.º 10
0
def smirnoff_analyze_parameter_coverage(forcefield, tgt_opts):
    """Report which SMIRNOFF parameters are assigned to the targets' mol2 files.

    The mol2 files of every SMIRNOFF target option are labelled one file at a
    time with the force field. The per-file assignments are written to
    ``smirnoff_parameter_assignments.json`` under ``forcefield.root`` and a
    coverage table is logged.

    Parameters
    ----------
    forcefield
        Object exposing ``offxml``, ``openff_forcefield``, ``root`` and
        ``plist`` (per the inline comment below, a forcebalance.forcefield.FF).
    tgt_opts : iterable of dict
        Target options. Each needs ``'name'`` and ``'type'``; targets of type
        ``'OPTGEOTARGET_SMIRNOFF'`` also need ``'optgeo_options_txt'`` and
        other ``'*_SMIRNOFF'`` targets need ``'mol2'``. Targets of any other
        type contribute no files.

    Raises
    ------
    AssertionError
        If ``forcefield`` has no ``offxml`` attribute.
    """
    printcool("SMIRNOFF Parameter Coverage Analysis")
    assert hasattr(forcefield,
                   'offxml'), "Only SMIRNOFF Force Field is supported"
    parameter_assignment_data = defaultdict(list)
    parameter_counter = Counter()
    # The openforcefield.typing.engines.smirnoff.ForceField object should now be contained in forcebalance.forcefield.FF
    ff = forcefield.openff_forcefield
    # analyze each target
    for tgt_option in tgt_opts:
        target_path = os.path.join('targets', tgt_option['name'])
        # aggregate mol2 file paths from all targets
        mol2_paths = []
        if tgt_option['type'] == 'OPTGEOTARGET_SMIRNOFF':
            # parse optgeo_options_txt and get the names of the mol2 files
            optgeo_options_txt = os.path.join(target_path,
                                              tgt_option['optgeo_options_txt'])
            sys_opts = forcebalance.opt_geo_target.OptGeoTarget.parse_optgeo_options(
                optgeo_options_txt)
            mol2_paths = [
                os.path.join(target_path, fnm) for sysopt in sys_opts.values()
                for fnm in sysopt['mol2']
            ]
        elif tgt_option['type'].endswith('_SMIRNOFF'):
            mol2_paths = [
                os.path.join(target_path, fnm) for fnm in tgt_option['mol2']
            ]
        # analyze SMIRKs terms
        for mol_fnm in mol2_paths:
            # we work with one file at a time to avoid the topology silently combining "same" molecules
            openff_mol = OffMolecule.from_file(mol_fnm)
            off_topology = OffTopology.from_molecules([openff_mol])
            for mol_forces in ff.label_molecules(off_topology):
                for force_tag, force_dict in mol_forces.items():
                    # e.g. force_tag = 'Bonds'
                    for atom_indices, parameter in force_dict.items():
                        parameter_assignment_data[mol_fnm].append({
                            'id': parameter.id,
                            'smirks': parameter.smirks,
                            'type': force_tag,
                            'atoms': list(atom_indices),
                        })
                        parameter_counter[parameter.smirks] += 1
    # write out parameter assignment data
    out_json_path = os.path.join(forcefield.root,
                                 'smirnoff_parameter_assignments.json')
    with open(out_json_path, 'w') as jsonfile:
        json.dump(parameter_assignment_data, jsonfile, indent=2)
        logger.info("Force field assignment data written to %s\n" %
                    out_json_path)
    # print parameter coverages
    logger.info("%4s %-100s   %10s\n" % ("idx", "Parameter", "Count"))
    logger.info("-" * 118 + '\n')
    n_covered = 0
    for i, p in enumerate(forcefield.plist):
        # the counter is keyed by SMIRKS, taken here as the text after the last '/'
        smirks = p.split('/')[-1]
        logger.info('%4i %-100s : %10d\n' % (i, p, parameter_counter[smirks]))
        if parameter_counter[smirks] > 0:
            n_covered += 1
    # The message previously misspelled the name as "SNIRNOFF".
    logger.info(
        "SMIRNOFF Parameter Coverage Analysis result: %d/%d parameters are covered.\n"
        % (n_covered, len(forcefield.plist)))
    logger.info("-" * 118 + '\n')
Ejemplo n.º 11
0
def data_generator():
    """Yield per-molecule tensors derived from force-field parameters.

    Iterates over the module-level ``mols``, parameterises each one with the
    module-level ``FF`` and yields a 6-tuple
    ``(atoms, adjacency_map, angles, bonds, torsions, particle_params)``.
    Molecules for which any step raises (including the bond-ordering
    assertion) are skipped.

    Yields
    ------
    tuple
        atoms : float32 tensor built from ``TRANSLATION[atom._atomic_number]``.
        adjacency_map : float32 (n_atoms, n_atoms) tensor holding the bond
            order at ``[atom1_index, atom2_index]``.
        angles, bonds, torsions, particle_params : tensors of the parameters
            read from forces 0, 1, 3 and 2 of the OpenMM system.
    """
    for mol in mols:
        try:
            mol = Molecule.from_openeye(mol)
            topology = Topology.from_molecules(mol)
            # The topology and system were previously built twice per
            # molecule; a single build is reused for everything below.
            system = FF.create_openmm_system(topology)
            n_atoms = topology.n_topology_atoms
            atoms = tf.convert_to_tensor(
                [TRANSLATION[atom._atomic_number] for atom in mol.atoms],
                dtype=tf.float32)

            adjacency_map = np.zeros((n_atoms, n_atoms), dtype=np.float32)

            for bond in mol.bonds:
                assert bond.atom1_index < bond.atom2_index

                adjacency_map[bond.atom1_index, bond.atom2_index] = \
                    bond.bond_order

            adjacency_map = tf.convert_to_tensor(
                adjacency_map,
                dtype=tf.float32)

            # The index-to-force mapping follows the accessor called on each
            # force: 0 angles, 1 bonds, 2 per-particle, 3 torsions.
            forces = system.getForces()
            angle_force = forces[0]
            bond_force = forces[1]
            particle_force = forces[2]
            torsion_force = forces[3]

            # NOTE(review): the numeric offsets/divisors below look like
            # normalisation constants (mean / spread) - confirm their origin.
            angles = tf.convert_to_tensor(
                [[x[0], x[1], x[2],
                  (x[3]._value - 1.965) / 0.237,
                  (x[4]._value - 507.28) / 396.80]
                 for x in (angle_force.getAngleParameters(idx)
                           for idx in range(angle_force.getNumAngles()))],
                dtype=tf.float32)

            bonds = tf.convert_to_tensor(
                [[x[0], x[1],
                  (x[2]._value - 0.126) / 0.0212,
                  (x[3]._value - 274856) / 12213.203]
                 for x in (bond_force.getBondParameters(idx)
                           for idx in range(bond_force.getNumBonds()))],
                dtype=tf.float32)

            torsions = tf.convert_to_tensor(
                [[x[0], x[1], x[2], x[3], x[4], x[5]._value, x[6]._value]
                 for x in (torsion_force.getTorsionParameters(idx)
                           for idx in range(torsion_force.getNumTorsions()))],
                dtype=tf.float32)

            particle_params = tf.convert_to_tensor(
                [[(x[0]._value - 0.00195) / 0.269,
                  (x[1]._value - 0.276) / 0.0654,
                  (x[2]._value - 0.280) / 0.284]
                 for x in (particle_force.getParticleParameters(idx)
                           for idx in range(particle_force.getNumParticles()))])
        except Exception:
            # Best effort: skip molecules that cannot be processed. Only
            # Exception is caught, and the yield sits outside the try, so
            # GeneratorExit and KeyboardInterrupt are no longer swallowed.
            continue

        yield atoms, adjacency_map, angles, bonds, torsions, particle_params
Ejemplo n.º 12
0
def list_matching_torsions(smi_file, forcefield):
    """List, for every proper-torsion parameter, the molecules that match it.

    Molecules are read from ``smi_file``; for each one only the torsions whose
    two central atoms equal the ``center_bond`` returned by
    ``gen_canonical_isomeric_smiles`` are considered. A summary table is
    printed.

    Parameters
    ----------
    smi_file
        Path passed to ``fragmenter.chemi.file_to_oemols``.
    forcefield
        Force field providing ``get_parameter_handler`` and
        ``label_molecules``.

    Returns
    -------
    tid_molecules_list : dict
        ``tid_molecules_list[tid] = [{'mol_index': mol_index, 'indices':
        indices, 'covered_tids': covered_tids}, ...]``, with one entry per
        molecule and parameter.
    failed_smi : list of str
        SMILES of the molecules that could not be processed.
    """
    from fragmenter import chemi  # chemi.file_to_oemols

    # generate oemols from smi file
    oemols = chemi.file_to_oemols(smi_file)
    # list of torsion parameters
    ff_torsion_param_list = forcefield.get_parameter_handler(
        'ProperTorsions').parameters

    # every known torsion id starts with an empty list of matches
    tid_molecules_list = {
        torsion_param.id: []
        for torsion_param in ff_torsion_param_list
    }
    failed_smi = []

    for oemol in tqdm(oemols):
        try:
            off_mol, mol_index, center_bond = gen_canonical_isomeric_smiles(
                oemol)
            oemol = Molecule.to_openeye(off_mol)
        except Exception:
            # Only Exception is caught so that KeyboardInterrupt/SystemExit
            # still stop the run.
            failed_smi.append(oechem.OEMolToSmiles(oemol))
            continue

        torsions_coverage = defaultdict(list)
        off_top = Topology.from_molecules(off_mol)
        center_tids = defaultdict(set)
        dihedrals = []
        for torsion_indices, torsion_param in forcefield.label_molecules(
                off_top)[0]['ProperTorsions'].items():
            _, j, k, _ = torsion_indices
            if {j, k} == center_bond:
                center_tids[tuple(sorted([j, k]))].add(torsion_param.id)
                torsions_coverage[torsion_param].append(torsion_indices)
                dihedrals.append(torsion_indices)
        if not check_connectivity(dihedrals, oemol):
            print(f'## {mol_index} has diff bond info in oemol and offmol...')
            continue
        filtered_torsions_coverage = filter_torsions_coverage(
            torsions_coverage, oemol)  # check connectivity

        for tid, indices_list in filtered_torsions_coverage.items():
            for indices in indices_list:
                _, j, k, _ = indices
                # center_tids holds sets, so the ids are already unique
                covered_tids = list(center_tids[tuple(sorted([j, k]))])
                tid_molecules_list[tid].append({
                    'mol_index': mol_index,
                    'indices': indices,
                    'covered_tids': covered_tids
                })
                break  # count once: only the first match is recorded
    print("\n## Torsion parameter: matched molecules ##\n" + '-' * 90)
    print(
        f"{'idx':<7} {'ID':7s} {'SMIRKS Pattern':70s} {'Number of molecules matched'}"
    )
    for idx, (tid, molecules_list) in enumerate(tid_molecules_list.items()):
        torsion_param = get_torsion_definition(ff_torsion_param_list, tid)
        print(
            f'{idx:<7} {torsion_param.id:7s} {torsion_param.smirks:70s} {len(molecules_list)}'
        )
    print('-' * 90)

    return tid_molecules_list, failed_smi
Ejemplo n.º 13
0
def find_parameter_smirks_matches(parameter_tag="vdW", *smiles_patterns):
    """Map the smirks of the parameters with a given tag onto the
    molecules (given as smiles patterns) which would be assigned them.

    Parameters
    ----------
    parameter_tag: str
        The tag of the force field parameters to find.
    smiles_patterns: str
        The smiles patterns of the molecules to assign the force
        field parameters to.

    Returns
    -------
    dict of str and set of str
        A dictionary with keys of parameter smirks patterns, and
        values of sets of the smiles patterns which would utilize
        those parameters. Every smirks of the handler is present,
        unused ones with an empty set.
    """

    force_field = _get_default_force_field()
    handler = force_field.get_parameter_handler(parameter_tag)

    # Start with every known smirks pattern so that the unused
    # ones are easy to spot in the result.
    matches = {}
    for known_parameter in handler.parameters:
        matches[known_parameter.smirks] = set()

    for smiles in smiles_patterns:

        is_cached = (smiles in cached_smirks_parameters
                     and parameter_tag in cached_smirks_parameters[smiles])

        if not is_cached:

            try:
                molecule = Molecule.from_smiles(smiles)
            except UndefinedStereochemistryError:
                # Molecules with undefined stereochemistry are skipped.
                continue

            topology = Topology.from_molecules([molecule])

            # Make sure the cache slots exist before labelling.
            per_smiles_cache = cached_smirks_parameters.setdefault(smiles, {})
            per_smiles_cache.setdefault(parameter_tag, [])

            labelled = force_field.label_molecules(topology)[0][parameter_tag]
            cached_smirks_parameters[smiles][parameter_tag] = [
                assigned.smirks for assigned in labelled.values()
            ]

        for smirks in cached_smirks_parameters[smiles][parameter_tag]:
            matches[smirks].add(smiles)

    return matches
Ejemplo n.º 14
0
# Make the SystemGenerator
# It is given the protein and solvation force fields, the barostat, PME for
# periodic systems, and the chosen ligand as the small molecule to parameterise.
system_generator = SystemGenerator(
    forcefields=[protein_forcefield, solvation_forcefield],
    barostat=barostat,
    periodic_forcefield_kwargs={"nonbondedMethod": app.PME},
    small_molecule_forcefield=small_molecule_forcefield,
    molecules=ligand_dict[chosen_ligand],
)

# Read in the PDB and create an OpenMM topology
pdbfile = app.PDBFile(input_pdb)
protein_topology, protein_positions = pdbfile.topology, pdbfile.positions

# Add ligand to topology - credit to @hannahbrucemacdonald for help here
print("--> Combining protein and ligand topologies")
# Build an openforcefield Topology for the ligand and convert it to OpenMM;
# the ligand coordinates come from its first conformer.
off_ligand_topology = Topology.from_molecules(ligand_dict[chosen_ligand])
ligand_topology = off_ligand_topology.to_openmm()
ligand_positions = ligand_dict[chosen_ligand].conformers[0]

# Both topologies are converted to mdtraj so that they can be joined.
md_protein_topology = md.Topology.from_openmm(
    protein_topology
)  # using mdtraj for protein top
md_ligand_topology = md.Topology.from_openmm(
    ligand_topology
)  # using mdtraj for ligand top
md_complex_topology = md_protein_topology.join(md_ligand_topology)  # add them together

complex_topology = md_complex_topology.to_openmm()  # now back to openmm
# Allocate a zero-filled (total_atoms, 3) position array in nanometers and
# copy the protein coordinates into its leading rows.
# NOTE(review): the ligand rows are still zero here - presumably they are
# filled further down; confirm.
total_atoms = len(protein_positions) + len(ligand_positions)
complex_positions = unit.Quantity(np.zeros([total_atoms, 3]), unit=unit.nanometers)
complex_positions[0 : len(protein_positions)] = protein_positions
Ejemplo n.º 15
0
    def _find_relevant_gradient_keys(substance, force_field_path,
                                     parameter_gradient_keys):
        """Filter the gradient keys down to those whose parameter
        is actually assigned to a component of the substance.

        Parameters
        ----------
        substance: Substance
            The substance whose components are labelled.
        force_field_path: str
            The path to the (JSON serialized) force field source
            which contains the parameters.
        parameter_gradient_keys: list of ParameterGradientKey
            The original list of parameter gradient keys.

        Returns
        -------
        list of ParameterGradientKey
            The keys which matched, without duplicates. Empty if
            no keys were given or if the force field is not a
            SMIRNOFF force field source.
        """
        from openforcefield.topology import Molecule, Topology

        # noinspection PyTypeChecker
        no_keys = (parameter_gradient_keys == UNDEFINED
                   or len(parameter_gradient_keys) == 0)

        if no_keys:
            return []

        with open(force_field_path) as file:
            force_field_source = ForceFieldSource.parse_json(file.read())

        if not isinstance(force_field_source, SmirnoffForceFieldSource):
            return []

        force_field = force_field_source.to_force_field()

        component_molecules = [
            Molecule.from_smiles(component.smiles)
            for component in substance.components
        ]

        topology = Topology.from_molecules(component_molecules)

        relevant_keys = []

        for molecule_labels in force_field.label_molecules(topology):

            for gradient_key in parameter_gradient_keys:

                # Skip tags this molecule does not have and keys that
                # have already been collected.
                if (gradient_key.tag not in molecule_labels
                        or gradient_key in relevant_keys):
                    continue

                assigned = molecule_labels[gradient_key.tag].store.values()

                if any(parameter.smirks == gradient_key.smirks
                       for parameter in assigned):
                    relevant_keys.append(gradient_key)

        return relevant_keys
Ejemplo n.º 16
0
import os

from openforcefield.topology import Molecule as Off_Molecule
from openforcefield.topology import Topology as Off_Topology
from openforcefield.typing.engines.smirnoff import ForceField

# Force field every mol2 file in the current directory is checked against.
test_ff = ForceField(
    "../../forcefield/param_valence.offxml",
    allow_cosmetic_attributes=True,
)

# Try to build an OpenMM system for each mol2 file; a failure raises and
# stops the script at the offending file, whose name was printed just before.
for f in os.listdir('.'):
    if not f.endswith('mol2'):
        continue
    print(f)
    off_molecule = Off_Molecule.from_file(f)
    off_topology = Off_Topology.from_molecules(off_molecule)
    test_ff.create_openmm_system(off_topology)
Ejemplo n.º 17
0
def topology_batched_md(num=-1, batch_size=16, step_size=100, ani_path='.'):
    """Run a short Langevin MD simulation for each molecule in a set of HDF5 files.

    Every ``.h5`` file directly inside ``ani_path`` is scanned two group
    levels deep. For each record, the conformer with the lowest stored energy
    is converted to a molecule, written to ``ds_md/<idx>.sdf``, parameterized
    with the module-level ``FF`` force field, energy-minimized and simulated
    for 10000 steps, with a DCD frame written to ``ds_md/<idx>.dcd`` every
    10 steps.

    Records that fail at any stage are logged and skipped; ``idx`` only
    advances after a fully successful simulation.

    Parameters
    ----------
    num : int
        Processing stops once the count of successfully simulated molecules
        exceeds ``num`` (so up to ``num + 1`` molecules are simulated).
        ``-1`` disables the limit.
    batch_size : int
        Unused; kept for interface compatibility.
    step_size : int
        Unused; kept for interface compatibility.
    ani_path : str
        Directory that is searched for ``.h5`` files.

    Returns
    -------
    None
        All results are written to the ``ds_md/`` directory.
    """
    import logging

    # Not referenced directly below; retained from the original because
    # dropping a function-scope import could change import-time behaviour.
    import dgl
    from dgl import data

    logger = logging.getLogger(__name__)
    ofs = oechem.oemolostream()

    idx = 0
    for path in os.listdir(ani_path):
        if idx > num and num != -1:
            break
        if not path.endswith('.h5'):
            continue

        # os.listdir yields bare file names, so resolve them against
        # ani_path. The context manager closes the HDF5 handle on every
        # exit path, including the early ``break`` below.
        with h5py.File(os.path.join(ani_path, path), 'r') as f:
            for d0 in list(f.keys()):
                if idx > num and num != -1:
                    break
                for d1 in list(f[d0].keys()):

                    if idx > num and num != -1:
                        break

                    print(idx)

                    try:
                        record = f[d0][d1]

                        # ``dataset[()]`` reads the full dataset; it replaces
                        # the ``.value`` accessor removed in h5py 3.0.
                        smiles = ''.join(
                            x.decode('utf-8')
                            for x in record['smiles'][()].tolist())
                        coordinates = record['coordinates'][()]
                        energies = record['energies'][()]
                        species = [
                            x.decode('utf-8') for x in record['species'][()]
                        ]

                        # Index of the conformer with the lowest energy.
                        low_energy_idx = np.argsort(energies)[0]

                        g, mol = get_ani_mol(coordinates[low_energy_idx],
                                             species, smiles)

                        ofs.open('ds_md/' + str(idx) + '.sdf')
                        try:
                            oechem.OEWriteMolecule(ofs, mol)
                        finally:
                            # Close so the SDF is flushed before moving on.
                            ofs.close()

                        mol = Molecule.from_openeye(mol)
                        topology = Topology.from_molecules(mol)
                        mol_sys = FF.create_openmm_system(topology)

                        integrator = LangevinIntegrator(
                            500 * kelvin, 1 / picosecond, 0.002 * picoseconds)

                        simulation = Simulation(topology.to_openmm(), mol_sys,
                                                integrator)

                        # NOTE(review): the 0.1 factor presumably converts
                        # Angstrom to nanometer -- confirm against
                        # get_ani_mol.
                        simulation.context.setPositions(
                            0.1 * g.ndata['xyz'].numpy())

                        simulation.reporters.append(
                            DCDReporter('ds_md/' + str(idx) + '.dcd', 10))

                        simulation.minimizeEnergy()
                        simulation.step(10000)

                        idx += 1

                    except Exception:
                        # Best-effort: skip records that cannot be processed,
                        # but leave a trace and let KeyboardInterrupt /
                        # SystemExit propagate.
                        logger.warning('Skipping record %s/%s in %s',
                                       d0, d1, path, exc_info=True)
                        continue
Ejemplo n.º 18
0
def find_smirks_parameters(parameter_tag='vdW', *smiles_patterns):
    """Finds those force field parameters with a given tag which
    would be assigned to a specified set of molecules defined by
    their smiles patterns.

    Labelling results are memoised per smiles pattern and tag in the
    module-level ``cached_smirks_parameters`` mapping.

    Parameters
    ----------
    parameter_tag: str
        The tag of the force field parameters to find.
    smiles_patterns: str
        The smiles patterns to assign the force field parameters
        to. Patterns with undefined stereochemistry are skipped.

    Returns
    -------
    dict of str and set of str
        A dictionary with keys of parameter smirks patterns, and
        values of sets of smiles patterns which would utilize
        those parameters. Every smirks pattern of the handler is
        present as a key, with an empty set if unused.
    """

    # Silence whatever is printed while the force field loads, and always
    # restore the real streams -- even if loading raises.
    stdout_ = sys.stdout
    stderr_ = sys.stderr

    stream = StringIO()
    sys.stdout = stream
    sys.stderr = stream

    try:
        force_field = ForceField('smirnoff99Frosst-1.1.0.offxml')
    finally:
        sys.stdout = stdout_
        sys.stderr = stderr_

    parameter_handler = force_field.get_parameter_handler(parameter_tag)

    # Start with every known smirks pattern so that unused ones are easy
    # to identify in the result.
    smiles_by_parameter_smirks = {}

    for parameter in parameter_handler.parameters:
        smiles_by_parameter_smirks.setdefault(parameter.smirks, set())

    # Populate the dictionary using the open force field toolkit.
    for smiles in smiles_patterns:

        if (smiles not in cached_smirks_parameters
                or parameter_tag not in cached_smirks_parameters[smiles]):

            try:
                molecule = Molecule.from_smiles(smiles)
            except UndefinedStereochemistryError:
                # Skip molecules with undefined stereochemistry.
                continue

            topology = Topology.from_molecules([molecule])
            labels = force_field.label_molecules(topology)[0][parameter_tag]
            assigned_smirks = [
                parameter.smirks for parameter in labels.values()
            ]

            # Only touch the cache once labelling has succeeded, so a failure
            # cannot leave behind an empty "no parameters" entry.
            if smiles not in cached_smirks_parameters:
                cached_smirks_parameters[smiles] = {}

            cached_smirks_parameters[smiles][parameter_tag] = assigned_smirks

        for smirks in cached_smirks_parameters[smiles][parameter_tag]:
            smiles_by_parameter_smirks[smirks].add(smiles)

    return smiles_by_parameter_smirks
Ejemplo n.º 19
0
def probe_by_parameter(probe_param, ffxml, subdir, all_probe_mols, inpickle):
    """
    For a single force field parameter, (1) find all molecules that use this
    parameter, and (2) save them to a mol2 file labeled with the parameter ID.

    Parameters
    ----------
    probe_param : string
        Name of the parameter to investigate. Its first character selects
        the parameter section ('a' Angles, 'b' Bonds, 'i' ImproperTorsions,
        'n' vdW, 't' ProperTorsions).
    ffxml : string
        Name of the FFXML force field file
    subdir : string
        Name of subdirectory in which to save mol2 files for each parameter
    all_probe_mols : dict
        key is string of a parameter id to be probed;
        value is a list (initially empty) that is appended to in place
    inpickle : string
        Name of the pickle file from output of tailed_parameters.py

    Returns
    -------
    all_probe_mols : dict
        key is string of a parameter id to be probed;
        value is a list of oegraphmols with this parameter id

    Raises
    ------
    ValueError
        If labeling the first molecule listed for ``probe_param`` does not
        yield a parameter whose id equals ``probe_param``.
    """
    prefix_dict = {'a':'Angles', 'b':'Bonds', 'i':'ImproperTorsions', 'n':'vdW', 't':'ProperTorsions'}

    # load parameter dictionaries from pickle
    # NOTE(review): pickle.load can execute arbitrary code; only pass pickle
    # files from a trusted source.
    with open(inpickle, 'rb') as f:
        _, data_out = pickle.load(f)
    params_id_out = data_out['params_id']

    # find the first mol in outlier set with given param
    mols_with_probe = list(params_id_out[probe_param])
    probe_mol = Molecule.from_smiles(mols_with_probe[0], allow_undefined_stereo=True)
    topology = Topology.from_molecules([probe_mol])

    # load in force field
    ff = ForceField(ffxml)

    # run molecule labeling
    molecule_force_list = ff.label_molecules(topology)

    # get the smirks pattern associated with param
    prefix = probe_param[0]
    force_dict = molecule_force_list[0][prefix_dict[prefix]]
    probe_smirks = next(
        (v.smirks for v in force_dict.values() if v.id == probe_param),
        None)
    if probe_smirks is None:
        # Previously this fell through to an UnboundLocalError in the print
        # below; fail with an explicit message instead.
        raise ValueError(
            f"Parameter {probe_param} was not assigned to molecule "
            f"{mols_with_probe[0]} by force field {ffxml}")
    print(f"\n=====\n{probe_param}: {probe_smirks}\n=====")

    # find all molecules with this parameter and save to file.
    # conformers are not considered here so these smiles refer to
    # an arbitrary conformer assigned in dict after zip
    # (since duplicate keys are removed in dict)
    outfile = f'{subdir}/param_{probe_param}.mol2'
    ofs = oechem.oemolostream()
    if not ofs.open(outfile):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % outfile)

    try:
        for m in mols_with_probe:
            key = data_out['smi_dict'][m]
            print(f"writing out {key}")
            mymol = data_out['mols_dict'][key]['structure']
            oechem.OEWriteConstMolecule(ofs, mymol)

            # save to write full pdf later on
            all_probe_mols[probe_param].append(oechem.OEGraphMol(mymol))
    finally:
        # Close explicitly so the mol2 file is flushed before returning.
        ofs.close()

    return all_probe_mols
Ejemplo n.º 20
0
def data_generator():
    """Yield training tensors for snapshots of 10 randomly chosen QC records.

    Ten record names are sampled from the module-level ``ds_qc`` dataset.
    For every snapshot of every record's trajectory, the molecule is
    parameterized with the module-level ``FF`` force field and the resulting
    OpenMM system is unpacked into tensors.

    Records that are ``None``, have no trajectory, or raise while being
    processed are skipped; the failure is logged and the generator moves
    on to the next record.

    Yields
    ------
    tuple
        ``(atoms, adjacency_map, energy, xyz, jacobian, angles, bonds,
        torsions, particle_params, system)`` where

        - ``atoms``: int64 tensor of ``TRANSLATION[atomic_number]`` per atom
        - ``adjacency_map``: float32 (n_atoms, n_atoms) tensor, zero except
          at the index pairs listed in the molecule connectivity, which hold
          the third connectivity column
        - ``energy``: float32 scalar, ``scf_total_energy`` scaled by
          ``HARTREE_TO_KJ_PER_MOL``
        - ``xyz``: float32 tensor, molecule geometry scaled by ``BOHR_TO_NM``
        - ``jacobian``: float32 tensor, ``snapshot.return_result`` scaled by
          ``HARTREE_PER_BOHR_TO_KJ_PER_MOL_PER_NM``
        - ``angles`` / ``bonds`` / ``torsions``: float32 tensors with one row
          per term (atom indices followed by the raw parameter values)
        - ``particle_params``: tensor with the three per-particle parameters
          of the third force
        - ``system``: the OpenMM system the parameters were read from

    Raises
    ------
    ValueError
        From ``random.sample`` if the dataset holds fewer than 10 records.
    """
    import logging

    logger = logging.getLogger(__name__)

    for record_name in random.sample(list(ds_qc.data.records), 10):
        try:
            print(record_name, flush=True)
            r = ds_qc.get_record(record_name, specification='default')
            if r is None:
                continue

            traj = r.get_trajectory()
            if traj is None:
                continue

            for snapshot in traj:
                energy = tf.convert_to_tensor(
                    snapshot.properties.scf_total_energy *
                    HARTREE_TO_KJ_PER_MOL,
                    dtype=tf.float32)

                mol = snapshot.get_molecule()

                atoms = tf.convert_to_tensor(
                    [
                        TRANSLATION[atomic_number]
                        for atomic_number in mol.atomic_numbers
                    ],
                    dtype=tf.int64)

                n_atoms = tf.shape(atoms, tf.int64)[0]
                connectivity = np.array(mol.connectivity)

                # Scatter the third connectivity column into an all-zero
                # (n_atoms, n_atoms) matrix at the listed index pairs.
                adjacency_map = tf.tensor_scatter_nd_update(
                    tf.zeros((n_atoms, n_atoms), dtype=tf.float32),
                    tf.convert_to_tensor(connectivity[:, :2],
                                         dtype=tf.int64),
                    tf.convert_to_tensor(connectivity[:, 2],
                                         dtype=tf.float32))

                xyz = tf.convert_to_tensor(mol.geometry * BOHR_TO_NM,
                                           dtype=tf.float32)

                jacobian = tf.convert_to_tensor(
                    snapshot.return_result *
                    HARTREE_PER_BOHR_TO_KJ_PER_MOL_PER_NM,
                    dtype=tf.float32)

                mol = cmiles.utils.load_molecule(mol.dict(encoding='json'))

                top = Topology.from_molecules(Molecule.from_openeye(mol))
                # Named ``system`` so the standard ``sys`` module name is
                # not shadowed.
                system = FF.create_openmm_system(top)

                # NOTE(review): the positional force order (angle, bond,
                # per-particle, torsion) is hard-coded and assumed to match
                # what FF.create_openmm_system produces -- confirm.
                forces = system.getForces()
                angle_force = forces[0]
                bond_force = forces[1]
                particle_force = forces[2]
                torsion_force = forces[3]

                # ``._value`` strips the unit wrapper from each parameter.
                angles = tf.convert_to_tensor(
                    [[x[0], x[1], x[2], x[3]._value, x[4]._value]
                     for x in (angle_force.getAngleParameters(idx)
                               for idx in range(angle_force.getNumAngles()))],
                    dtype=tf.float32)

                bonds = tf.convert_to_tensor(
                    [[x[0], x[1], x[2]._value, x[3]._value]
                     for x in (bond_force.getBondParameters(idx)
                               for idx in range(bond_force.getNumBonds()))],
                    dtype=tf.float32)

                torsions = tf.convert_to_tensor(
                    [[x[0], x[1], x[2], x[3], x[4],
                      x[5]._value, x[6]._value]
                     for x in (torsion_force.getTorsionParameters(idx)
                               for idx in range(
                                   torsion_force.getNumTorsions()))],
                    dtype=tf.float32)

                particle_params = tf.convert_to_tensor(
                    [[x[0]._value, x[1]._value, x[2]._value]
                     for x in (particle_force.getParticleParameters(idx)
                               for idx in range(
                                   particle_force.getNumParticles()))])

                yield (atoms, adjacency_map, energy, xyz, jacobian,
                       angles, bonds, torsions, particle_params, system)

        except Exception:
            # Best-effort per record. ``Exception`` (not a bare except) lets
            # GeneratorExit through, so closing the generator works, and
            # does not swallow KeyboardInterrupt / SystemExit.
            logger.warning('Skipping record %s', record_name, exc_info=True)
Ejemplo n.º 21
0
# Normalise output_directory to a pathlib.Path and create its
# "equilibration" subdirectory (missing parents are created; an existing
# directory is not an error).
print("Making output directory ...")
output_directory = pathlib.Path(output_directory)
(output_directory / "equilibration").mkdir(parents=True, exist_ok=True)

# Save the output of `conda list` next to the simulation results.
print("Writing packages of conda environment ...")
with open(output_directory / "conda_environment.txt", "w") as wf:
    subprocess.run(["conda", "list"], stdout=wf)

# Load the input structure and keep its topology and positions.
print("Reading PDB file ...")
pdb = app.PDBFile(pdb_path)
topology, positions = pdb.topology, pdb.positions

if len(ligand_path) > 0:
    print("Combining topologies ...")  # credit to @hannahbrucemacdonald
    molecule = Molecule.from_file(ligand_path)
    off_ligand_topology = Topology.from_molecules(molecule)
    ligand_topology = off_ligand_topology.to_openmm()
    ligand_positions = molecule.conformers[0]
    md_protein_topology = md.Topology.from_openmm(
        topology
    )  # using mdtraj for protein top
    md_ligand_topology = md.Topology.from_openmm(
        ligand_topology
    )  # using mdtraj for ligand top
    md_complex_topology = md_protein_topology.join(
        md_ligand_topology
    )  # add them together
    complex_topology = md_complex_topology.to_openmm()  # now back to openmm
    total_atoms = len(positions) + len(ligand_positions)
    complex_positions = unit.Quantity(np.zeros([total_atoms, 3]), unit=unit.nanometers)
    complex_positions[0 : len(positions)] = positions