Esempio n. 1
0
def test_mapping_strength_levels(pairs_of_smiles=[('Cc1ccccc1','c1ccc(cc1)N'),('CC(c1ccccc1)','O=C(c1ccccc1)'),('Oc1ccccc1','Sc1ccccc1')],test=True):
    """
    Check the size of the atom mapping produced at each map strength.

    For every map strength ('weak', 'default', 'strong') and every ligand
    pair, a topology proposal is generated and the number of unique old and
    unique new atoms is printed. When ``test`` is true the mapping is also
    rendered to ``<pair index>-<strength>.png`` and the counts are asserted
    against the expected table defined below.

    Parameters
    ----------
    pairs_of_smiles : list of tuple of str
        (old ligand SMILES, new ligand SMILES) pairs. The expected-results
        table is keyed by the position of the pair in this list.
    test : bool, default True
        Whether to render the mapping and assert on the atom counts.
    """
    # expected (n unique old atoms, n unique new atoms): pair index -> strength -> counts
    expected_counts = {
        0: {'default': (3,2), 'weak': (3,2), 'strong': (4,3)},
        1: {'default': (7,3), 'weak': (6,2), 'strong': (7,3)},
        2: {'default': (1,1), 'weak': (1,1), 'strong': (2,2)},
    }

    for strength in ('weak', 'default', 'strong'):
        for index, (lig_a, lig_b) in enumerate(pairs_of_smiles):
            print(f"conducting {strength} mapping with ligands {lig_a}, {lig_b}")
            initial_molecule = smiles_to_oemol(lig_a)
            proposed_molecule = smiles_to_oemol(lig_b)
            molecules = [
                Molecule.from_openeye(initial_molecule),
                Molecule.from_openeye(proposed_molecule),
            ]
            system_generator = SystemGenerator(
                forcefields=forcefield_files,
                barostat=barostat,
                forcefield_kwargs=forcefield_kwargs,
                nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                small_molecule_forcefield='gaff-1.81',
                molecules=molecules,
                cache=None,
            )
            proposal_engine = SmallMoleculeSetProposalEngine([initial_molecule, proposed_molecule], system_generator)
            initial_system, initial_positions, initial_topology = OEMol_to_omm_ff(initial_molecule, system_generator)
            print(f"running now with map strength {strength}")
            proposal = proposal_engine.propose(initial_system, initial_topology, map_strength=strength)
            n_old = len(proposal.unique_old_atoms)
            n_new = len(proposal.unique_new_atoms)
            print(lig_a, lig_b, 'length OLD and NEW atoms', n_old, n_new)
            if test:
                render_atom_mapping(f'{index}-{strength}.png', initial_molecule, proposed_molecule, proposal._new_to_old_atom_map)
                expected = expected_counts[index][strength]
                assert (n_old, n_new) == expected, f"the mapping failed, correct results are {expected}"
                print("the mapping worked!!!")
            print()
Esempio n. 2
0
def canonicalize_SMILES(smiles_list):
    """Bring every SMILES string into a common canonical form.

    Each SMILES is turned into an OEMol, converted to a Topology, and the
    first residue of that Topology is converted back to an OEMol whose
    isomeric SMILES is returned. Stereochemistry must already have been
    expanded by the caller.

    Parameters
    ----------
    smiles_list : list of str
        List of SMILES strings

    Returns
    -------
    canonical_smiles_list : list of str
        List of SMILES strings, after canonicalization, in input order.
    """
    from openmoltools.forcefield_generators import generateOEMolFromTopologyResidue, generateTopologyFromOEMol
    from perses.utils.openeye import smiles_to_oemol
    from openeye import oechem

    def _round_trip(smiles):
        # SMILES -> OEMol -> Topology -> first residue -> OEMol -> isomeric SMILES
        molecule = smiles_to_oemol(smiles)
        topology = generateTopologyFromOEMol(molecule)
        first_residue = list(topology.residues())[0]
        rebuilt_molecule = generateOEMolFromTopologyResidue(first_residue)
        return oechem.OECreateIsoSmiString(rebuilt_molecule)

    return [_round_trip(smiles) for smiles in smiles_list]
Esempio n. 3
0
    def _num_dof_compensation(self, smiles):
        """
        Approximate a compensating factor for a chemical state from its atom counts.

        The factor is
        (num_heavy * heavy_factor) + (num_light * light_factor) with
        heavy_factor = 4.5 and
        light_factor = 3.8,
        where "light" atoms are those with atomic number 1 and every other
        atom counts as "heavy".

        Parameters
        ----------
        smiles : str
            The SMILES string of the molecule

        Returns
        -------
        correction_factor : float
        """
        heavy_factor = 4.5
        light_factor = 3.8

        # keep the molecule bound to a local while its atoms are iterated
        mol = smiles_to_oemol(smiles)
        atomic_numbers = [atom.GetAtomicNum() for atom in mol.GetAtoms()]

        num_light = sum(1 for atomic_number in atomic_numbers if atomic_number == 1)
        num_heavy = len(atomic_numbers) - num_light

        return num_heavy*heavy_factor + num_light*light_factor
Esempio n. 4
0
def test_small_molecule_proposals():
    """
    Make sure the small molecule proposal engine generates molecules.

    Starting from butane, 50 successive proposals are drawn. On each
    iteration the 'MOL' residue of the previous proposal's new topology is
    converted back to a canonical SMILES and compared with that proposal's
    ``new_chemical_state_key``.
    """
    list_of_smiles = ['CCCC','CCCCC','CCCCCC']
    list_of_mols = [smiles_to_oemol(smi) for smi in list_of_smiles]
    molecules = [Molecule.from_openeye(mol) for mol in list_of_mols]

    # how often each chemical state was proposed
    stats_dict = defaultdict(int)

    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=molecules,
        cache=None,
    )
    proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(list_of_mols, system_generator)
    initial_system, initial_positions, initial_topology = OEMol_to_omm_ff(list_of_mols[0], system_generator)

    proposal = proposal_engine.propose(initial_system, initial_topology)

    for _ in range(50):
        # positions are ignored here, and we don't want to run the geometry engine
        new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology)
        stats_dict[new_proposal.new_chemical_state_key] += 1

        # check that the molecule it generated is actually the smiles we expect
        mol_residues = [res for res in proposal.new_topology.residues() if res.name == 'MOL']
        if len(mol_residues) != 1:
            raise ValueError("More than one residue with the same name!")
        oemol = generateOEMolFromTopologyResidue(mol_residues[0])
        smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(oechem.OEMolToSmiles(oemol))
        assert smiles == proposal.new_chemical_state_key

        proposal = new_proposal
Esempio n. 5
0
def run_oemol_test_suite(iupac='ethane'):
   """
   Chain the OEMol helper tests for perses.utils.openeye on one molecule.

   An OEMol is built from the IUPAC name, its positions are extracted with
   test_extractPositionsFromOEMol, shifted, and written back through
   test_giveOpenmmPositionsToOEMol. The resulting molecule is then written
   to SMILES, re-read with smiles_to_oemol, and the atom counts of the two
   molecules are asserted to be equal.

   Parameters
   ----------
   iupac : str, default 'ethane'
       IUPAC name passed to iupac_to_oemol to build the test molecule

   """
   from openmoltools.openeye import iupac_to_oemol
   import copy
   import numpy as np
   import simtk.unit as unit
   from openeye import oechem

   oemol = iupac_to_oemol(iupac)
   positions = test_extractPositionsFromOEMol(oemol)

   # shifting all of the positions by 1. A
   # NOTE(review): each row is a Quantity in angstrom when it is stored into the
   # plain numpy array, and the whole array is multiplied by unit.angstrom
   # again afterwards - confirm the resulting units are what is intended
   new_positions = np.zeros(np.shape(positions))
   for atom in range(oemol.NumAtoms()):
       new_positions[atom] = copy.deepcopy(positions[atom]) + [1., 1., 1.]*unit.angstrom
   new_positions *= unit.angstrom

   molecule = test_giveOpenmmPositionsToOEMol(new_positions,oemol)

   # presumably OESMILESFlag_Hydrogens keeps explicit hydrogens in the SMILES so
   # that the atom counts below are comparable - verify against the OEChem docs
   smiles = oechem.OECreateSmiString(molecule,oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens)

   smiles_oemol = smiles_to_oemol(smiles)

   # check that the two systems have the same numbers of atoms
   assert (oemol.NumAtoms() == smiles_oemol.NumAtoms()), "Discrepancy between molecule generated from IUPAC and SMILES"
Esempio n. 6
0
def test_OEMol_to_omm_ff(molecule=smiles_to_oemol('CC')):
    """
    Generate openmm objects for simulation from an OEMol object.

    Calls ``OEMol_to_omm_ff`` on ``molecule`` and asserts that the first
    returned object is an ``openmm.System``.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        molecule to convert; the default is built from the SMILES 'CC'

    Returns
    -------
    system : openmm.System
        openmm system object
    positions : unit.quantity
        positions of the system
    topology : app.topology.Topology
        openmm compatible topology object

    Raises
    ------
    AssertionError
        If the returned system is not an ``openmm.System`` instance.
    """
    from perses.utils.openeye import OEMol_to_omm_ff
    from simtk import openmm

    system, positions, topology = OEMol_to_omm_ff(molecule)

    # isinstance avoids constructing a throwaway System just to compare types
    # and also accepts subclasses of openmm.System
    assert isinstance(system, openmm.System), "An openmm.System has not been generated from OEMol_to_omm_ff()"

    return system, positions, topology
Esempio n. 7
0
def test_extractPositionsFromOEMol(molecule=smiles_to_oemol('CC')):
    """
    Extract positions from an OEMol and check their length and units.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        molecule to read positions from; the default is built from the
        SMILES 'CC'

    Returns
    -------
    positions : np.array
        openmm positions of molecule with units
    """
    from perses.utils.openeye import extractPositionsFromOEMol
    import simtk.unit as unit

    positions = extractPositionsFromOEMol(molecule)

    # one position entry is expected per atom
    n_positions = len(positions)
    n_atoms = molecule.NumAtoms()
    assert n_positions == n_atoms, "Positions extracted from OEMol does not match number of atoms"

    positions_unit = positions.unit
    assert positions_unit == unit.angstrom, "Positions returned are not in expected units of angstrom"

    return positions
 def __init__(self, i, string):
     """
     Build a record from one semicolon-separated line.

     Fields 1 through 6 of the line are read as SMILES, name, experimental
     value, experimental error, calculated value and calculated error. The
     four numeric fields are parsed as floats and passed through
     kcal_to_kt (presumably kcal/mol to kT - confirm against its definition).

     Parameters
     ----------
     i : index stored on the record as ``self.index``
     string : str
         the raw line, kept verbatim as ``self.line``
     """
     from perses.utils.openeye import smiles_to_oemol

     self.line = string
     self.index = i

     smiles, name, exp, experr, calc, calcerr = string.split(';')[1:7]
     self.smiles = smiles
     self.name = name
     self.mol = smiles_to_oemol(smiles)

     self.exp = kcal_to_kt(float(exp))
     self.experr = kcal_to_kt(float(experr))
     self.calc = kcal_to_kt(float(calc))
     self.calcerr = kcal_to_kt(float(calcerr))

     # derived properties; computed after self.mol has been set
     self.mw = self.calculate_molecular_weight()
     self.ha = self.heavy_atom_count()
     self.simtype = None
Esempio n. 9
0
def test_OEMol_to_omm_ff(molecule=smiles_to_oemol('CC')):
    """
    Generate openmm objects for simulation from an OEMol object.

    A SystemGenerator (amber14 protein/tip3p force fields plus the
    'gaff-2.11' small molecule force field, no barostat, no cache) is built
    for ``molecule`` and handed to ``OEMol_to_omm_ff``. The first returned
    object is asserted to be an ``openmm.System``.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        molecule to convert; the default is built from the SMILES 'CC'

    Returns
    -------
    system : openmm.System
        openmm system object
    positions : unit.quantity
        positions of the system
    topology : app.topology.Topology
        openmm compatible topology object

    Raises
    ------
    AssertionError
        If the returned system is not an ``openmm.System`` instance.
    """
    import simtk.openmm.app as app
    import simtk.unit as unit
    from perses.utils.openeye import OEMol_to_omm_ff
    from simtk import openmm
    from openmmforcefields.generators import SystemGenerator
    from openff.toolkit.topology import Molecule

    #default arguments for SystemGenerators
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }
    small_molecule_forcefield = 'gaff-2.11'
    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=[Molecule.from_openeye(molecule)],
        cache=None)

    system, positions, topology = OEMol_to_omm_ff(molecule, system_generator)

    # isinstance avoids constructing a throwaway System just to compare types
    # and also accepts subclasses of openmm.System
    assert isinstance(system, openmm.System), "An openmm.System has not been generated from OEMol_to_omm_ff()"

    return system, positions, topology
Esempio n. 10
0
def test_giveOpenmmPositionsToOEMol(positions=None,
                                    molecule=smiles_to_oemol('CC')):
    """
    Check that the positions of an OEMol can be replaced by openmm positions.

    When no positions are supplied, the molecule's own positions are
    extracted and the first entry is shifted by 1 A before being written
    back to a copy of the molecule.

    Parameters
    ----------
    positions : openmm positions, default None
        openmm positions that will be used to update the OEMol
    molecule : openeye.oechem.OEMol
        OEMol object to update; it is deep-copied, not modified in place

    Returns
    -------
    updated_molecule : openeye.oechem.OEMol
        OEMol object with updated positions

    """
    from perses.utils.openeye import giveOpenmmPositionsToOEMol
    import simtk.unit as unit
    import copy

    update_positions = positions
    if update_positions is None:
        # nothing supplied: start from the molecule's own coordinates and nudge the first entry
        update_positions = copy.deepcopy(test_extractPositionsFromOEMol(molecule))
        update_positions[0] += 1. * unit.angstrom

    molecule_copy = copy.deepcopy(molecule)
    updated_molecule = giveOpenmmPositionsToOEMol(update_positions, molecule_copy)

    # the first coordinate entry must differ between the original and the updated copy
    original_first = molecule.GetCoords()[0]
    updated_first = updated_molecule.GetCoords()[0]
    assert original_first != updated_first, "Positions have not been updated successfully"

    extracted_positions = test_extractPositionsFromOEMol(updated_molecule)
    assert extracted_positions.unit == unit.angstrom, "Positions returned are not in expected units of angstrom"

    return updated_molecule
Esempio n. 11
0
def generate_testsystem(smiles = 'CCCC',
                        forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                        forcefield_kwargs = {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : None, 'hydrogenMass' : 4 * unit.amus},
                        nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff},
                        periodic_forcefield_kwargs = {'nonbondedMethod': app.PME},
                        small_molecule_forcefield = 'gaff-2.11',
                        padding=9*unit.angstroms,
                        ionicStrength=0.0*unit.molar,
                        water_model = 'tip3p',
                        pressure = 1.0 * unit.atmosphere,
                        temperature = 300 * unit.kelvin,
                        barostat_period = 50,
                        **kwargs
                        ):
    """
    Build matching vacuum and solvated test systems for one small molecule.

    The vacuum system is created from the SMILES; the solvated system is
    obtained by adding solvent around the vacuum topology/positions and
    creating a system with a barostat-equipped generator. Finally the
    NonbondedForce of each system is checked for the expected (non)periodic
    treatment.

    arguments
        smiles : str, default 'CCCC'
            smiles string of the small molecule
        forcefield_files = list, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
            forcefield file names
        forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : None, 'hydrogenMass' : 4 * unit.amus}
            forcefield kwargs
        nonperiodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.NoCutoff}
            dict of nonperiodic forcefield kwargs (vacuum generator only)
        periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
            dict of periodic forcefield kwargs (solvent generator only)
        small_molecule_forcefield :  str, default 'gaff-2.11'
            small molecule forcefield to parameterize smiles
        padding : simtk.unit.Quantity (compatible with unit.angstroms),default 9.0 * unit.angstroms
            solvent padding
        ionicStrength : simtk.unit.Quantity, default 0.0*unit.molar
            ionic strength of solvent
        water_model : str, default 'tip3p'
            water model for solvation
        pressure : simtk.unit.Quantity, default 1.0 * unit.atmosphere
            pressure of the barostat
        temperature : simtk.unit.Quantity, default 300 * unit.kelvin
            temperature of barostat
        barostat_period : int, default 50
            integer of the barostat period
        **kwargs
            accepted but not used by this function

    returns
        vac_sys_pos_top : tuple
            tuple of the vacuum openmm.System, unit.Quantity(unit.nanometers), openmm.Topology
        sol_sys_pos_top : tuple
            tuple of the solvent openmm.System, unit.Quantity(unit.nanometers), openmm.Topology
    """
    from openforcefield.topology import Molecule
    from perses.utils.openeye import smiles_to_oemol
    from openmmforcefields.generators.system_generators import SystemGenerator
    from perses.utils.openeye import OEMol_to_omm_ff
    from simtk import openmm
    from qmlify.utils import pull_force_by_name

    oemol = smiles_to_oemol(smiles)
    off_molecules = [Molecule.from_openeye(oemol)]

    # settings shared by the vacuum and the solvent generators
    common_generator_kwargs = dict(
        forcefields=forcefield_files,
        small_molecule_forcefield=small_molecule_forcefield,
        forcefield_kwargs=forcefield_kwargs,
        molecules=off_molecules,
    )
    vac_system_generator = SystemGenerator(
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        **common_generator_kwargs)
    barostat = openmm.MonteCarloBarostat(pressure, temperature, barostat_period)
    sol_system_generator = SystemGenerator(
        periodic_forcefield_kwargs=periodic_forcefield_kwargs,
        barostat=barostat,
        **common_generator_kwargs)

    vac_system, vac_positions, vac_topology = OEMol_to_omm_ff(oemol, vac_system_generator)

    # solvate the vacuum structure
    modeller = app.Modeller(vac_topology, vac_positions)
    modeller.addSolvent(sol_system_generator.forcefield, model=water_model, padding=padding, ionicStrength=ionicStrength)
    modeller_positions = modeller.getPositions()
    sol_topology = modeller.getTopology()

    # repack the modeller positions as a numpy-backed Quantity in nanometers
    unitless_positions = modeller_positions.value_in_unit_system(unit.md_unit_system)
    coordinate_array = np.array([list(atom_pos) for atom_pos in unitless_positions])
    sol_positions = unit.quantity.Quantity(value=coordinate_array, unit=unit.nanometers)
    sol_system = sol_system_generator.create_system(sol_topology)

    # a quick assertion to make sure the nonbonded forces are being treated properly
    vac_nbf = pull_force_by_name(vac_system, 'NonbondedForce')
    sol_nbf = pull_force_by_name(sol_system, 'NonbondedForce')
    assert not vac_nbf.usesPeriodicBoundaryConditions()
    assert sol_nbf.usesPeriodicBoundaryConditions()

    vac_sys_pos_top = (vac_system, vac_positions, vac_topology)
    sol_sys_pos_top = (sol_system, sol_positions, sol_topology)
    return vac_sys_pos_top, sol_sys_pos_top
Esempio n. 12
0
def run():
    """
    Sample over a small set of alkanes using NCMC switching between them.

    Starting from ethane ("CC"), each of the 50 iterations proposes a new
    chemical state, alchemically deletes the old atoms, proposes geometry
    for the new ones, alchemically inserts them, and then accepts or rejects
    the move. After every iteration the current system is propagated for
    1000 Langevin steps. The number of accepted moves and the per-SMILES
    visit counts are printed at the end.
    """
    # Chemical states that may be visited, as SMILES.
    smiles_list = ["CC", "CCC", "CCCC"]

    # Create initial model system, topology, and positions.
    initial_molecule = smiles_to_oemol("CC")
    molecules = [Molecule.from_openeye(initial_molecule)]
    system_generator = SystemGenerator(molecules=molecules)
    initial_sys, initial_pos, initial_top = OEMol_to_omm_ff(
        initial_molecule, system_generator)

    smiles = "CC"
    stats = dict.fromkeys(smiles_list, 0)  # visits per chemical state

    # Run parameters
    temperature = 300.0 * unit.kelvin
    pressure = 1.0 * unit.atmospheres  # NOTE(review): assigned but not used below
    collision_rate = 5.0 / unit.picoseconds  # for Langevin dynamics

    # Proposal engine over the molecules to sample.
    list_of_oemols = [smiles_to_oemol(smile) for smile in smiles_list]
    transformation = topology_proposal.SmallMoleculeSetProposalEngine(
        list_of_oemols=list_of_oemols, system_generator=system_generator)

    # Weight calculation engine.
    bias_calculator = bias_engine.MinimizedPotentialBias(smiles_list)

    # NCMC engine.
    switching_timestep = 1.0 * unit.femtosecond  # timestep for NCMC velocity Verlet integrations
    switching_nsteps = 10  # number of steps to use in NCMC integration
    # Functional schedules in terms of `lambda`, which is switched from 0->1
    # for creation and 1->0 for deletion.
    switching_functions = {
        "lambda_sterics": "lambda",
        "lambda_electrostatics": "lambda",
        "lambda_bonds": "lambda",
        "lambda_angles": "sqrt(lambda)",
        "lambda_torsions": "lambda",
    }
    ncmc_engine = ncmc_switching.NCMCEngine(
        temperature=temperature,
        timestep=switching_timestep,
        nsteps=switching_nsteps,
        functions=switching_functions,
    )

    # Geometry engine (the metadata is currently ignored).
    geometry_metadata = {"data": 0}
    geometry_engine = geometry.FFAllAngleGeometryEngine(geometry_metadata)

    # Sampler state.
    niterations = 50
    system, topology, positions = initial_sys, initial_top, initial_pos
    current_log_weight = bias_calculator.g_k(smiles)
    n_accepted = 0
    propagate = True

    for _ in range(niterations):
        # Propose a transformation from one chemical species to another.
        state_metadata = {"molecule_smiles": smiles}
        top_proposal = transformation.propose(system, topology, positions,
                                              state_metadata)
        log_weight = bias_calculator.g_k(
            top_proposal.metadata["molecule_smiles"])

        # Alchemically eliminate atoms being removed.
        ncmc_old_positions, ncmc_elimination_logp = ncmc_engine.integrate(
            top_proposal, positions, direction="delete")

        # Generate coordinates for new atoms and the associated log probability.
        geometry_proposal = geometry_engine.propose(
            top_proposal.new_to_old_atom_map,
            top_proposal.new_system,
            system,
            ncmc_old_positions,
        )

        # Alchemically introduce new atoms.
        ncmc_new_positions, ncmc_introduction_logp = ncmc_engine.integrate(
            top_proposal, geometry_proposal.new_positions, direction="insert")

        # Total log acceptance probability, including all components.
        logp_accept = (top_proposal.logp_proposal + geometry_proposal.logp +
                       ncmc_elimination_logp + ncmc_introduction_logp +
                       log_weight / log_weight.unit -
                       current_log_weight / current_log_weight.unit)

        # Metropolis test; the random draw only happens when logp_accept < 0,
        # and the NaN check only when the Metropolis test passed.
        passes_metropolis = (logp_accept >= 0.0) or (
            np.random.uniform() < np.exp(logp_accept))
        if passes_metropolis and not np.any(np.isnan(ncmc_new_positions)):
            # Accept.
            n_accepted += 1
            (system, topology, positions, current_log_weight, smiles) = (
                top_proposal.new_system,
                top_proposal.new_topology,
                ncmc_new_positions,
                log_weight,
                top_proposal.metadata["molecule_smiles"],
            )
        else:
            # Reject.
            logging.debug("reject")

        stats[smiles] += 1
        print(positions)

        if propagate:
            # Short Langevin run on a copy of the current system.
            p_system = copy.deepcopy(system)
            integrator = openmm.LangevinIntegrator(temperature, collision_rate,
                                                   switching_timestep)
            context = openmm.Context(p_system, integrator)
            context.setPositions(positions)
            print(context.getState(getEnergy=True).getPotentialEnergy())
            integrator.step(1000)
            positions = context.getState(getPositions=True).getPositions(
                asNumpy=True)
            del context, integrator, p_system

    print("The total number accepted was %d out of %d iterations" %
          (n_accepted, niterations))
    print(stats)
Esempio n. 13
0
def enumerate_conformations(name, smiles=None, pdbname=None):
    """Run Epik to get protonation states using PDB residue templates for naming.

    The input molecule is taken from Ligand Expo when ``pdbname`` is given
    (this takes precedence), otherwise it is built from ``smiles``. Epik is
    run on the molecule, every resulting state is charged, and the results
    are written next to each other under ``<output_dir>/<name>/`` with the
    prefix ``<name>``: ``-input.mol2``, ``-epik.mae``, ``-epik.sdf``,
    ``-epik.mol2``, ``-state-penalties.out``, ``-epik-charged.pdb`` and
    ``-epik-charged.mol2`` (plus ``-input.pdb`` / ``-input.sdf`` when
    ``pdbname`` is used).

    If charging the SMILES-derived molecule raises RuntimeError, the
    traceback is printed and the function returns without writing output.
    States whose charging fails after Epik are reported and skipped.

    Parameters
    ----------
    name : str
       Common name of molecule (used to create subdirectory)
    smiles : str
       Isomeric SMILES string
    pdbname : str
       Three-letter PDB code (e.g. 'DB8')

    Raises
    ------
    Exception
        If neither ``smiles`` nor ``pdbname`` is provided.
    """
    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    output_basepath = os.path.join(output_basepath, name)

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            print("Splitting '%s' into first entry only: '%s'" %
                  (pdbname, pdbnames[0]))
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (
            pdbname[0], pdbname, pdbname)
        pdb_filename = output_basepath + '-input.pdb'
        retrieve_url(url, pdb_filename)
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (
            pdbname[0], pdbname, pdbname)
        sdf_filename = output_basepath + '-input.sdf'
        retrieve_url(url, sdf_filename)
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF
        # NOTE(review): assumes both files list atoms in the same order - confirm
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(),
                                        pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    elif smiles:
        # Generate molecule geometry with OpenEye
        print("Generating molecule {}".format(name))
        oe_molecule = smiles_to_oemol(smiles)
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(oe_molecule)
        oechem.OETriposBondTypeNames(oe_molecule)
        try:
            oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
        except RuntimeError:
            traceback.print_exc()
            print("Skipping molecule " + name)
            return
        # Residue name: first three letters of the upper-cased name
        residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    else:
        raise Exception('Must provide SMILES string or pdbname')

    # Save mol2 file, preserving atom names
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path,
                         mae_file_path,
                         tautomerize=False,
                         max_structures=100,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY),
                         ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges. The SDF and MOL2 streams are read in lockstep: the
    # MOL2 record is charged, the SDF record supplies the SD tags.
    charged_molecules = list()
    index = 0
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)

            index += 1
            print("Charging molecule %d" % (index))
            try:
                # Charge molecule.
                charged_molecule = openeye.get_charges(mol2_molecule,
                                                       max_confs=800,
                                                       strictStereo=False,
                                                       normalize=True,
                                                       keep_confs=None)
                # Assign Tripos types
                oechem.OETriposAtomTypeNames(charged_molecule)
                oechem.OETriposBondTypeNames(charged_molecule)
                # Store tags.
                oechem.OECopySDData(charged_molecule, sdf_molecule)
                # Store molecule
                charged_molecules.append(charged_molecule)
            except Exception as e:
                # Deliberate best-effort: report and move on to the next state.
                print(e)
                print("Skipping protomer/tautomer because of failed charging.")
    finally:
        # Clean up, even if reading fails part-way through
        ifs_sdf.close()
        ifs_mol2.close()

    # Write state penalties; the context manager closes the file even if a
    # tag fails to parse.
    with open(output_basepath + '-state-penalties.out', 'w') as outfile:
        for charged_molecule in charged_molecules:
            # Get Epik data. Only the state penalty is written; the other
            # tags are still parsed, so a non-numeric value raises here.
            epik_Ionization_Penalty = float(
                oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty"))
            epik_Ionization_Penalty_Charging = float(
                oechem.OEGetSDData(charged_molecule,
                                   "r_epik_Ionization_Penalty_Charging"))
            epik_Ionization_Penalty_Neutral = float(
                oechem.OEGetSDData(charged_molecule,
                                   "r_epik_Ionization_Penalty_Neutral"))
            epik_State_Penalty = float(
                oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            epik_Tot_Q = int(oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))

            outfile.write('%16.8f\n' % epik_State_Penalty)

    # Write as PDB
    charged_pdb_filename = output_basepath + '-epik-charged.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    try:
        flavor = oechem.OEOFlavor_PDB_CurrentResidues | oechem.OEOFlavor_PDB_ELEMENT | oechem.OEOFlavor_PDB_BONDS | oechem.OEOFlavor_PDB_HETBONDS | oechem.OEOFlavor_PDB_BOTH
        ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
        for charged_molecule in charged_molecules:
            # Fix residue names
            for atom in charged_molecule.GetAtoms():
                residue = oechem.OEAtomGetResidue(atom)
                residue.SetName(residue_name)
                oechem.OEAtomSetResidue(atom, residue)

            oechem.OEWriteMolecule(ofs, charged_molecule)
    finally:
        ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules,
                                    residue_name)