def generate_vacuum_hybrid_topology(mol_name="naphthalene", ref_mol_name="benzene"):
    """
    Propose a transformation from ``mol_name`` to ``ref_mol_name`` and place the new atoms.

    Both molecules are given by IUPAC name. A system for the starting molecule
    is created from a force field built on the GAFF and tip3p definitions, a
    SMILES-based proposal engine produces the topology proposal, and a
    geometry engine proposes coordinates for the target molecule.

    ``oechem``, ``SystemGenerator``, ``FFAllAngleGeometryEngine`` and ``beta``
    are not defined in this function and must be available in the enclosing
    module scope.

    Parameters
    ----------
    mol_name : str, optional
        IUPAC name of the starting molecule; also passed to the proposal
        engine as ``residue_name``.
    ref_mol_name : str, optional
        IUPAC name of the target molecule.

    Returns
    -------
    topology_proposal
        Result of ``SmallMoleculeSetProposalEngine.propose``.
    pos_old
        Positions of the starting molecule as returned by ``createSystemFromIUPAC``.
    new_positions
        Positions proposed by the geometry engine.
    """
    from topology_proposal import SmallMoleculeSetProposalEngine, TopologyProposal
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators

    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    # The system returned here is not used; a fresh one is built from the force field below.
    old_oemol, _unused_system, old_positions, old_topology = createSystemFromIUPAC(mol_name)
    new_oemol = createOEMolFromIUPAC(ref_mol_name)

    # SMILES for the (old, new) pair handed to the proposal engine.
    smiles_pair = [oechem.OEMolToSmiles(old_oemol), oechem.OEMolToSmiles(new_oemol)]

    ff = app.ForceField(get_data_filename("data/gaff.xml"), 'tip3p.xml')
    ff.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
    old_system = ff.createSystem(old_topology)

    generator = SystemGenerator(
        [get_data_filename('data/gaff.xml'), 'amber99sbildn.xml', 'tip3p.xml'])
    engine = FFAllAngleGeometryEngine()
    proposer = SmallMoleculeSetProposalEngine(
        smiles_pair, generator, residue_name=mol_name)

    # Topology proposal first, then coordinates for the new molecule.
    proposal = proposer.propose(old_system, old_topology)
    proposed_positions, _logp = engine.propose(proposal, old_positions, beta)

    return proposal, old_positions, proposed_positions
Beispiel #2
0
def generate_vacuum_hybrid_topology(mol_name="naphthalene", ref_mol_name="benzene"):
    """
    Propose a transformation from ``mol_name`` to ``ref_mol_name`` and place the new atoms.

    Both molecules are given by IUPAC name. A system for the starting molecule
    is created from a force field built on the GAFF and tip3p definitions, a
    SMILES-based proposal engine produces the topology proposal, and a
    geometry engine proposes coordinates for the target molecule.

    ``oechem``, ``SystemGenerator``, ``FFAllAngleGeometryEngine`` and ``beta``
    are not defined in this function and must be available in the enclosing
    module scope.

    Parameters
    ----------
    mol_name : str, optional
        IUPAC name of the starting molecule; also passed to the proposal
        engine as ``residue_name``.
    ref_mol_name : str, optional
        IUPAC name of the target molecule.

    Returns
    -------
    topology_proposal
        Result of ``SmallMoleculeSetProposalEngine.propose``.
    pos_old
        Positions of the starting molecule as returned by ``createSystemFromIUPAC``.
    new_positions
        Positions proposed by the geometry engine.
    """
    from topology_proposal import SmallMoleculeSetProposalEngine, TopologyProposal
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators

    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    # The system returned here is not used; a fresh one is built from the force field below.
    old_oemol, _unused_system, old_positions, old_topology = createSystemFromIUPAC(mol_name)
    new_oemol = createOEMolFromIUPAC(ref_mol_name)

    # SMILES for the (old, new) pair handed to the proposal engine.
    smiles_pair = [oechem.OEMolToSmiles(old_oemol), oechem.OEMolToSmiles(new_oemol)]

    ff = app.ForceField(get_data_filename("data/gaff.xml"), 'tip3p.xml')
    ff.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
    old_system = ff.createSystem(old_topology)

    generator = SystemGenerator(
        [get_data_filename('data/gaff.xml'), 'amber99sbildn.xml', 'tip3p.xml'])
    engine = FFAllAngleGeometryEngine()
    proposer = SmallMoleculeSetProposalEngine(
        smiles_pair, generator, residue_name=mol_name)

    # Topology proposal first, then coordinates for the new molecule.
    proposal = proposer.propose(old_system, old_topology)
    proposed_positions, _logp = engine.propose(proposal, old_positions, beta)

    return proposal, old_positions, proposed_positions
def run_rj_proposals(top_prop, configuration_traj, use_sterics, ncmc_nsteps, n_replicates, box_vectors, temperature=300.0*unit.kelvin):
    """
    Run ``n_replicates`` geometry + NCMC proposals from randomly chosen trajectory frames.

    Parameters
    ----------
    top_prop
        Topology proposal; its ``old_system`` and ``new_system`` define the
        initial and final thermodynamic states.
    configuration_traj
        Trajectory from which starting frames are drawn uniformly at random
        (its ``n_frames`` attribute is read).
    use_sterics : bool
        Forwarded to ``FFAllAngleGeometryEngine``.
    ncmc_nsteps : int
        Forwarded to ``NCMCEngine`` as ``nsteps``.
    n_replicates : int
        Number of proposals to run.
    box_vectors
        Forwarded to ``traj_frame_to_sampler_state``.
    temperature : unit-bearing quantity, optional
        Temperature used for both thermodynamic states and for ``beta``.

    Returns
    -------
    results : numpy.ndarray, shape (n_replicates, 7)
        Per replicate: initial logP, reverse geometry logP, final logP,
        NCMC work logP, initial hybrid logP, final hybrid logP,
        forward geometry logP.
    """
    ncmc_engine = NCMCEngine(nsteps=ncmc_nsteps, pressure=1.0*unit.atmosphere)
    geometry_engine = FFAllAngleGeometryEngine(use_sterics=use_sterics)

    # Old-system state first, new-system state second.
    initial_state, final_state = (
        states.ThermodynamicState(system, temperature=temperature, pressure=1.0*unit.atmosphere)
        for system in (top_prop.old_system, top_prop.new_system)
    )

    frame_pool = np.arange(0, configuration_traj.n_frames)
    results = np.zeros([n_replicates, 7])
    beta = 1.0 / (temperature * constants.kB)

    for replicate in tqdm.trange(n_replicates):
        chosen_frame = np.random.choice(frame_pool)

        start = traj_frame_to_sampler_state(configuration_traj, chosen_frame,box_vectors)
        logp_start = - compute_reduced_potential(initial_state, start)

        new_geometry, logp_forward = geometry_engine.propose(top_prop, start.positions, beta)
        proposed = states.SamplerState(new_geometry, box_vectors=start.box_vectors)

        end_old, end_new, logp_work, logp_hybrid_start, logp_hybrid_end = ncmc_engine.integrate(top_prop, start, proposed)

        logp_end = - compute_reduced_potential(final_state, end_new)
        logp_backward = geometry_engine.logp_reverse(top_prop, end_new.positions, end_old.positions, beta)

        # Column order is part of the return contract; fill element by element.
        row = (logp_start, logp_backward, logp_end, logp_work,
               logp_hybrid_start, logp_hybrid_end, logp_forward)
        for column, value in enumerate(row):
            results[replicate, column] = value

    return results
Beispiel #4
0
def run_rj_proposals(top_prop, configuration_traj, use_sterics, ncmc_nsteps, n_replicates, bond_softening_constant=1.0, angle_softening_constant=1.0):
    """
    Run ``n_replicates`` geometry + NCMC proposals and collect logP differences.

    ``temperature`` and ``beta`` are not parameters of this function; they are
    read from the enclosing module scope.

    Parameters
    ----------
    top_prop
        Topology proposal; its ``old_system`` and ``new_system`` define the
        initial and final thermodynamic states.
    configuration_traj
        Trajectory from which starting frames are drawn uniformly at random
        (its ``n_frames`` attribute is read).
    use_sterics : bool
        Forwarded to ``FFAllAngleGeometryEngine``.
    ncmc_nsteps : int
        Forwarded to ``NCMCEngine`` as ``nsteps``.
    n_replicates : int
        Number of proposals to run.
    bond_softening_constant : float, optional
        Forwarded to both the NCMC engine and the geometry engine.
    angle_softening_constant : float, optional
        Forwarded to both the NCMC engine and the geometry engine.

    Returns
    -------
    results : numpy.ndarray, shape (n_replicates, 4)
        Per replicate: (initial hybrid logP - initial logP),
        (reverse geometry logP - forward geometry logP),
        (final logP - final hybrid logP), NCMC work logP.
    """
    softening = dict(bond_softening_constant=bond_softening_constant,
                     angle_softening_constant=angle_softening_constant)
    ncmc_engine = NCMCEngine(nsteps=ncmc_nsteps, pressure=1.0*unit.atmosphere, **softening)
    geometry_engine = FFAllAngleGeometryEngine(use_sterics=use_sterics, **softening)

    # Old-system state first, new-system state second.
    initial_state, final_state = (
        states.ThermodynamicState(system, temperature=temperature, pressure=1.0*unit.atmosphere)
        for system in (top_prop.old_system, top_prop.new_system)
    )

    frame_pool = np.arange(0, configuration_traj.n_frames)
    results = np.zeros([n_replicates, 4])

    for replicate in tqdm.trange(n_replicates):
        chosen_frame = np.random.choice(frame_pool)
        start = traj_frame_to_sampler_state(configuration_traj, chosen_frame)

        logp_start = - compute_reduced_potential(initial_state, start)

        new_geometry, logp_forward = geometry_engine.propose(top_prop, start.positions, beta)
        proposed = states.SamplerState(new_geometry, box_vectors=start.box_vectors)

        end_old, end_new, logp_work, logp_hybrid_start, logp_hybrid_end = ncmc_engine.integrate(top_prop, start, proposed)

        logp_end = - compute_reduced_potential(final_state, end_new)
        logp_backward = geometry_engine.logp_reverse(top_prop, end_new.positions, end_old.positions, beta)

        # Column order is part of the return contract; fill element by element.
        row = (
            logp_hybrid_start - logp_start,
            logp_backward - logp_forward,
            logp_end - logp_hybrid_end,
            logp_work,
        )
        for column, value in enumerate(row):
            results[replicate, column] = value

    return results
Beispiel #5
0
def generate_top_pos_sys(topology, old_oemol, new_oemol, system, positions,
                         system_generator, map_strength):
    """
    Run a topology proposal, a forward geometry proposal and hybrid factory generation.

    A ``SmallMoleculeSetProposalEngine`` (over the fixed SMILES pair
    ``'CCCCO'`` / ``'CCCCS'``, residue name ``'MOL'``) proposes the new
    topology, an ``FFAllAngleGeometryEngine`` proposes positions for it, and a
    ``HybridTopologyFactory`` is built from the result. A progress message is
    printed before each stage.

    ``beta`` is read from the enclosing module scope.

    Parameters
    ----------
    topology
        Topology of the current (old) system, passed to ``propose``.
    old_oemol, new_oemol
        Old and new molecules, passed to the proposal engine's ``propose``.
    system
        Current (old) system, passed to ``propose``.
    positions
        Current positions; used for the geometry proposal and as the hybrid
        factory's ``current_positions``.
    system_generator
        System generator handed to the proposal engine.
    map_strength
        Forwarded to the proposal engine as ``map_strength``.

    Returns
    -------
    topology_proposal
        Result of the proposal engine.
    forward_new_positions
        Positions proposed by the geometry engine.
    forward_htf : HybridTopologyFactory
        Hybrid factory built from the proposal and both position sets.
    """
    # create the proposal engine (the message text is kept as originally emitted)
    print("generating point mutation engine")
    proposal_engine = SmallMoleculeSetProposalEngine(['CCCCO', 'CCCCS'],
                                                     system_generator,
                                                     map_strength=map_strength,
                                                     residue_name='MOL')

    # create a geometry engine
    print("generating geometry engine")
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=100,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False,
                                               use_14_nonbondeds=False)

    # create a topology proposal
    print("making topology proposal")
    topology_proposal = proposal_engine.propose(system, topology, old_oemol,
                                                new_oemol)

    # make a forward geometry proposal; the proposal logp is not needed here
    print("making geometry proposal")
    forward_new_positions, _logp_proposal = geometry_engine.propose(
        topology_proposal, positions, beta)

    # create a hybrid topology factory
    # (this message was previously a bare string expression and never printed)
    print("making forward hybridtopologyfactory")
    forward_htf = HybridTopologyFactory(topology_proposal=topology_proposal,
                                        current_positions=positions,
                                        new_positions=forward_new_positions,
                                        use_dispersion_correction=False,
                                        functions=None,
                                        softcore_alpha=None,
                                        bond_softening_constant=1.0,
                                        angle_softening_constant=1.0,
                                        soften_only_new=False,
                                        neglected_new_angle_terms=[],
                                        neglected_old_angle_terms=[],
                                        softcore_LJ_v2=True,
                                        softcore_electrostatics=True,
                                        softcore_LJ_v2_alpha=0.85,
                                        softcore_electrostatics_alpha=0.3,
                                        softcore_sigma_Q=1.0,
                                        interpolate_old_and_new_14s=False,
                                        omitted_terms=None)

    return topology_proposal, forward_new_positions, forward_htf
    def run_rj_simple_system(self, configurations_initial, topology_proposal, n_replicates):
        """
        Execute reversible jump MC geometry proposals on a set of starting configurations.

        ``np`` and ``beta`` are read from the enclosing module scope.

        Arguments
        ---------
        configurations_initial: openmm.Quantity
            n_replicate frames of equilibrium simulation of initial system
        topology_proposal: dict
            perses.topology_proposal object
        n_replicates: int
            number of replicates to simulate

        Returns
        -------
        logPs: numpy ndarray
            shape = (n_replicates, 4) where logPs[i] = (reduced potential of initial molecule, log proposal probability, reversed log proposal probability, reduced potential of proposed molecule)
        final_positions: list
            list of openmm position objects for final molecule proposal
        """
        import tqdm
        from perses.rjmc.geometry import FFAllAngleGeometryEngine

        final_positions = []
        logPs = np.zeros([n_replicates, 4])
        engine = FFAllAngleGeometryEngine(
            metadata=None,
            use_sterics=False,
            n_bond_divisions=1000,
            n_angle_divisions=180,
            n_torsion_divisions=360,
            verbose=True,
            storage=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            neglect_angles = True)

        for idx in tqdm.trange(n_replicates):
            old_pos = configurations_initial[idx, :, :]
            new_pos, logp_forward = engine.propose(topology_proposal, old_pos, beta)
            logp_backward = engine.logp_reverse(topology_proposal, new_pos, old_pos, beta)

            # Still evaluated as before; the value itself is not stored in the output.
            self.compute_rp(topology_proposal.old_system, old_pos)

            # Choose which engine-side reduced potentials describe the old and new endpoints.
            if not topology_proposal.unique_old_atoms:
                # the geometry engine doesn't run the backward proposal
                rp_old = engine.forward_atoms_with_positions_reduced_potential
                rp_new = engine.forward_final_context_reduced_potential
            elif not topology_proposal.unique_new_atoms:
                # the geometry engine doesn't run forward
                rp_old = engine.reverse_final_context_reduced_potential
                rp_new = engine.reverse_atoms_with_positions_reduced_potential
            else:
                rp_old = engine.reverse_final_context_reduced_potential
                rp_new = engine.forward_final_context_reduced_potential

            logPs[idx, 0] = rp_old
            logPs[idx, 3] = rp_new
            logPs[idx, 1] = logp_forward
            logPs[idx, 2] = logp_backward

            # Still evaluated as before; the value itself is not stored in the output.
            self.compute_rp(topology_proposal.new_system, new_pos)
            final_positions.append(new_pos)

        return logPs, final_positions
Beispiel #7
0
    def __init__(self, sampler, topology, state_key, proposal_engine, log_weights=None, scheme='ncmc-geometry-ncmc', options=None, platform=None):
        r"""
        Create an expanded ensemble sampler.

        p(x,k) \propto \exp[-u_k(x) + g_k]

        where g_k is the log weight.

        Parameters
        ----------
        sampler : MCMCSampler
            MCMCSampler initialized with current SamplerState
        topology : simtk.openmm.app.Topology
            Current topology
        state_key : hashable object
            Current chemical state
        proposal_engine : ProposalEngine
            ProposalEngine to use for proposing new chemical states
        log_weights : dict of object : float, optional, default=None
            Log weights to use for expanded ensemble biases.
            If `None`, an empty dict is used.
        scheme : str, optional, default='ncmc-geometry-ncmc'
            Update scheme. One of ['ncmc-geometry-ncmc', 'geometry-ncmc-geometry', 'geometry-ncmc']
        options : dict, optional, default=None
            Options for initializing switching scheme, such as 'timestep', 'nsteps', 'functions' for NCMC.
            Missing keys (or `None`) are passed to the NCMC engine as `None`.
            The dict is only read, never modified.
        platform : simtk.openmm.Platform, optional, default=None
            Platform to use for NCMC switching.  If `None`, default (fastest) platform is used.

        """
        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self.topology = topology
        self.state_key = state_key
        self.proposal_engine = proposal_engine
        self.log_weights = dict() if log_weights is None else log_weights
        self.scheme = scheme

        # Initialize
        self.iteration = 0

        # Work on a private copy so neither a shared default nor the caller's
        # dict is mutated when filling in the missing NCMC options.
        ncmc_options = dict(options or {})
        from perses.annihilation.ncmc_switching import NCMCEngine
        self.ncmc_engine = NCMCEngine(
            temperature=self.sampler.thermodynamic_state.temperature,
            timestep=ncmc_options.get('timestep'),
            nsteps=ncmc_options.get('nsteps'),
            functions=ncmc_options.get('functions'),
            platform=platform)
        from perses.rjmc.geometry import FFAllAngleGeometryEngine
        self.geometry_engine = FFAllAngleGeometryEngine({'data': 0})

        # Bookkeeping for acceptance statistics and state visits.
        self.naccepted = 0
        self.nrejected = 0
        self.number_of_state_visits = dict()

        # Diagnostics / debugging switches.
        self.verbose = False
        self.pdbfile = None # if not None, write PDB file
        self.geometry_pdbfile = None # if not None, write PDB file of geometry proposals
        self.accept_everything = False # if True, will accept anything that doesn't lead to NaNs
def run_proposals(proposal_list):
    """
    Run each geometry proposal and report NaN energies or NaN log-probabilities.

    For every entry, new positions are proposed with one shared
    ``FFAllAngleGeometryEngine``. The potential energy of the proposal's new
    system at those positions is then evaluated on the OpenMM "Reference"
    platform, and a message is printed when either the energy or the
    proposal logp is NaN.

    ``openmm``, ``unit``, ``np`` and ``beta`` are read from the enclosing
    module scope.

    Parameters
    ----------
    proposal_list : list of namedtuple
        Each entry must expose ``topology_proposal`` and ``current_positions``.

    """
    import logging
    # Configure logging before the perses import below, as in the original flow.
    logging.basicConfig(level=logging.DEBUG)
    import time
    start_time = time.time()
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    engine = FFAllAngleGeometryEngine()
    for entry in proposal_list:
        # Timestamp for the (currently disabled) per-proposal timing printout.
        current_time = time.time()
        proposal = entry.topology_proposal
        old_positions = entry.current_positions
        proposed_positions, logp = engine.propose(proposal, old_positions, beta)
        #print("Proposal time is %s" % str(time.time()-current_time))

        # Evaluate the energy of the proposed geometry in a throwaway context.
        integrator = openmm.VerletIntegrator(1*unit.femtoseconds)
        reference_platform = openmm.Platform.getPlatformByName("Reference")
        context = openmm.Context(proposal.new_system, integrator, reference_platform)
        context.setPositions(proposed_positions)
        energy = context.getState(getEnergy=True).getPotentialEnergy()

        # Strip the unit so numpy can test the bare number.
        if np.isnan(energy / energy.unit):
            print("NanN potential!")
        if np.isnan(logp):
            print("logp is nan")
        del context, integrator
Beispiel #9
0
def run_proposals(proposal_list):
    """
    Run each geometry proposal and report NaN energies or NaN log-probabilities.

    For every entry, new positions are proposed with one shared
    ``FFAllAngleGeometryEngine``. The potential energy of the proposal's new
    system at those positions is then evaluated on the OpenMM "Reference"
    platform, and a message is printed when either the energy or the
    proposal logp is NaN.

    ``openmm``, ``unit``, ``np`` and ``beta`` are read from the enclosing
    module scope.

    Parameters
    ----------
    proposal_list : list of namedtuple
        Each entry must expose ``topology_proposal`` and ``current_positions``.

    """
    import logging
    # Configure logging before the perses import below, as in the original flow.
    logging.basicConfig(level=logging.DEBUG)
    import time
    start_time = time.time()
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    engine = FFAllAngleGeometryEngine()
    for entry in proposal_list:
        # Timestamp for the (currently disabled) per-proposal timing printout.
        current_time = time.time()
        proposal = entry.topology_proposal
        old_positions = entry.current_positions
        proposed_positions, logp = engine.propose(proposal, old_positions, beta)
        #print("Proposal time is %s" % str(time.time()-current_time))

        # Evaluate the energy of the proposed geometry in a throwaway context.
        integrator = openmm.VerletIntegrator(1*unit.femtoseconds)
        reference_platform = openmm.Platform.getPlatformByName("Reference")
        context = openmm.Context(proposal.new_system, integrator, reference_platform)
        context.setPositions(proposed_positions)
        energy = context.getState(getEnergy=True).getPotentialEnergy()

        # Strip the unit so numpy can test the bare number.
        if np.isnan(energy / energy.unit):
            print("NanN potential!")
        if np.isnan(logp):
            print("logp is nan")
        del context, integrator
Beispiel #10
0
def compare_energies(mol_name="naphthalene",
                     ref_mol_name="benzene",
                     atom_expression=['Hybridization'],
                     bond_expression=['Hybridization']):
    """
    Make an atom map where the molecule at either lambda endpoint is identical, and check that the energies are also the same.

    A topology proposal from ``ref_mol_name`` to ``mol_name`` is generated, a
    geometry proposal (forward and reverse) is run on it, a
    ``HybridTopologyFactory`` is built, and ``validate_endstate_energies`` is
    called on the factory with the valence energies added and subtracted by
    the geometry engine.

    ``unit`` and ``app`` are read from the enclosing module scope.

    Parameters
    ----------
    mol_name : str, optional
        IUPAC name of the molecule proposed as the new state.
    ref_mol_name : str, optional
        IUPAC name of the molecule used as the old (starting) state.
    atom_expression : list of str, optional
        Passed to ``generate_expression`` to build the atom matching expression.
    bond_expression : list of str, optional
        Passed to ``generate_expression`` to build the bond matching expression.

    Returns
    -------
    factory : HybridTopologyFactory
        The hybrid factory built from the proposal.
    """
    from openmmtools.constants import kB
    from openmmtools import alchemy, states
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from perses.annihilation.relative import HybridTopologyFactory
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import simtk.openmm as openmm
    from perses.utils.openeye import iupac_to_oemol, extractPositionsFromOEMol, generate_conformers
    from perses.utils.openeye import generate_expression
    from openmmforcefields.generators import SystemGenerator
    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    from perses.tests.utils import validate_endstate_energies
    temperature = 300 * unit.kelvin
    # Compute kT and inverse temperature.
    kT = kB * temperature
    beta = 1.0 / kT
    ENERGY_THRESHOLD = 1e-6

    atom_expr = generate_expression(atom_expression)
    bond_expr = generate_expression(bond_expression)

    mol = iupac_to_oemol(mol_name)
    mol = generate_conformers(mol, max_confs=1)

    refmol = iupac_to_oemol(ref_mol_name)
    refmol = generate_conformers(refmol, max_confs=1)

    from openforcefield.topology import Molecule
    molecules = [Molecule.from_openeye(oemol) for oemol in [refmol, mol]]
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }

    system_generator = SystemGenerator(forcefields=forcefield_files,
                                       barostat=barostat,
                                       forcefield_kwargs=forcefield_kwargs,
                                       small_molecule_forcefield='gaff-2.11',
                                       molecules=molecules,
                                       cache=None)

    # The reference molecule is the old state of the proposal.
    topology = generateTopologyFromOEMol(refmol)
    system = system_generator.create_system(topology)
    positions = extractPositionsFromOEMol(refmol)

    proposal_engine = SmallMoleculeSetProposalEngine([refmol, mol],
                                                     system_generator)
    proposal = proposal_engine.propose(system,
                                       topology,
                                       atom_expr=atom_expr,
                                       bond_expr=bond_expr)
    geometry_engine = FFAllAngleGeometryEngine()
    new_positions, _ = geometry_engine.propose(
        proposal, positions, beta=beta, validate_energy_bookkeeping=False)
    # The reverse logp itself is discarded; the call is kept because the
    # engine's reverse_* attributes are read below (presumably set by it).
    _ = geometry_engine.logp_reverse(proposal, new_positions, positions, beta)

    factory = HybridTopologyFactory(proposal, positions, new_positions)

    # Valence energy added by the forward proposal (zero if nothing is created).
    if not proposal.unique_new_atoms:
        assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
        assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.forward_atoms_with_positions_reduced_potential})"
        # Must bind the same name the else-branch binds; it is read by the
        # validation call below.
        added_valence_energy = 0.0
    else:
        added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

    # Valence energy removed by the reverse proposal (zero if nothing is deleted).
    if not proposal.unique_old_atoms:
        assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
        assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.reverse_atoms_with_positions_reduced_potential})"
        subtracted_valence_energy = 0.0
    else:
        subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

    zero_state_error, one_state_error = validate_endstate_energies(
        factory._topology_proposal,
        factory,
        added_valence_energy,
        subtracted_valence_energy,
        beta=beta,
        ENERGY_THRESHOLD=ENERGY_THRESHOLD,
        platform=openmm.Platform.getPlatformByName('Reference'))
    return factory
    def __init__(
            self,
            receptor_filename,
            ligand_filename,
            mutation_chain_id,
            mutation_residue_id,
            proposed_residue,
            phase='complex',
            conduct_endstate_validation=False,
            ligand_index=0,
            forcefield_files=[
                'amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'
            ],
            barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                               temperature, 50),
            forcefield_kwargs={
                'removeCMMotion': False,
                'ewaldErrorTolerance': 1e-4,
                'nonbondedMethod': app.PME,
                'constraints': app.HBonds,
                'hydrogenMass': 4 * unit.amus
            },
            small_molecule_forcefields='gaff-2.11',
            **kwargs):
        """
        Build hybrid topology factories for a point mutation in the complex and apo systems.

        The receptor and ligand are loaded and joined into a complex, both the
        complex and the receptor alone (apo) are passed through
        ``self._solvate``, and for each a point mutation is proposed, a
        geometry proposal is run, and a ``HybridTopologyFactory`` is built.
        The results are stored as ``self.complex_htf`` and ``self.apo_htf``;
        the system generator is stored as ``self.system_generator``.

        ``beta``, ``ENERGY_THRESHOLD`` and the default barostat's
        ``temperature`` are read from the enclosing module scope.

        arguments
            receptor_filename : str
                path to receptor; .pdb
            ligand_filename : str
                path to ligand of interest; .sdf or .pdb
            mutation_chain_id : str
                name of the chain to be mutated
            mutation_residue_id : str
                residue id to change
            proposed_residue : str
                three letter code of the residue to mutate to
            phase : str, default complex
                if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
                (the same phase is used for the apo system)
            conduct_endstate_validation : bool, default False
                whether to conduct an endstate validation of the hybrid topology factory
            ligand_index : int, default 0
                which ligand to use
            forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
                forcefield files for proteins and solvent
            barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50)
                barostat to use
            forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'nonbondedMethod': app.PME, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
                forcefield kwargs for system parametrization
            small_molecule_forcefields : str, default 'gaff-2.11'
                the forcefield string for small molecule parametrization
            **kwargs
                accepted but not used by this method

        TODO : allow argument for separate apo structure if it exists separately
               allow argument for spectator ligands besides the 'ligand_filename'
        """
        from openforcefield.topology import Molecule
        from openmmforcefields.generators import SystemGenerator

        # first thing to do is make a complex and apo...
        # (context manager so the handle is closed even if parsing fails)
        with open(receptor_filename, 'r') as pdbfile:
            pdb = app.PDBFile(pdbfile)
        receptor_positions = pdb.positions
        receptor_md_topology = md.Topology.from_openmm(pdb.topology)
        # Round-trip through mdtraj, as the original did, before further use.
        receptor_topology = receptor_md_topology.to_openmm()
        receptor_n_atoms = receptor_md_topology.n_atoms

        molecules = []
        ligand_mol = createOEMolFromSDF(ligand_filename, index=ligand_index)
        ligand_mol = generate_unique_atom_names(ligand_mol)
        molecules.append(
            Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
        ligand_positions = extractPositionsFromOEMol(ligand_mol)
        ligand_topology = forcefield_generators.generateTopologyFromOEMol(
            ligand_mol)
        ligand_md_topology = md.Topology.from_openmm(ligand_topology)
        ligand_n_atoms = ligand_md_topology.n_atoms

        #now create a complex: receptor atoms first, ligand atoms appended
        complex_md_topology = receptor_md_topology.join(ligand_md_topology)
        complex_topology = complex_md_topology.to_openmm()
        complex_positions = unit.Quantity(np.zeros(
            [receptor_n_atoms + ligand_n_atoms, 3]),
                                          unit=unit.nanometers)
        complex_positions[:receptor_n_atoms, :] = receptor_positions
        complex_positions[receptor_n_atoms:, :] = ligand_positions

        #now for a system_generator
        self.system_generator = SystemGenerator(
            forcefields=forcefield_files,
            barostat=barostat,
            forcefield_kwargs=forcefield_kwargs,
            small_molecule_forcefield=small_molecule_forcefields,
            molecules=molecules,
            cache=None)

        #create complex and apo inputs...
        complex_topology, complex_positions, complex_system = self._solvate(
            complex_topology, complex_positions, 'tip3p', phase=phase)
        # Pass the requested phase through (previously the literal string
        # 'phase' was passed, so the apo system ignored the `phase` argument).
        apo_topology, apo_positions, apo_system = self._solvate(
            receptor_topology, receptor_positions, 'tip3p', phase=phase)

        geometry_engine = FFAllAngleGeometryEngine(
            metadata=None,
            use_sterics=False,
            n_bond_divisions=100,
            n_angle_divisions=180,
            n_torsion_divisions=360,
            verbose=True,
            storage=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            neglect_angles=False,
            use_14_nonbondeds=True)

        #run pipeline... (complex first, apo second; order matters for htfs below)
        htfs = []
        for (top, pos, system) in zip([complex_topology, apo_topology],
                                      [complex_positions, apo_positions],
                                      [complex_system, apo_system]):
            point_mutation_engine = PointMutationEngine(
                wildtype_topology=top,
                system_generator=self.system_generator,
                chain_id=
                mutation_chain_id,  #denote the chain id allowed to mutate (it's always a string variable)
                max_point_mutants=1,
                residues_allowed_to_mutate=[
                    mutation_residue_id
                ],  #the residue ids allowed to mutate
                allowed_mutations=[
                    (mutation_residue_id, proposed_residue)
                ],  #the residue ids allowed to mutate with the three-letter code allowed to change
                aggregate=True)  #always allow aggregation

            topology_proposal = point_mutation_engine.propose(system, top)

            new_positions, logp_proposal = geometry_engine.propose(
                topology_proposal, pos, beta)
            # The reverse logp is unused; the call is kept because the engine's
            # reverse_* attributes are read below (presumably set by it).
            logp_reverse = geometry_engine.logp_reverse(
                topology_proposal, new_positions, pos, beta)

            forward_htf = HybridTopologyFactory(
                topology_proposal=topology_proposal,
                current_positions=pos,
                new_positions=new_positions,
                use_dispersion_correction=False,
                functions=None,
                softcore_alpha=None,
                bond_softening_constant=1.0,
                angle_softening_constant=1.0,
                soften_only_new=False,
                neglected_new_angle_terms=[],
                neglected_old_angle_terms=[],
                softcore_LJ_v2=True,
                softcore_electrostatics=True,
                softcore_LJ_v2_alpha=0.85,
                softcore_electrostatics_alpha=0.3,
                softcore_sigma_Q=1.0,
                interpolate_old_and_new_14s=False,
                omitted_terms=None)

            # Valence energy added by the forward proposal (zero if nothing is created).
            if not topology_proposal.unique_new_atoms:
                assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
                assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.forward_atoms_with_positions_reduced_potential})"
                # Must bind the same name the else-branch binds; it is read by
                # the validation call below.
                added_valence_energy = 0.0
            else:
                added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

            # Valence energy removed by the reverse proposal (zero if nothing is deleted).
            if not topology_proposal.unique_old_atoms:
                assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
                assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.reverse_atoms_with_positions_reduced_potential})"
                subtracted_valence_energy = 0.0
            else:
                subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

            if conduct_endstate_validation:
                zero_state_error, one_state_error = validate_endstate_energies(
                    forward_htf._topology_proposal,
                    forward_htf,
                    added_valence_energy,
                    subtracted_valence_energy,
                    beta=beta,
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD)

            htfs.append(forward_htf)

        self.complex_htf = htfs[0]
        self.apo_htf = htfs[1]
Beispiel #12
0
def generate_topology_proposal(old_mol_iupac="pentane",
                               new_mol_iupac="butane"):
    """
    Build a TopologyProposal (and matching positions) for use in tests.

    Both molecules are created from their IUPAC names, retitled "MOL" and
    handed to a TwoMoleculeSetProposalEngine.  The positions of the new
    molecule are then proposed by an FFAllAngleGeometryEngine at 300 K.

    Parameters
    ----------
    old_mol_iupac : str, optional
        IUPAC name of the starting molecule, default pentane
    new_mol_iupac : str, optional
        IUPAC name of the target molecule, default butane

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal.TopologyProposal
        the proposal describing the old -> new transformation
    old_positions : [n, 3] np.ndarray of float
        positions of the old molecule as returned by createSystemFromIUPAC
    new_positions : [m, 3] np.ndarray of float
        positions of the new molecule as proposed by the geometry engine
    """
    from perses.rjmc.topology_proposal import TwoMoleculeSetProposalEngine, SystemGenerator
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    from perses.tests.utils import createSystemFromIUPAC, get_data_filename
    import openmoltools.forcefield_generators as forcefield_generators
    from io import StringIO
    from openmmtools.constants import kB

    # inverse thermal energy at 300 K; the geometry engine needs it
    beta = 1.0 / (kB * (300.0 * unit.kelvin))

    # forcefield definitions passed to the SystemGenerator
    ff_files = [get_data_filename("data/gaff.xml"), 'amber99sbildn.xml']

    # the old molecule supplies system/positions/topology; from the new
    # molecule only the OEMol itself is used below
    old_mol, old_system, old_positions, old_topology = createSystemFromIUPAC(
        old_mol_iupac)
    new_mol, _new_system, _new_positions, _new_topology = createSystemFromIUPAC(
        new_mol_iupac)

    # give both molecules the same title
    for molecule in (old_mol, new_mol):
        molecule.SetTitle("MOL")

    # proposal machinery
    system_generator = SystemGenerator(
        ff_files, forcefield_kwargs={'removeCMMotion': False})
    proposal_engine = TwoMoleculeSetProposalEngine(
        old_mol, new_mol, system_generator, residue_name="pentane")
    geometry_engine = FFAllAngleGeometryEngine()

    # topology proposal first, then a geometry proposal for the new atoms
    topology_proposal = proposal_engine.propose(old_system, old_topology)
    proposed_positions, _ = geometry_engine.propose(topology_proposal,
                                                    old_positions, beta)

    return topology_proposal, old_positions, proposed_positions
def HybridTopologyFactory_energies(
        current_mol='toluene',
        proposed_mol='1,2-bis(trifluoromethyl) benzene'):
    """
    Check the endstate energies of a solvated hybrid system.

    For the zero (old) and one (new) endstates, the reduced potential of the
    nonalchemical system minus that of the alchemical hybrid system, plus the
    valence energy reported by the geometry engine, must be smaller in
    magnitude than ENERGY_THRESHOLD.

    Parameters
    ----------
    current_mol : str, optional
        name of the starting molecule, default toluene
    proposed_mol : str, optional
        name of the target molecule, default 1,2-bis(trifluoromethyl) benzene

    Raises
    ------
    AssertionError
        if either endstate discrepancy is not below ENERGY_THRESHOLD
    """
    from perses.tests.utils import generate_solvated_hybrid_test_topology, generate_endpoint_thermodynamic_states
    import openmmtools.cache as cache

    # only the solvated system is exercised here
    top_proposal, old_positions, _ = generate_solvated_hybrid_test_topology(
        current_mol_name=current_mol, proposed_mol_name=proposed_mol)

    # switch off the dispersion correction on force index 3 of both systems
    for endpoint_system in (top_proposal._old_system, top_proposal._new_system):
        endpoint_system.getForce(3).setUseDispersionCorrection(False)

    # forward proposal creates the new positions; the reverse log-probability
    # call populates the reverse_* reduced potentials read further down
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=100,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False)
    new_positions, _logp_forward = geometry_engine.propose(
        top_proposal, old_positions, beta)
    _logp_reverse = geometry_engine.logp_reverse(top_proposal, new_positions,
                                                 old_positions, beta)

    # hybrid system and the four endpoint thermodynamic states
    htf = HybridTopologyFactory(top_proposal, old_positions, new_positions)
    hybrid_system = htf.hybrid_system
    nonalch_zero, nonalch_one, alch_zero, alch_one = generate_endpoint_thermodynamic_states(
        hybrid_system, top_proposal)

    def _reduced_potential(state, pos, box_vectors):
        """Reduced potential of `state` evaluated at the given positions and box."""
        context, _ = cache.global_context_cache.get_context(state)
        sampler_state = SamplerState(positions=pos, box_vectors=box_vectors)
        sampler_state.apply_to_context(context)
        return state.reduced_potential(context)

    # (state, positions, box vectors), in the order the results are unpacked
    endpoints = [
        (nonalch_zero, old_positions,
         top_proposal._old_system.getDefaultPeriodicBoxVectors()),
        (alch_zero, htf._hybrid_positions,
         hybrid_system.getDefaultPeriodicBoxVectors()),
        (alch_one, htf._hybrid_positions,
         hybrid_system.getDefaultPeriodicBoxVectors()),
        (nonalch_one, new_positions,
         top_proposal._new_system.getDefaultPeriodicBoxVectors()),
    ]
    reduced_potentials = []
    for endpoint in endpoints:
        reduced_potentials.append(_reduced_potential(*endpoint))

    # valence energies: final-context minus atoms-with-positions reduced potentials
    forward_added_valence_energy = (
        geometry_engine.forward_final_context_reduced_potential
        - geometry_engine.forward_atoms_with_positions_reduced_potential)
    reverse_subtracted_valence_energy = (
        geometry_engine.reverse_final_context_reduced_potential
        - geometry_engine.reverse_atoms_with_positions_reduced_potential)

    nonalch_zero_rp, alch_zero_rp, alch_one_rp, nonalch_one_rp = reduced_potentials

    assert abs(
        nonalch_zero_rp - alch_zero_rp + forward_added_valence_energy
    ) < ENERGY_THRESHOLD, f"The zero state alchemical and nonalchemical energy absolute difference {abs(nonalch_zero_rp - alch_zero_rp + forward_added_valence_energy)} is greater than the threshold of {ENERGY_THRESHOLD}."
    assert abs(
        nonalch_one_rp - alch_one_rp + reverse_subtracted_valence_energy
    ) < ENERGY_THRESHOLD, f"The one state alchemical and nonalchemical energy absolute difference {abs(nonalch_one_rp - alch_one_rp + reverse_subtracted_valence_energy)} is greater than the threshold of {ENERGY_THRESHOLD}."

    # report the two discrepancies
    print(
        f"Abs difference in zero alchemical vs nonalchemical systems: {abs(nonalch_zero_rp - alch_zero_rp + forward_added_valence_energy)}"
    )
    print(
        f"Abs difference in one alchemical vs nonalchemical systems: {abs(nonalch_one_rp - alch_one_rp + reverse_subtracted_valence_energy)}"
    )
Beispiel #14
0
    def __init__(self,
                 protein_pdb_filename,
                 ligand_file,
                 old_ligand_index,
                 new_ligand_index,
                 forcefield_files,
                 pressure=1.0 * unit.atmosphere,
                 temperature=300.0 * unit.kelvin,
                 solvent_padding=9.0 * unit.angstroms):
        """
        Initialize a NonequilibriumFEPSetup object

        The constructor loads both ligands and the protein, builds and solvates
        the old complex, and generates the topology proposals and proposed new
        positions for the complex and solvent phases.

        Parameters
        ----------
        protein_pdb_filename : str
            The name of the protein pdb file
        ligand_file : str
            the name of the ligand file (any openeye supported format)
        old_ligand_index : int
            Index, within ligand_file, of the molecule used as the old ligand
        new_ligand_index : int
            Index, within ligand_file, of the molecule used as the new ligand
        forcefield_files : list of str
            The list of ffxml files that contain the forcefields that will be used
        pressure : Quantity, units of pressure
            Pressure to use in the barostat. If None, no barostat is created and
            the SystemGenerator is built from forcefield_files alone.
        temperature : Quantity, units of temperature
            Temperature used for the barostat and for the inverse temperature
            (beta) passed to the geometry engine
        solvent_padding : Quantity, units of length
            The amount of padding to use when adding solvent
        """
        self._protein_pdb_filename = protein_pdb_filename
        self._pressure = pressure
        self._temperature = temperature
        self._barostat_period = 50
        self._padding = solvent_padding

        self._ligand_file = ligand_file
        self._old_ligand_index = old_ligand_index
        self._new_ligand_index = new_ligand_index

        # both ligands come from the same file, selected by index
        self._old_ligand_oemol = self.load_sdf(self._ligand_file,
                                               index=self._old_ligand_index)
        self._new_ligand_oemol = self.load_sdf(self._ligand_file,
                                               index=self._new_ligand_index)

        self._old_ligand_positions = extractPositionsFromOEMOL(
            self._old_ligand_oemol)

        # the ligand forcefield is generated before the molecules are retitled
        ffxml = forcefield_generators.generateForceFieldFromMolecules(
            [self._old_ligand_oemol, self._new_ligand_oemol])

        self._old_ligand_oemol.SetTitle("MOL")
        self._new_ligand_oemol.SetTitle("MOL")

        # SMILES with explicit hydrogens for both ligands
        smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
        self._new_ligand_smiles = oechem.OECreateSmiString(
            self._new_ligand_oemol, smiles_flags)
        self._old_ligand_smiles = oechem.OECreateSmiString(
            self._old_ligand_oemol, smiles_flags)

        print(self._new_ligand_smiles)
        print(self._old_ligand_smiles)

        self._old_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
            self._old_ligand_oemol)
        self._old_ligand_md_topology = md.Topology.from_openmm(
            self._old_ligand_topology)
        self._new_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
            self._new_ligand_oemol)
        self._new_ligand_md_topology = md.Topology.from_openmm(
            self._new_ligand_topology)
        # historical, misspelled attribute name kept as an alias for existing users
        self._new_liands_md_topology = self._new_ligand_md_topology

        # the context manager closes the PDB file even if parsing raises
        with open(self._protein_pdb_filename, 'r') as protein_pdbfile:
            pdb_file = app.PDBFile(protein_pdbfile)

        self._protein_topology_old = pdb_file.topology
        self._protein_md_topology_old = md.Topology.from_openmm(
            self._protein_topology_old)
        self._protein_positions_old = pdb_file.positions
        self._forcefield = app.ForceField(*forcefield_files)
        self._forcefield.loadFile(StringIO(ffxml))

        print("Generated forcefield")

        # old complex = protein followed by the old ligand
        self._complex_md_topology_old = self._protein_md_topology_old.join(
            self._old_ligand_md_topology)
        self._complex_topology_old = self._complex_md_topology_old.to_openmm()

        n_atoms_complex_old = self._complex_topology_old.getNumAtoms()
        n_atoms_protein_old = self._protein_topology_old.getNumAtoms()

        # protein coordinates fill the first rows, ligand coordinates the rest
        self._complex_positions_old = unit.Quantity(
            np.zeros([n_atoms_complex_old, 3]), unit=unit.nanometers)
        self._complex_positions_old[:n_atoms_protein_old, :] = self._protein_positions_old
        self._complex_positions_old[n_atoms_protein_old:, :] = self._old_ligand_positions

        if pressure is not None:
            barostat = openmm.MonteCarloBarostat(self._pressure,
                                                 self._temperature,
                                                 self._barostat_period)
            self._system_generator = SystemGenerator(
                forcefield_files,
                barostat=barostat,
                forcefield_kwargs={'nonbondedMethod': app.PME})
        else:
            self._system_generator = SystemGenerator(forcefield_files)

        self._complex_proposal_engine = TwoMoleculeSetProposalEngine(
            self._old_ligand_oemol,
            self._new_ligand_oemol,
            self._system_generator,
            residue_name="MOL")
        self._geometry_engine = FFAllAngleGeometryEngine()

        (self._complex_topology_old_solvated,
         self._complex_positions_old_solvated,
         self._complex_system_old_solvated) = self._solvate_system(
             self._complex_topology_old, self._complex_positions_old)
        self._complex_md_topology_old_solvated = md.Topology.from_openmm(
            self._complex_topology_old_solvated)
        print(self._complex_proposal_engine._smiles_list)

        # inverse thermal energy used for both geometry proposals
        beta = 1.0 / (kB * temperature)

        # complex phase: topology proposal, then proposed new positions
        self._complex_topology_proposal = self._complex_proposal_engine.propose(
            self._complex_system_old_solvated,
            self._complex_topology_old_solvated)
        self._complex_positions_new_solvated, _ = self._geometry_engine.propose(
            self._complex_topology_proposal,
            self._complex_positions_old_solvated, beta)

        # now generate the equivalent objects for the solvent phase. First, generate the ligand-only topologies and atom map
        self._solvent_topology_proposal, self._old_solvent_positions = self._generate_ligand_only_topologies(
            self._complex_positions_old_solvated,
            self._complex_positions_new_solvated)
        self._new_solvent_positions, _ = self._geometry_engine.propose(
            self._solvent_topology_proposal, self._old_solvent_positions, beta)
Beispiel #15
0
class NonequilibriumFEPSetup(object):
    """
    This class is a helper class for nonequilibrium FEP. It generates the input objects that are necessary for the two
    legs of a relative FEP calculation. For each leg, that is a TopologyProposal, old_positions, and new_positions.
    Importantly, it ensures that the atom maps in the solvent and complex phases match correctly.
    """
    def __init__(self,
                 protein_pdb_filename,
                 ligand_file,
                 old_ligand_index,
                 new_ligand_index,
                 forcefield_files,
                 pressure=1.0 * unit.atmosphere,
                 temperature=300.0 * unit.kelvin,
                 solvent_padding=9.0 * unit.angstroms):
        """
        Initialize a NonequilibriumFEPSetup object

        The constructor loads both ligands and the protein, builds and solvates
        the old complex, and generates the topology proposals and proposed new
        positions for the complex and solvent phases.

        Parameters
        ----------
        protein_pdb_filename : str
            The name of the protein pdb file
        ligand_file : str
            the name of the ligand file (any openeye supported format)
        old_ligand_index : int
            Index, within ligand_file, of the molecule used as the old ligand
        new_ligand_index : int
            Index, within ligand_file, of the molecule used as the new ligand
        forcefield_files : list of str
            The list of ffxml files that contain the forcefields that will be used
        pressure : Quantity, units of pressure
            Pressure to use in the barostat. If None, no barostat is created and
            the SystemGenerator is built from forcefield_files alone.
        temperature : Quantity, units of temperature
            Temperature used for the barostat and for the inverse temperature
            (beta) passed to the geometry engine
        solvent_padding : Quantity, units of length
            The amount of padding to use when adding solvent
        """
        self._protein_pdb_filename = protein_pdb_filename
        self._pressure = pressure
        self._temperature = temperature
        self._barostat_period = 50
        self._padding = solvent_padding

        self._ligand_file = ligand_file
        self._old_ligand_index = old_ligand_index
        self._new_ligand_index = new_ligand_index

        # both ligands come from the same file, selected by index
        self._old_ligand_oemol = self.load_sdf(self._ligand_file,
                                               index=self._old_ligand_index)
        self._new_ligand_oemol = self.load_sdf(self._ligand_file,
                                               index=self._new_ligand_index)

        self._old_ligand_positions = extractPositionsFromOEMOL(
            self._old_ligand_oemol)

        # the ligand forcefield is generated before the molecules are retitled
        ffxml = forcefield_generators.generateForceFieldFromMolecules(
            [self._old_ligand_oemol, self._new_ligand_oemol])

        self._old_ligand_oemol.SetTitle("MOL")
        self._new_ligand_oemol.SetTitle("MOL")

        # SMILES with explicit hydrogens for both ligands
        smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
        self._new_ligand_smiles = oechem.OECreateSmiString(
            self._new_ligand_oemol, smiles_flags)
        self._old_ligand_smiles = oechem.OECreateSmiString(
            self._old_ligand_oemol, smiles_flags)

        print(self._new_ligand_smiles)
        print(self._old_ligand_smiles)

        self._old_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
            self._old_ligand_oemol)
        self._old_ligand_md_topology = md.Topology.from_openmm(
            self._old_ligand_topology)
        self._new_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
            self._new_ligand_oemol)
        self._new_ligand_md_topology = md.Topology.from_openmm(
            self._new_ligand_topology)
        # historical, misspelled attribute name kept as an alias for existing users
        self._new_liands_md_topology = self._new_ligand_md_topology

        # the context manager closes the PDB file even if parsing raises
        with open(self._protein_pdb_filename, 'r') as protein_pdbfile:
            pdb_file = app.PDBFile(protein_pdbfile)

        self._protein_topology_old = pdb_file.topology
        self._protein_md_topology_old = md.Topology.from_openmm(
            self._protein_topology_old)
        self._protein_positions_old = pdb_file.positions
        self._forcefield = app.ForceField(*forcefield_files)
        self._forcefield.loadFile(StringIO(ffxml))

        print("Generated forcefield")

        # old complex = protein followed by the old ligand
        self._complex_md_topology_old = self._protein_md_topology_old.join(
            self._old_ligand_md_topology)
        self._complex_topology_old = self._complex_md_topology_old.to_openmm()

        n_atoms_complex_old = self._complex_topology_old.getNumAtoms()
        n_atoms_protein_old = self._protein_topology_old.getNumAtoms()

        # protein coordinates fill the first rows, ligand coordinates the rest
        self._complex_positions_old = unit.Quantity(
            np.zeros([n_atoms_complex_old, 3]), unit=unit.nanometers)
        self._complex_positions_old[:n_atoms_protein_old, :] = self._protein_positions_old
        self._complex_positions_old[n_atoms_protein_old:, :] = self._old_ligand_positions

        if pressure is not None:
            barostat = openmm.MonteCarloBarostat(self._pressure,
                                                 self._temperature,
                                                 self._barostat_period)
            self._system_generator = SystemGenerator(
                forcefield_files,
                barostat=barostat,
                forcefield_kwargs={'nonbondedMethod': app.PME})
        else:
            self._system_generator = SystemGenerator(forcefield_files)

        self._complex_proposal_engine = TwoMoleculeSetProposalEngine(
            self._old_ligand_oemol,
            self._new_ligand_oemol,
            self._system_generator,
            residue_name="MOL")
        self._geometry_engine = FFAllAngleGeometryEngine()

        (self._complex_topology_old_solvated,
         self._complex_positions_old_solvated,
         self._complex_system_old_solvated) = self._solvate_system(
             self._complex_topology_old, self._complex_positions_old)
        self._complex_md_topology_old_solvated = md.Topology.from_openmm(
            self._complex_topology_old_solvated)
        print(self._complex_proposal_engine._smiles_list)

        # inverse thermal energy used for both geometry proposals
        beta = 1.0 / (kB * temperature)

        # complex phase: topology proposal, then proposed new positions
        self._complex_topology_proposal = self._complex_proposal_engine.propose(
            self._complex_system_old_solvated,
            self._complex_topology_old_solvated)
        self._complex_positions_new_solvated, _ = self._geometry_engine.propose(
            self._complex_topology_proposal,
            self._complex_positions_old_solvated, beta)

        # now generate the equivalent objects for the solvent phase. First, generate the ligand-only topologies and atom map
        self._solvent_topology_proposal, self._old_solvent_positions = self._generate_ligand_only_topologies(
            self._complex_positions_old_solvated,
            self._complex_positions_new_solvated)
        self._new_solvent_positions, _ = self._geometry_engine.propose(
            self._solvent_topology_proposal, self._old_solvent_positions, beta)

    def load_sdf(self, sdf_filename, index=0):
        """
        Load an SDF file into an OEMol. Since SDF files can contain multiple molecules, an index can be provided as well.

        Parameters
        ----------
        sdf_filename : str
            The name of the SDF file
        index : int, default 0
            The index of the molecule in the SDF file

        Returns
        -------
        mol : openeye.oechem.OEMol object
            The loaded oemol object

        Raises
        ------
        IndexError
            If the file yields no molecule at the requested index
        """
        ifs = oechem.oemolistream()
        # NOTE(review): the result of open() is not checked, so an unreadable
        # file surfaces as an IndexError below -- confirm before changing.
        ifs.open(sdf_filename)
        try:
            # copy every molecule out of the stream before it is closed
            mol_list = [oechem.OEMol(mol) for mol in ifs.GetOEMols()]
        finally:
            ifs.close()
        # return the molecule the caller asked for
        return mol_list[index]

    def _solvate_system(self, topology, positions, model='tip3p'):
        """
        Generate a solvated topology, positions, and system for a given input topology and positions.
        For generating the system, the forcefield files provided in the constructor will be used.

        Parameters
        ----------
        topology : app.Topology
            Topology of the system to solvate
        positions : [n, 3] ndarray of Quantity nm
            the positions of the unsolvated system
        model : str, default 'tip3p'
            Water model name passed to Modeller.addSolvent

        Returns
        -------
        solvated_topology : app.Topology
            Topology of the system with added waters
        solvated_positions : [n + 3(n_waters), 3] ndarray of Quantity nm
            Solvated positions
        solvated_system : openmm.System
            The parameterized system, containing a barostat if one was specified.
        """
        modeller = app.Modeller(topology, positions)
        # strip hydrogens from everything except the ligand ("MOL") and let
        # the forcefield add them back
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol in ['H'] and atom.residue.name != "MOL"
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=self._forcefield)
        print("preparing to add solvent")
        modeller.addSolvent(self._forcefield,
                            model=model,
                            padding=self._padding)
        solvated_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()
        print("solvent added, parameterizing")
        solvated_system = self._system_generator.build_system(
            solvated_topology)
        print("System parameterized")

        return solvated_topology, solvated_positions, solvated_system

    def _generate_ligand_only_topologies(self, old_positions, new_positions):
        """
        This method generates the solvent-phase TopologyProposal from the solvated-complex TopologyProposal
        stored on this object (self._complex_topology_proposal).
        The old ligand is cut out of the complex and re-solvated; the new ligand is joined with that same solvent,
        and the ligand part of the complex atom map is reused so that both phases share the same mapping.

        Parameters
        ----------
        old_positions : [n, 3] ndarray of Quantity nm
            Positions of the old solvated complex
        new_positions : [m, 3] ndarray of Quantity nm
            Positions of the new solvated complex. Accepted for interface
            compatibility; not used by the current implementation.

        Returns
        -------
        ligand_topology_proposal : TopologyProposal
            The proposal for the solvated-ligand transformation
        old_solvated_positions : ndarray of Quantity nm
            The positions of the old ligand after solvation
        """
        old_complex = md.Topology.from_openmm(
            self._complex_topology_proposal.old_topology)
        new_complex = md.Topology.from_openmm(
            self._complex_topology_proposal.new_topology)

        complex_atom_map = self._complex_topology_proposal.old_to_new_atom_map

        # where each ligand sits in its complex, and how many atoms it has
        old_mol_start_index, old_mol_len = self._complex_proposal_engine._find_mol_start_index(
            old_complex.to_openmm())
        new_mol_start_index, new_mol_len = self._complex_proposal_engine._find_mol_start_index(
            new_complex.to_openmm())

        # copy the old positions into a Quantity array so they can be sliced
        old_pos = unit.Quantity(np.zeros([len(old_positions), 3]),
                                unit=unit.nanometers)
        old_pos[:, :] = old_positions
        old_ligand_positions = old_pos[old_mol_start_index:(
            old_mol_start_index + old_mol_len), :]

        #subset the topologies:
        old_ligand_atom_indices = old_complex.select("resname == 'MOL' ")
        old_ligand_topology = old_complex.subset(old_ligand_atom_indices)
        new_ligand_topology = new_complex.subset(
            new_complex.select("resname == 'MOL' "))

        #solvate the old ligand topology:
        old_solvated_topology, old_solvated_positions, old_solvated_system = self._solvate_system(
            old_ligand_topology.to_openmm(), old_ligand_positions)

        old_solvated_md_topology = md.Topology.from_openmm(
            old_solvated_topology)

        #now remove the old ligand, leaving only the solvent
        solvent_only_topology = old_solvated_md_topology.subset(
            old_solvated_md_topology.select("water"))

        #append the solvent to the new ligand-only topology:
        new_solvated_ligand_md_topology = new_ligand_topology.join(
            solvent_only_topology)
        nsl, b = new_solvated_ligand_md_topology.to_dataframe()
        #dirty hack because new_solvated_ligand_md_topology.to_openmm() was throwing bond topology error
        new_solvated_ligand_md_topology = md.Topology.from_dataframe(nsl, b)

        new_solvated_ligand_omm_topology = new_solvated_ligand_md_topology.to_openmm()
        # the new topology shares the box of the old solvated ligand
        new_solvated_ligand_omm_topology.setPeriodicBoxVectors(
            old_solvated_topology.getPeriodicBoxVectors())

        #create the new ligand system:
        new_solvated_system = self._system_generator.build_system(
            new_solvated_ligand_omm_topology)

        # shift the mapped ligand atoms so that each ligand starts at index 0
        new_to_old_atom_map = {
            complex_atom_map[x] - new_mol_start_index: x - old_mol_start_index
            for x in old_ligand_atom_indices
            if x in complex_atom_map
        }
        #adjust the atom map to account for the presence of solvent degrees of freedom:
        #By design, all atoms after the ligands are water, and should be mapped.
        n_water_atoms = solvent_only_topology.to_openmm().getNumAtoms()
        for i in range(n_water_atoms):
            new_to_old_atom_map[new_mol_len + i] = old_mol_len + i

        #make a TopologyProposal
        ligand_topology_proposal = TopologyProposal(
            new_topology=new_solvated_ligand_omm_topology,
            new_system=new_solvated_system,
            old_topology=old_solvated_topology,
            old_system=old_solvated_system,
            new_to_old_atom_map=new_to_old_atom_map,
            old_chemical_state_key='A',
            new_chemical_state_key='B')

        return ligand_topology_proposal, old_solvated_positions

    @property
    def complex_topology_proposal(self):
        """TopologyProposal for the solvated complex phase."""
        return self._complex_topology_proposal

    @property
    def complex_old_positions(self):
        """Positions of the old solvated complex."""
        return self._complex_positions_old_solvated

    @property
    def complex_new_positions(self):
        """Positions proposed by the geometry engine for the new solvated complex."""
        return self._complex_positions_new_solvated

    @property
    def solvent_topology_proposal(self):
        """TopologyProposal for the solvent (ligand-only) phase."""
        return self._solvent_topology_proposal

    @property
    def solvent_old_positions(self):
        """Positions of the old solvated ligand."""
        return self._old_solvent_positions

    @property
    def solvent_new_positions(self):
        """Positions proposed by the geometry engine for the new solvated ligand."""
        return self._new_solvent_positions
Beispiel #16
0
def generate_top_pos_sys(topology, new_res, system, positions,
                         system_generator):
    """
    Generate a point mutation engine and a geometry engine, then conduct the
    topology proposal, the geometry proposal, and the hybrid factory generation.

    Parameters
    ----------
    topology : app.Topology
        wildtype topology handed to the PointMutationEngine
    new_res : str
        three-letter code of the residue that residue id '2' may mutate to
    system : openmm.System
        system corresponding to `topology`
    positions : unit-bearing positions
        current positions, used for the geometry proposal and the hybrid factory
    system_generator : SystemGenerator
        generator the PointMutationEngine uses to build systems

    Returns
    -------
    topology_proposal
        the proposal returned by the point mutation engine
    forward_new_positions
        positions proposed by the geometry engine
    forward_htf : HybridTopologyFactory
        hybrid factory built from the proposal and both sets of positions
    local_map_stereo_sidechain, old_oemol_sidechain, new_oemol_sidechain
        additional objects returned by PointMutationEngine.propose, passed
        through unchanged (note: old before new in this return order)
    """
    #create the point mutation engine
    print("generating point mutation engine")
    point_mutation_engine = PointMutationEngine(
        wildtype_topology=topology,
        system_generator=system_generator,
        #denote the chain id allowed to mutate (it's always a string variable)
        chain_id='1',
        max_point_mutants=1,
        #the residue ids allowed to mutate
        residues_allowed_to_mutate=['2'],
        #the residue ids allowed to mutate with the three-letter code allowed to change
        allowed_mutations=[('2', new_res)],
        #always allow aggregation
        aggregate=True)

    #create a geometry engine
    print("generating geometry engine")
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=100,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False,
                                               use_14_nonbondeds=False)

    #create a top proposal
    print("making topology proposal")
    topology_proposal, local_map_stereo_sidechain, new_oemol_sidechain, old_oemol_sidechain = point_mutation_engine.propose(
        current_system=system, current_topology=topology)

    #make a geometry proposal forward
    print("making geometry proposal")
    forward_new_positions, logp_proposal = geometry_engine.propose(
        topology_proposal, positions, beta)

    #create a hybrid topology factory
    # this status line used to be a bare string expression and was never shown
    print("making forward hybridtopologyfactory")
    forward_htf = HybridTopologyFactory(topology_proposal=topology_proposal,
                                        current_positions=positions,
                                        new_positions=forward_new_positions,
                                        use_dispersion_correction=False,
                                        functions=None,
                                        softcore_alpha=None,
                                        bond_softening_constant=1.0,
                                        angle_softening_constant=1.0,
                                        soften_only_new=False,
                                        neglected_new_angle_terms=[],
                                        neglected_old_angle_terms=[],
                                        softcore_LJ_v2=True,
                                        softcore_electrostatics=True,
                                        softcore_LJ_v2_alpha=0.85,
                                        softcore_electrostatics_alpha=0.3,
                                        softcore_sigma_Q=1.0,
                                        interpolate_old_and_new_14s=False,
                                        omitted_terms=None)

    return topology_proposal, forward_new_positions, forward_htf, local_map_stereo_sidechain, old_oemol_sidechain, new_oemol_sidechain
Beispiel #17
0
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene",
                                           proposed_mol_name="benzene",
                                           current_mol_smiles=None,
                                           proposed_mol_smiles=None,
                                           vacuum=False,
                                           render_atom_mapping=False):
    """
    Generate a topology proposal, current positions, and proposed positions for a small-molecule transformation, either in vacuum or in explicit solvent.

    The molecules are built from the IUPAC names when both names are given.  If either name is None, both SMILES strings must be given and are used instead.
    An atom mapping image named '<old_smiles>to<new_smiles>.png' is requested from perses.utils.smallmolecules.render_atom_mapping if specified.

    Arguments
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule
    current_mol_smiles : str (default None)
        current mol smiles
    proposed_mol_smiles : str (default None)
        proposed mol smiles
    vacuum: bool (default False)
        whether to build a vacuum (True) or solvated (False) topology_proposal
    render_atom_mapping : bool (default False)
        whether to render the atom map between the current and proposed molecules

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : unit-bearing positions
        The positions of the initial system (the solvated positions unless vacuum is True)
    new_positions : unit-bearing positions
        The positions of the new system, as returned by the geometry engine

    Raises
    ------
    Exception
        If the molecules cannot be built from the supplied names or SMILES, or if
        neither a complete pair of names nor a complete pair of SMILES was supplied.
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators

    from openeye import oechem
    from openmoltools.openeye import iupac_to_oemol, generate_conformers, smiles_to_oemol
    import perses.utils.openeye as openeye
    from perses.utils.data import get_data_filename
    from perses.rjmc.topology_proposal import TopologyProposal, SystemGenerator, SmallMoleculeSetProposalEngine
    import simtk.unit as unit
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    # explicit-hydrogen SMILES are used for both molecules
    smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens

    if current_mol_name is not None and proposed_mol_name is not None:
        try:
            old_oemol = iupac_to_oemol(current_mol_name)
            new_oemol = iupac_to_oemol(proposed_mol_name)
            old_smiles = oechem.OECreateSmiString(old_oemol, smiles_flags)
            new_smiles = oechem.OECreateSmiString(new_oemol, smiles_flags)
        except Exception as error:
            # chain the underlying failure so the real cause stays in the traceback
            raise Exception(
                f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!"
            ) from error
    elif current_mol_smiles is not None and proposed_mol_smiles is not None:
        try:
            old_oemol = smiles_to_oemol(current_mol_smiles)
            new_oemol = smiles_to_oemol(proposed_mol_smiles)
            old_smiles = oechem.OECreateSmiString(old_oemol, smiles_flags)
            new_smiles = oechem.OECreateSmiString(new_oemol, smiles_flags)
        except Exception as error:
            raise Exception(f"the variables are not compatible") from error
    else:
        raise Exception(
            f"either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings."
        )

    # only the oemols are kept from these calls; the system, positions and
    # topology they return are discarded or rebuilt below
    old_oemol, _, _, _ = openeye.createSystemFromSMILES(old_smiles,
                                                        title="MOL")

    #correct the old positions
    old_positions = openeye.extractPositionsFromOEMol(old_oemol)
    old_positions = old_positions.in_units_of(unit.nanometers)

    new_oemol, _, _, _ = openeye.createSystemFromSMILES(new_smiles,
                                                        title="NEW")

    ffxml = forcefield_generators.generateForceFieldFromMolecules(
        [old_oemol, new_oemol])

    # both molecules share the residue name given to the proposal engine below
    old_oemol.SetTitle('MOL')
    new_oemol.SetTitle('MOL')

    old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol)

    if vacuum:
        nonbonded_method = app.NoCutoff
        barostat = None
    else:
        nonbonded_method = app.PME
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                             300.0 * unit.kelvin, 50)

    gaff_xml_filename = get_data_filename("data/gaff.xml")
    system_generator = SystemGenerator(
        [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': nonbonded_method,
            'constraints': app.HBonds,
            'hydrogenMass': 4.0 * unit.amu
        })
    # add the parameters generated for the two molecules to the generator's forcefield
    system_generator._forcefield.loadFile(StringIO(ffxml))

    proposal_engine = SmallMoleculeSetProposalEngine([old_smiles, new_smiles],
                                                     system_generator,
                                                     residue_name='MOL')
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=1000,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False)

    if vacuum:
        current_topology = old_topology
        current_positions = old_positions
    else:
        #now to solvate
        modeller = app.Modeller(old_topology, old_positions)
        # strip hydrogens from any residue that is not the small molecule and
        # let Modeller add them back with the generator's forcefield
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol in ['H']
            and atom.residue.name not in ['MOL', 'OLD', 'NEW']
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=system_generator._forcefield)
        modeller.addSolvent(system_generator._forcefield,
                            model='tip3p',
                            padding=9.0 * unit.angstroms)
        current_topology = modeller.getTopology()
        raw_positions = modeller.getPositions()
        # repackage the Modeller positions as a single numpy-backed Quantity in nanometers
        current_positions = unit.quantity.Quantity(
            value=np.array([
                list(atom_pos) for atom_pos in
                raw_positions.value_in_unit_system(unit.md_unit_system)
            ]),
            unit=unit.nanometers)

    current_system = system_generator.build_system(current_topology)

    #now to create proposal
    top_proposal = proposal_engine.propose(current_system=current_system,
                                           current_topology=current_topology,
                                           current_mol=old_oemol,
                                           proposed_mol=new_oemol)
    new_positions, _ = geometry_engine.propose(top_proposal,
                                               current_positions, beta)

    if render_atom_mapping:
        # imported under an alias so the boolean argument is not rebound
        from perses.utils.smallmolecules import render_atom_mapping as draw_atom_mapping
        # the vacuum path reads the map from the proposal; the solvated path
        # reads the non-offset map from the proposal engine
        if vacuum:
            atom_map = top_proposal._new_to_old_atom_map
        else:
            atom_map = proposal_engine.non_offset_new_to_old_atom_map
        print(f"new_to_old: {atom_map}")
        draw_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol,
                          new_oemol, atom_map)

    return top_proposal, current_positions, new_positions
Beispiel #18
0
def validate_rjmc_work_variance(top_prop,
                                positions,
                                geometry_method=0,
                                num_iterations=10,
                                md_steps=250,
                                compute_timeseries=False,
                                md_system=None,
                                prespecified_conformers=None):
    """
    Collect conformers (from Langevin MD at 300 K, or from the caller) and compute the
    RJMC work of a forward/reverse geometry proposal started from each one.

    The work for each conformer is
    logp_forward - logp_backward + added_energy - subtracted_energy,
    where the energies are differences of reduced potentials reported by the geometry engine.

    Arguments
    ----------
    top_prop : perses.rjmc.topology_proposal.TopologyProposal object
        topology_proposal
    positions : unit-bearing positions
        starting positions for the MD run; they are minimized before sampling.
        Ignored when prespecified_conformers is given.
    md_system : openmm.System object, default None
        system from which md is conducted; the default is the top_prop._old_system
    geometry_method : int
        which geometry proposal method to use
            0: neglect_angles = True (this is supposed to be the zero-variance method)
            1: neglect_angles = False (this will accumulate variance)
            2: use_sterics = True (this is experimental)
    num_iterations: int
        number of times to run md_steps integrator
    md_steps: int
        number of md_steps to run in each num_iteration
    compute_timeseries = bool (default False)
        whether to use pymbar detectEquilibration and subsampleCorrelated data from the MD run (the potential energy is the data)
    prespecified_conformers = None or unit.Quantity(np.array([num_iterations, system.getNumParticles(), 3]), unit = unit.nanometers)
        whether to input a unit.Quantity of conformers and bypass the conformer_generation/pymbar stage; None will default conduct this phase

    Returns
    -------
    conformers : unit.Quantity(np.array([num_iterations, system.getNumParticles(), 3]), unit = unit.nanometers)
        all positions recorded during the md run (or the prespecified conformers)
    rj_works : list
        work from each conformer proposal that was used

    Raises
    ------
    Exception
        if geometry_method is not 0, 1, or 2
    """
    from openmmtools import integrators
    import simtk.unit as unit
    import simtk.openmm as openmm
    from openmmtools.constants import kB
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import tqdm

    temperature = 300.0 * unit.kelvin  # unit-bearing temperature
    kT = kB * temperature  # unit-bearing thermal energy
    beta = 1.0 / kT  # unit-bearing inverse thermal energy

    # identity checks: '== None' is not a reliable test on array-like inputs
    system = top_prop._old_system if md_system is None else md_system

    if prespecified_conformers is None:

        #now we can specify conformations from MD
        integrator = integrators.LangevinIntegrator(
            collision_rate=1.0 / unit.picosecond,
            timestep=4.0 * unit.femtosecond,
            temperature=temperature)
        context = openmm.Context(system, integrator)
        context.setPositions(positions)
        openmm.LocalEnergyMinimizer.minimize(context)
        minimized_positions = context.getState(getPositions=True).getPositions(
            asNumpy=True)
        print(f"completed initial minimization")
        context.setPositions(minimized_positions)

        zeros = np.zeros([num_iterations, int(system.getNumParticles()), 3])
        conformers = unit.Quantity(zeros, unit=unit.nanometers)
        rps = np.zeros((num_iterations))  # reduced potential of each snapshot

        print(f"conducting md sampling")
        for iteration in tqdm.trange(num_iterations):
            integrator.step(md_steps)
            state = context.getState(getPositions=True, getEnergy=True)
            conformers[iteration, :, :] = state.getPositions(asNumpy=True)
            rps[iteration] = state.getPotentialEnergy() * beta

        del context, integrator

        if compute_timeseries:
            print(f"computing production and data correlation")
            from pymbar import timeseries
            t0, g, Neff = timeseries.detectEquilibration(rps)
            series = timeseries.subsampleCorrelatedData(np.arange(
                t0, num_iterations),
                                                        g=g)
            print(f"production starts at index {t0} of {num_iterations}")
            print(f"the number of effective samples is {Neff}")
            # coerce to an array so the offset is applied elementwise even if
            # t0 is a plain int and series is a list
            indices = t0 + np.asarray(series)
            print(f"the filtered indices are {indices}")

        else:
            indices = range(num_iterations)
    else:
        conformers = prespecified_conformers
        indices = range(len(conformers))

    #now we can define a geometry_engine
    # only these two settings differ between the supported methods
    method_settings = {
        0: {'use_sterics': False, 'neglect_angles': True},
        1: {'use_sterics': False, 'neglect_angles': False},
        2: {'use_sterics': True, 'neglect_angles': False},
    }
    if geometry_method not in method_settings:
        raise Exception(f"there is no geometry method for {geometry_method}")

    geometry_engine = FFAllAngleGeometryEngine(
        metadata=None,
        n_bond_divisions=1000,
        n_angle_divisions=180,
        n_torsion_divisions=360,
        verbose=True,
        storage=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        **method_settings[geometry_method])

    rj_works = []
    print(f"conducting geometry proposals...")
    for indx, index in enumerate(tqdm.tqdm(indices)):
        print(f"index {indx}")
        new_positions, logp_forward = geometry_engine.propose(
            top_prop, conformers[index], beta)
        logp_backward = geometry_engine.logp_reverse(top_prop, new_positions,
                                                     conformers[index], beta)
        print(
            f"\tlogp_forward, logp_backward: {logp_forward}, {logp_backward}")
        added_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential
        subtracted_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential
        print(
            f"\tadded_energy, subtracted_energy: {added_energy}, {subtracted_energy}"
        )
        work = logp_forward - logp_backward + added_energy - subtracted_energy
        rj_works.append(work)
        print(f"\ttotal work: {work}")

    return conformers, rj_works
Beispiel #19
0
def generate_dipeptide_top_pos_sys(topology,
                                   new_res,
                                   system,
                                   positions,
                                   system_generator,
                                   conduct_geometry_prop=True,
                                   conduct_htf_prop=False):
    """
    Generate a point mutation engine and conduct a topology proposal, optionally followed
    by a geometry proposal and the construction and endstate validation of a hybrid topology factory.

    Arguments
    ----------
    topology : openmm topology
        wildtype topology handed to the PointMutationEngine and used as the current topology
    new_res : str
        three-letter code of the residue that residue id '2' of chain '1' may mutate to
    system : openmm.System
        current system handed to the topology proposal
    positions : unit-bearing positions
        current positions handed to the geometry engine and the hybrid factory
    system_generator : SystemGenerator
        system generator handed to the PointMutationEngine
    conduct_geometry_prop : bool (default True)
        whether to run the forward geometry proposal and its reverse log-probability
    conduct_htf_prop : bool (default False)
        whether to build the HybridTopologyFactory and validate its endstate energies;
        requires conduct_geometry_prop

    Returns
    -------
    topology_proposal
        if conduct_geometry_prop is False
    (topology_proposal, forward_new_positions, logp_proposal, logp_reverse)
        if conduct_geometry_prop is True and conduct_htf_prop is False
    forward_htf
        if conduct_htf_prop is True

    Raises
    ------
    AssertionError
        if conduct_htf_prop is set without conduct_geometry_prop, or if the geometry engine
        reports reduced potentials although there are no unique new/old atoms
    """
    from perses.tests.utils import validate_endstate_energies
    if conduct_htf_prop:
        assert conduct_geometry_prop, f"the htf prop can only be conducted if there is a geometry proposal"
    #create the point mutation engine
    from perses.rjmc.topology_proposal import PointMutationEngine
    point_mutation_engine = PointMutationEngine(
        wildtype_topology=topology,
        system_generator=system_generator,
        chain_id=
        '1',  #denote the chain id allowed to mutate (it's always a string variable)
        max_point_mutants=1,
        residues_allowed_to_mutate=['2'],  #the residue ids allowed to mutate
        allowed_mutations=[
            ('2', new_res)
        ],  #the residue ids allowed to mutate with the three-letter code allowed to change
        aggregate=True)  #always allow aggregation

    #create a top proposal
    print(f"making topology proposal")
    topology_proposal = point_mutation_engine.propose(
        current_system=system, current_topology=topology)

    if not conduct_geometry_prop:
        return topology_proposal

    #create a geometry engine
    print(f"generating geometry engine")
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    geometry_engine = FFAllAngleGeometryEngine(
        metadata=None,
        use_sterics=False,
        n_bond_divisions=100,
        n_angle_divisions=180,
        n_torsion_divisions=360,
        verbose=True,
        storage=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        neglect_angles=False,
        use_14_nonbondeds=True)

    #make a geometry proposal forward
    print(
        f"making geometry proposal from {list(topology.residues())[1].name} to {new_res}"
    )
    forward_new_positions, logp_proposal = geometry_engine.propose(
        topology_proposal, positions, beta)
    logp_reverse = geometry_engine.logp_reverse(topology_proposal,
                                                forward_new_positions,
                                                positions, beta)

    if not conduct_htf_prop:
        return (topology_proposal, forward_new_positions, logp_proposal,
                logp_reverse)

    #create a hybrid topology factory
    from perses.annihilation.relative import HybridTopologyFactory
    forward_htf = HybridTopologyFactory(
        topology_proposal=topology_proposal,
        current_positions=positions,
        new_positions=forward_new_positions,
        use_dispersion_correction=False,
        functions=None,
        softcore_alpha=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        soften_only_new=False,
        neglected_new_angle_terms=[],
        neglected_old_angle_terms=[],
        softcore_LJ_v2=True,
        softcore_electrostatics=True,
        softcore_LJ_v2_alpha=0.85,
        softcore_electrostatics_alpha=0.3,
        softcore_sigma_Q=1.0,
        interpolate_old_and_new_14s=False,
        omitted_terms=None)

    # valence energy added by the forward proposal; zero when no atoms are created
    if not topology_proposal.unique_new_atoms:
        assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
        assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.forward_atoms_with_positions_reduced_potential})"
        # must bind the name that is passed to validate_endstate_energies below
        added_valence_energy = 0.0
    else:
        added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

    # valence energy removed by the reverse proposal; zero when no atoms are deleted
    if not topology_proposal.unique_old_atoms:
        assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
        assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.reverse_atoms_with_positions_reduced_potential})"
        subtracted_valence_energy = 0.0
    else:
        subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

    zero_state_error, one_state_error = validate_endstate_energies(
        forward_htf._topology_proposal,
        forward_htf,
        added_valence_energy,
        subtracted_valence_energy,
        beta=1.0 / (kB * temperature),
        ENERGY_THRESHOLD=ENERGY_THRESHOLD,
        platform=openmm.Platform.getPlatformByName('Reference'))
    print(f"zero state error : {zero_state_error}")
    print(f"one state error : {one_state_error}")

    return forward_htf
Beispiel #20
0
def generate_vacuum_topology_proposal(current_mol_name="benzene",
                                      proposed_mol_name="toluene"):
    """
    Build a vacuum test case for a small-molecule transformation from two IUPAC names:
    a topology proposal plus the positions before and after the geometry proposal.

    Parameters
    ----------
    current_mol_name : str, optional
        IUPAC name of the starting molecule; also used as the residue name
        given to the proposal engine
    proposed_mol_name : str, optional
        IUPAC name of the molecule to transform into

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The proposal returned by the SmallMoleculeSetProposalEngine
    current_positions : unit-bearing positions
        Positions of the starting molecule as returned by createSystemFromIUPAC
    new_positions : unit-bearing positions
        Positions produced by the geometry engine for the proposed molecule
    """
    from openmoltools import forcefield_generators

    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    # starting molecule: only the oemol, positions and topology are needed here
    old_mol, _unused_system, old_positions, old_topology = createSystemFromIUPAC(
        current_mol_name)
    new_mol = createOEMolFromIUPAC(proposed_mol_name)

    smiles_pair = [
        oechem.OEMolToSmiles(old_mol),
        oechem.OEMolToSmiles(new_mol),
    ]

    # system of the starting molecule, parametrized with GAFF templates
    gaff_for_forcefield = get_data_filename("data/gaff.xml")
    ff = app.ForceField(gaff_for_forcefield, 'tip3p.xml')
    ff.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
    initial_system = ff.createSystem(old_topology, removeCMMotion=False)

    # generator the proposal engine uses to build systems (no cutoff: vacuum)
    gaff_for_generator = get_data_filename('data/gaff.xml')
    generator_kwargs = {
        'removeCMMotion': False,
        'nonbondedMethod': app.NoCutoff
    }
    system_generator = SystemGenerator(
        [gaff_for_generator, 'amber99sbildn.xml', 'tip3p.xml'],
        forcefield_kwargs=generator_kwargs)

    geometry_engine = FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        smiles_pair, system_generator, residue_name=current_mol_name)

    # topology proposal first, then positions for it from the geometry engine
    topology_proposal = proposal_engine.propose(initial_system,
                                                old_topology,
                                                current_mol=old_mol,
                                                proposed_mol=new_mol)
    new_positions, _ = geometry_engine.propose(topology_proposal,
                                               old_positions, beta)

    return topology_proposal, old_positions, new_positions
Beispiel #21
0
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene",
                                           proposed_mol_name="benzene"):
    """
    Build a solvated test case for a small-molecule transformation from two IUPAC names:
    a topology proposal plus the solvated positions before and after the geometry proposal.

    Parameters
    ----------
    current_mol_name : str, optional
        IUPAC name of the starting molecule; also used as the residue name
        given to the proposal engine
    proposed_mol_name : str, optional
        IUPAC name of the molecule to transform into

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The proposal returned by the SmallMoleculeSetProposalEngine
    current_positions : unit-bearing positions
        Positions of the solvated starting system, taken from the Modeller
    new_positions : unit-bearing positions
        Positions produced by the geometry engine for the proposed system
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators

    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    # starting molecule: only the oemol, positions and topology are needed here
    old_mol, _unused_system, dry_positions, dry_topology = createSystemFromIUPAC(
        current_mol_name)
    new_mol = createOEMolFromIUPAC(proposed_mol_name)

    smiles_pair = [
        oechem.OEMolToSmiles(old_mol),
        oechem.OEMolToSmiles(new_mol),
    ]

    gaff_for_forcefield = get_data_filename("data/gaff.xml")
    ff = app.ForceField(gaff_for_forcefield, 'tip3p.xml')
    ff.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)

    # add tip3p water around the molecule with 9 angstrom padding
    solvation = app.Modeller(dry_topology, dry_positions)
    solvation.addSolvent(ff, model='tip3p', padding=9.0 * unit.angstrom)
    solvated_topology = solvation.getTopology()
    solvated_positions = solvation.getPositions()

    # periodic system for the starting state, with the barostat attached
    solvated_system = ff.createSystem(solvated_topology,
                                      nonbondedMethod=app.PME,
                                      removeCMMotion=False)
    barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature,
                                         50)
    solvated_system.addForce(barostat)

    # generator the proposal engine uses to build systems; it is given the same barostat
    gaff_for_generator = get_data_filename('data/gaff.xml')
    generator_kwargs = {
        'removeCMMotion': False,
        'nonbondedMethod': app.PME
    }
    system_generator = SystemGenerator(
        [gaff_for_generator, 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs=generator_kwargs)

    geometry_engine = FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        smiles_pair, system_generator, residue_name=current_mol_name)

    # topology proposal first, then positions for it from the geometry engine
    topology_proposal = proposal_engine.propose(solvated_system,
                                                solvated_topology)
    new_positions, _ = geometry_engine.propose(topology_proposal,
                                               solvated_positions, beta)

    return topology_proposal, solvated_positions, new_positions
Beispiel #22
0
    def __init__(self,
                 protein_filename,
                 mutation_chain_id,
                 mutation_residue_id,
                 proposed_residue,
                 phase='complex',
                 conduct_endstate_validation=True,
                 ligand_input=None,
                 ligand_index=0,
                 water_model='tip3p',
                 ionic_strength=0.15 * unit.molar,
                 forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                 barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
                 forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
                 periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
                 nonperiodic_forcefield_kwargs=None,
                 small_molecule_forcefields='gaff-2.11',
                 complex_box_dimensions=None,
                 apo_box_dimensions=None,
                 flatten_torsions=False,
                 flatten_exceptions=False,
                 repartitioned_endstate=None,
                 **kwargs):
        """
        arguments
            protein_filename : str
                path to protein (to mutate); .pdb
            mutation_chain_id : str
                name of the chain to be mutated
            mutation_residue_id : str
                residue id to change
            proposed_residue : str
                three letter code of the residue to mutate to
            phase : str, default complex
                if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
            conduct_endstate_validation : bool, default True
                whether to conduct an endstate validation of the HybridTopologyFactory. If using the RepartitionedHybridTopologyFactory,
                endstate validation cannot and will not be conducted.
            ligand_file : str, default None
                path to ligand of interest (i.e. small molecule or protein); .sdf or .pdb
            ligand_index : int, default 0
                which ligand to use
            water_model : str, default 'tip3p'
                solvent model to use for solvation
            ionic_strength : float * unit.molar, default 0.15 * unit.molar
                the total concentration of ions (both positive and negative) to add using Modeller.
                This does not include ions that are added to neutralize the system.
                Note that only monovalent ions are currently supported.
            forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
                forcefield files for proteins and solvent
            barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
                barostat to use
            forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
                forcefield kwargs for system parametrization
            periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
                periodic forcefield kwargs for system parametrization
            nonperiodic_forcefield_kwargs : dict, default None
                non-periodic forcefield kwargs for system parametrization
            small_molecule_forcefields : str, default 'gaff-2.11'
                the forcefield string for small molecule parametrization
            complex_box_dimensions : Vec3, default None
                define box dimensions of complex phase;
                if None, padding is 1nm
            apo_box_dimensions :  Vec3, default None
                define box dimensions of apo phase phase;
                if None, padding is 1nm
            flatten_torsions : bool, default False
                in the htf, flatten torsions involving unique new atoms at lambda = 0 and unique old atoms are lambda = 1
            flatten_exceptions : bool, default False
                in the htf, flatten exceptions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1
            repartitioned_endstate : int, default None
                the endstate (0 or 1) at which to build the RepartitionedHybridTopologyFactory. By default, this is None,
                meaning a vanilla HybridTopologyFactory will be built.
        TODO : allow argument for spectator ligands besides the 'ligand_file'

        """

        # First thing to do is load the apo protein to mutate...
        protein_pdbfile = open(protein_filename, 'r')
        protein_pdb = app.PDBFile(protein_pdbfile)
        protein_pdbfile.close()
        protein_positions, protein_topology, protein_md_topology = protein_pdb.positions, protein_pdb.topology, md.Topology.from_openmm(protein_pdb.topology)
        protein_topology = protein_md_topology.to_openmm()
        protein_n_atoms = protein_md_topology.n_atoms

        # Load the ligand, if present
        molecules = []
        if ligand_input:
            if isinstance(ligand_input, str):
                if ligand_input.endswith('.sdf'): # small molecule
                        ligand_mol = createOEMolFromSDF(ligand_input, index=ligand_index)
                        molecules.append(Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
                        ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_mol),  forcefield_generators.generateTopologyFromOEMol(ligand_mol)
                        ligand_md_topology = md.Topology.from_openmm(ligand_topology)
                        ligand_n_atoms = ligand_md_topology.n_atoms

                if ligand_input.endswith('pdb'): # protein
                    ligand_pdbfile = open(ligand_input, 'r')
                    ligand_pdb = app.PDBFile(ligand_pdbfile)
                    ligand_pdbfile.close()
                    ligand_positions, ligand_topology, ligand_md_topology = ligand_pdb.positions, ligand_pdb.topology, md.Topology.from_openmm(
                        ligand_pdb.topology)
                    ligand_n_atoms = ligand_md_topology.n_atoms

            elif isinstance(ligand_input, oechem.OEMol): # oemol object
                molecules.append(Molecule.from_openeye(ligand_input, allow_undefined_stereo=False))
                ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_input),  forcefield_generators.generateTopologyFromOEMol(ligand_input)
                ligand_md_topology = md.Topology.from_openmm(ligand_topology)
                ligand_n_atoms = ligand_md_topology.n_atoms

            else:
                _logger.warning(f'ligand filetype not recognised. Please provide a path to a .pdb or .sdf file')
                return

            # Now create a complex
            complex_md_topology = protein_md_topology.join(ligand_md_topology)
            complex_topology = complex_md_topology.to_openmm()
            complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers)
            complex_positions[:protein_n_atoms, :] = protein_positions
            complex_positions[protein_n_atoms:, :] = ligand_positions

        # Now for a system_generator
        self.system_generator = SystemGenerator(forcefields=forcefield_files,
                                                barostat=barostat,
                                                forcefield_kwargs=forcefield_kwargs,
                                                periodic_forcefield_kwargs=periodic_forcefield_kwargs,
                                                nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                                                small_molecule_forcefield=small_molecule_forcefields,
                                                molecules=molecules,
                                                cache=None)

        # Solvate apo and complex...
        apo_input = list(self._solvate(protein_topology, protein_positions, water_model, phase, ionic_strength, apo_box_dimensions))
        inputs = [apo_input]
        if ligand_input:
            inputs.append(self._solvate(complex_topology, complex_positions, water_model, phase, ionic_strength, complex_box_dimensions))

        geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                                use_sterics=False,
                                                n_bond_divisions=100,
                                                n_angle_divisions=180,
                                                n_torsion_divisions=360,
                                                verbose=True,
                                                storage=None,
                                                bond_softening_constant=1.0,
                                                angle_softening_constant=1.0,
                                                neglect_angles = False,
                                                use_14_nonbondeds = True)


        # Run pipeline...
        htfs = []
        for (top, pos, sys) in inputs:
            point_mutation_engine = PointMutationEngine(wildtype_topology=top,
                                                                 system_generator=self.system_generator,
                                                                 chain_id=mutation_chain_id, # Denote the chain id allowed to mutate (it's always a string variable)
                                                                 max_point_mutants=1,
                                                                 residues_allowed_to_mutate=[mutation_residue_id], # The residue ids allowed to mutate
                                                                 allowed_mutations=[(mutation_residue_id, proposed_residue)], # The residue ids allowed to mutate with the three-letter code allowed to change
                                                                 aggregate=True) # Always allow aggregation

            topology_proposal = point_mutation_engine.propose(sys, top)

            # Only validate energy bookkeeping if the WT and proposed residues do not involve rings
            old_res = [res for res in top.residues() if res.id == mutation_residue_id][0]
            validate_bool = False if old_res.name in ring_amino_acids or proposed_residue in ring_amino_acids else True
            new_positions, logp_proposal = geometry_engine.propose(topology_proposal, pos, beta,
                                                                   validate_energy_bookkeeping=validate_bool)
            logp_reverse = geometry_engine.logp_reverse(topology_proposal, new_positions, pos, beta,
                                                        validate_energy_bookkeeping=validate_bool)

            if repartitioned_endstate is None:
                factory = HybridTopologyFactory
            elif repartitioned_endstate in [0, 1]:
                factory = RepartitionedHybridTopologyFactory

            forward_htf = factory(topology_proposal=topology_proposal,
                                  current_positions=pos,
                                  new_positions=new_positions,
                                  use_dispersion_correction=False,
                                  functions=None,
                                  softcore_alpha=None,
                                  bond_softening_constant=1.0,
                                  angle_softening_constant=1.0,
                                  soften_only_new=False,
                                  neglected_new_angle_terms=[],
                                  neglected_old_angle_terms=[],
                                  softcore_LJ_v2=True,
                                  softcore_electrostatics=True,
                                  softcore_LJ_v2_alpha=0.85,
                                  softcore_electrostatics_alpha=0.3,
                                  softcore_sigma_Q=1.0,
                                  interpolate_old_and_new_14s=flatten_exceptions,
                                  omitted_terms=None,
                                  endstate=repartitioned_endstate,
                                  flatten_torsions=flatten_torsions)

            if not topology_proposal.unique_new_atoms:
                assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})"
                assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})"
            else:
                added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

            if not topology_proposal.unique_old_atoms:
                assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})"
                assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})"
                subtracted_valence_energy = 0.0
            else:
                subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential


            if conduct_endstate_validation and repartitioned_endstate is None:
                zero_state_error, one_state_error = validate_endstate_energies(forward_htf._topology_proposal, forward_htf, added_valence_energy, subtracted_valence_energy, beta=beta, ENERGY_THRESHOLD=ENERGY_THRESHOLD)
                if zero_state_error > ENERGY_THRESHOLD:
                    _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 0 state is above the threshold ({ENERGY_THRESHOLD}): {zero_state_error}")
                if one_state_error > ENERGY_THRESHOLD:
                    _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 1 state is above the threshold ({ENERGY_THRESHOLD}): {one_state_error}")
            else:
                pass

            htfs.append(forward_htf)

        self.apo_htf = htfs[0]
        self.complex_htf = htfs[1] if ligand_input else None
Beispiel #23
0
class ExpandedEnsembleSampler(object):
    """
    Method of expanded ensembles sampling engine.

    Properties
    ----------
    sampler : MCMCSampler
        The MCMC sampler used for updating positions.
    proposal_engine : ProposalEngine
        The ProposalEngine to use for proposing new sampler states and topologies.
    system_generator : SystemGenerator
        The SystemGenerator to use for creating System objects following proposals.
    state_key : hashable object
        The key of the current sampler state. Can be any hashable object.
    state_keys : iterable of hashable object
        Keys of all known states (the keys of the log-weight table).
    iteration : int
        Iterations completed.
    naccepted : int
        Number of accepted thermodynamic/chemical state changes.
    nrejected : int
        Number of rejected thermodynamic/chemical state changes.
    number_of_state_visits : dict of state_key
        Cumulative counts of visited states.
    verbose : bool
        If True, verbose output is printed.

    References
    ----------
    [1] Lyubartsev AP, Martsinovski AA, Shevkunov SV, and Vorontsov-Velyaminov PN. New approach to Monte Carlo calculation of the free energy: Method of expanded ensembles. JCP 96:1776, 1992
    http://dx.doi.org/10.1063/1.462133

    Examples
    --------
    >>> # Create a test system
    >>> test = testsystems.AlanineDipeptideVacuum()
    >>> # Create a SystemGenerator and rebuild the System.
    >>> from perses.rjmc.topology_proposal import SystemGenerator
    >>> system_generator = SystemGenerator(['amber99sbildn.xml'], forcefield_kwargs={ 'nonbondedMethod' : app.NoCutoff, 'implicitSolvent' : None, 'constraints' : None })
    >>> test.system = system_generator.build_system(test.topology)
    >>> # Create a sampler state.
    >>> sampler_state = SamplerState(system=test.system, positions=test.positions)
    >>> # Create a thermodynamic state.
    >>> thermodynamic_state = ThermodynamicState(system=test.system, temperature=298.0*unit.kelvin)
    >>> # Create an MCMC sampler
    >>> mcmc_sampler = MCMCSampler(thermodynamic_state, sampler_state)
    >>> # Turn off verbosity
    >>> mcmc_sampler.verbose = False
    >>> # Create an Expanded Ensemble sampler
    >>> from perses.rjmc.topology_proposal import PointMutationEngine
    >>> allowed_mutations = [[('2','ALA')],[('2','VAL'),('2','LEU')]]
    >>> proposal_engine = PointMutationEngine(system_generator, max_point_mutants=1, chain_id='1', proposal_metadata=None, allowed_mutations=allowed_mutations)
    >>> exen_sampler = ExpandedEnsembleSampler(mcmc_sampler, test.topology, 'ACE-ALA-NME', proposal_engine)
    >>> # Run the sampler
    >>> exen_sampler.run()

    """
    def __init__(self, sampler, topology, state_key, proposal_engine, log_weights=None, scheme='ncmc-geometry-ncmc', options=None, platform=None):
        r"""
        Create an expanded ensemble sampler.

        p(x,k) \propto \exp[-u_k(x) + g_k]

        where g_k is the log weight.

        Parameters
        ----------
        sampler : MCMCSampler
            MCMCSampler initialized with current SamplerState
        topology : simtk.openmm.app.Topology
            Current topology
        state_key : hashable object
            Current chemical state
        proposal_engine : ProposalEngine
            ProposalEngine to use for proposing new chemical states
        log_weights : dict of object : float, optional, default=None
            Log weights to use for expanded ensemble biases.
            If None, an empty dict is created. A dict passed in is stored by reference.
        scheme : str, optional, default='ncmc-geometry-ncmc'
            Update scheme. One of ['ncmc-geometry-ncmc', 'geometry-ncmc-geometry', 'geometry-ncmc']
            NOTE(review): update_state currently only handles 'ncmc-geometry-ncmc' and raises for anything else.
        options : dict, optional, default=None
            Options for initializing switching scheme, such as 'timestep', 'nsteps', 'functions' for NCMC.
            Missing keys (or options=None) are forwarded to the NCMC engine as None.
            The dict passed in is never modified.
        platform : simtk.openmm.Platform, optional, default=None
            Platform forwarded to the NCMC engine used for switching.

        """
        # Keep references to the initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self.topology = topology
        self.state_key = state_key
        self.proposal_engine = proposal_engine
        self.log_weights = dict() if log_weights is None else log_weights
        self.scheme = scheme

        # Read the NCMC options from a private copy: the previous mutable default
        # (options=dict()) was filled in place, leaking keys into the shared default
        # object and into the caller's own dict.
        ncmc_options = dict() if options is None else dict(options)

        # Initialize
        self.iteration = 0
        from perses.annihilation.ncmc_switching import NCMCEngine
        self.ncmc_engine = NCMCEngine(temperature=self.sampler.thermodynamic_state.temperature,
                                      timestep=ncmc_options.get('timestep'),
                                      nsteps=ncmc_options.get('nsteps'),
                                      functions=ncmc_options.get('functions'),
                                      platform=platform)
        from perses.rjmc.geometry import FFAllAngleGeometryEngine
        self.geometry_engine = FFAllAngleGeometryEngine({'data': 0})
        self.naccepted = 0
        self.nrejected = 0
        self.number_of_state_visits = dict()
        self.verbose = False
        self.pdbfile = None # if not None, write PDB file
        self.geometry_pdbfile = None # if not None, write PDB file of geometry proposals
        self.accept_everything = False # if True, will accept anything that doesn't lead to NaNs


    @property
    def state_keys(self):
        return log_weights.keys()

    def get_log_weight(self, state_key):
        """
        Get the log weight of the specified state.

        Parameters
        ----------
        state_key : hashable object
            The state key (e.g. chemical state key) to look up.

        Returns
        -------
        log_weight : float
            The log weight of the provided state key.

        Note
        ----
        This adds the key to the self.log_weights dict.

        """
        if state_key not in self.log_weights:
            self.log_weights[state_key] = 0.0
        return self.log_weights[state_key]

    def update_positions(self):
        """
        Sample new positions.
        """
        self.sampler.update()

    def update_state(self):
        """
        Sample the thermodynamic state.
        """
        # Check that system and topology have same number of atoms.
        old_system = self.sampler.sampler_state.system
        old_topology = self.topology
        old_topology_natoms = sum([1 for atom in old_topology.atoms()]) # number of topology atoms
        old_system_natoms = old_system.getNumParticles()
        if old_topology_natoms != old_system_natoms:
            msg = 'ExpandedEnsembleSampler: topology has %d atoms, while system has %d atoms' % (old_topology_natoms, old_system_natoms)
            raise Exception(msg)

        if self.scheme == 'ncmc-geometry-ncmc':
            if self.verbose: print("Updating chemical state with ncmc-geometry-ncmc scheme...")

            # DEBUG: Check current topology can be built.
            try:
                self.proposal_engine._system_generator.build_system(self.topology)
            except Exception as e:
                msg = str(e)
                msg += '\n'
                msg += 'ExpandedEnsembleSampler.update_sampler: self.topology before ProposalEngine call cannot be built into a system'
                raise Exception(msg)

            # Propose new chemical state.
            if self.verbose: print("Proposing new topology...")
            [system, topology, positions] = [self.sampler.thermodynamic_state.system, self.topology, self.sampler.sampler_state.positions]
            topology_proposal = self.proposal_engine.propose(system, topology)
            if self.verbose: print("Proposed transformation: %s => %s" % (topology_proposal.old_chemical_state_key, topology_proposal.new_chemical_state_key))

            # DEBUG: Check current topology can be built.
            if self.verbose: print("Generating new system...")
            try:
                self.proposal_engine._system_generator.build_system(topology_proposal.new_topology)
            except Exception as e:
                msg = str(e)
                msg += '\n'
                msg += 'ExpandedEnsembleSampler.update_sampler: toology_proposal.new_topology before ProposalEngine call cannot be built into a system'
                raise Exception(msg)

            # Check to make sure no out-of-bounds atoms are present in new_to_old_atom_map
            natoms_old = topology_proposal.old_system.getNumParticles()
            natoms_new = topology_proposal.new_system.getNumParticles()
            if not set(topology_proposal.new_to_old_atom_map.values()).issubset(range(natoms_old)):
                msg = "Some old atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_old
                msg += str(topology_proposal.new_to_old_atom_map)
                raise Exception(msg)
            if not set(topology_proposal.new_to_old_atom_map.keys()).issubset(range(natoms_new)):
                msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_new
                msg += str(topology_proposal.new_to_old_atom_map)
                raise Exception(msg)

            # Determine state keys
            old_state_key = self.state_key
            new_state_key = topology_proposal.new_chemical_state_key

            # Determine log weight
            old_log_weight = self.get_log_weight(old_state_key)
            new_log_weight = self.get_log_weight(new_state_key)

            if self.verbose: print("Performing NCMC annihilation")
            # Alchemically eliminate atoms being removed.
            [ncmc_old_positions, ncmc_elimination_logp, potential_delete] = self.ncmc_engine.integrate(topology_proposal, positions, direction='delete')
            # Check that positions are not NaN
            if np.any(np.isnan(ncmc_old_positions)):
                raise Exception("Positions are NaN after NCMC delete with %d steps" % switching_nsteps)

            if self.verbose: print("Geometry engine proposal...")
            # Generate coordinates for new atoms and compute probability ratio of old and new probabilities.
            geometry_old_positions = ncmc_old_positions
            geometry_new_positions, geometry_logp_propose = self.geometry_engine.propose(topology_proposal, geometry_old_positions, self.sampler.thermodynamic_state.beta)

            if self.geometry_pdbfile is not None:
                print("Writing proposed geometry...")
                #self.geometry_pdbfile.write('MODEL     %4d\n' % (self.iteration+1)) # PyMOL doesn't render connectivity correctly this way
                from simtk.openmm.app import PDBFile
                PDBFile.writeFile(topology_proposal.new_topology, geometry_new_positions, file=self.geometry_pdbfile)
                #self.geometry_pdbfile.write('ENDMDL\n')
                self.geometry_pdbfile.flush()

            geometry_logp_reverse = self.geometry_engine.logp_reverse(topology_proposal, geometry_new_positions, geometry_old_positions, self.sampler.thermodynamic_state.beta)
            geometry_logp = geometry_logp_reverse - geometry_logp_propose

            if self.verbose: print("Performing NCMC insertion")
            # Alchemically introduce new atoms.
            [ncmc_new_positions, ncmc_introduction_logp, potential_insert] = self.ncmc_engine.integrate(topology_proposal, geometry_new_positions, direction='insert')
            # Check that positions are not NaN
            if np.any(np.isnan(ncmc_new_positions)):
                raise Exception("Positions are NaN after NCMC insert with %d steps" % switching_nsteps)

            # Compute change in eliminated potential contribution.
            switch_logp = - (potential_insert - potential_delete)
            if self.verbose:
                print('potential before geometry  : %12.3f kT' % potential_delete)
                print('potential after geometry   : %12.3f kT' % potential_insert)
                print('---------------------------------------------------------')
                print('switch_logp                : %12.3f' % switch_logp)
                print('geometry_logp_propose      : %12.3f' % geometry_logp_propose)
                print('geometry_logp_reverse      : %12.3f' % geometry_logp_reverse)

            # Compute total log acceptance probability, including all components.
            logp_accept = topology_proposal.logp_proposal + geometry_logp + switch_logp + ncmc_elimination_logp + ncmc_introduction_logp + new_log_weight - old_log_weight
            if self.verbose:
                print("logp_accept = %+10.4e [logp_proposal %+10.4e geometry_logp %+10.4e switch_logp %+10.4e ncmc_elimination_logp %+10.4e ncmc_introduction_logp %+10.4e old_log_weight %+10.4e new_log_weight %+10.4e]"
                    % (logp_accept, topology_proposal.logp_proposal, geometry_logp, switch_logp, ncmc_elimination_logp, ncmc_introduction_logp, old_log_weight, new_log_weight))

            # Accept or reject.
            if np.isnan(logp_accept):
                accept = False
                print('logp_accept = NaN')
            else:
                accept = ((logp_accept>=0.0) or (np.random.uniform() < np.exp(logp_accept)))
                if self.accept_everything:
                    print('accept_everything option is turned on; accepting')
                    accept = True

            if accept:
                self.sampler.thermodynamic_state.system = topology_proposal.new_system
                self.sampler.sampler_state.system = topology_proposal.new_system
                self.topology = topology_proposal.new_topology
                self.sampler.sampler_state.positions = ncmc_new_positions
                self.state_key = topology_proposal.new_chemical_state_key
                self.naccepted += 1
                if self.verbose: print("    accepted")
            else:
                self.nrejected += 1
                if self.verbose: print("    rejected")

        else:
            raise Exception("Expanded ensemble state proposal scheme '%s' unsupported" % self.scheme)

        # Update statistics.
        self.update_statistics()

    def update(self):
        """
        Update the sampler with one step of sampling.
        """
        if self.verbose:
            print("-" * 80)
            print("Expanded Ensemble sampler iteration %8d" % self.iteration)
        self.update_positions()
        self.update_state()
        self.iteration += 1
        if self.verbose:
            print("-" * 80)

        if self.pdbfile is not None:
            print("Writing frame...")
            from simtk.openmm.app import PDBFile
            PDBFile.writeModel(self.topology, self.sampler.sampler_state.positions, self.pdbfile, self.iteration)
            self.pdbfile.flush()

    def run(self, niterations=1):
        """
        Run the sampler for the specified number of iterations

        Parameters
        ----------
        niterations : int, optional, default=1
            Number of iterations to run the sampler for.
        """
        for iteration in range(niterations):
            self.update()

    def update_statistics(self):
        """
        Update sampler statistics.
        """
        if self.state_key not in self.number_of_state_visits:
            self.number_of_state_visits[self.state_key] = 0
        self.number_of_state_visits[self.state_key] += 1