def run_rj_proposals(top_prop, configuration_traj, use_sterics, ncmc_nsteps, n_replicates, box_vectors, temperature=300.0*unit.kelvin):
    ncmc_engine = NCMCEngine(nsteps=ncmc_nsteps, pressure=1.0*unit.atmosphere)
    geometry_engine = FFAllAngleGeometryEngine(use_sterics=use_sterics)
    initial_thermodynamic_state = states.ThermodynamicState(top_prop.old_system, temperature=temperature, pressure=1.0*unit.atmosphere)
    final_thermodynamic_state = states.ThermodynamicState(top_prop.new_system, temperature=temperature, pressure=1.0*unit.atmosphere)
    traj_indices = np.arange(0, configuration_traj.n_frames)
    results = np.zeros([n_replicates, 7])
    beta = 1.0 / (temperature * constants.kB)

    for i in tqdm.trange(n_replicates):
        frame_index = np.random.choice(traj_indices)

        initial_sampler_state = traj_frame_to_sampler_state(configuration_traj, frame_index,box_vectors)
        initial_logP = - compute_reduced_potential(initial_thermodynamic_state, initial_sampler_state)

        proposed_geometry, logP_geometry_forward = geometry_engine.propose(top_prop, initial_sampler_state.positions, beta)

        proposed_sampler_state = states.SamplerState(proposed_geometry, box_vectors=initial_sampler_state.box_vectors)

        final_old_sampler_state, final_sampler_state, logP_work, initial_hybrid_logP, final_hybrid_logP = ncmc_engine.integrate(top_prop, initial_sampler_state, proposed_sampler_state)

        final_logP = - compute_reduced_potential(final_thermodynamic_state, final_sampler_state)

        logP_reverse = geometry_engine.logp_reverse(top_prop, final_sampler_state.positions, final_old_sampler_state.positions, beta)

        results[i, 0] = initial_logP
        results[i, 1] = logP_reverse
        results[i, 2] = final_logP
        results[i, 3] = logP_work
        results[i, 4] = initial_hybrid_logP
        results[i, 5] = final_hybrid_logP
        results[i, 6] = logP_geometry_forward

    return results
Beispiel #2
0
def run_rj_proposals(top_prop, configuration_traj, use_sterics, ncmc_nsteps, n_replicates, bond_softening_constant=1.0, angle_softening_constant=1.0):
    ncmc_engine = NCMCEngine(nsteps=ncmc_nsteps, pressure=1.0*unit.atmosphere, bond_softening_constant=bond_softening_constant, angle_softening_constant=angle_softening_constant)
    geometry_engine = FFAllAngleGeometryEngine(use_sterics=use_sterics, bond_softening_constant=bond_softening_constant, angle_softening_constant=angle_softening_constant)
    initial_thermodynamic_state = states.ThermodynamicState(top_prop.old_system, temperature=temperature, pressure=1.0*unit.atmosphere)
    final_thermodynamic_state = states.ThermodynamicState(top_prop.new_system, temperature=temperature, pressure=1.0*unit.atmosphere)
    traj_indices = np.arange(0, configuration_traj.n_frames)
    results = np.zeros([n_replicates, 4])

    for i in tqdm.trange(n_replicates):
        frame_index = np.random.choice(traj_indices)
        initial_sampler_state = traj_frame_to_sampler_state(configuration_traj, frame_index)

        initial_logP = - compute_reduced_potential(initial_thermodynamic_state, initial_sampler_state)

        proposed_geometry, logP_geometry_forward = geometry_engine.propose(top_prop, initial_sampler_state.positions, beta)

        proposed_sampler_state = states.SamplerState(proposed_geometry, box_vectors=initial_sampler_state.box_vectors)

        final_old_sampler_state, final_sampler_state, logP_work, initial_hybrid_logP, final_hybrid_logP = ncmc_engine.integrate(top_prop, initial_sampler_state, proposed_sampler_state)

        final_logP = - compute_reduced_potential(final_thermodynamic_state, final_sampler_state)

        logP_reverse = geometry_engine.logp_reverse(top_prop, final_sampler_state.positions, final_old_sampler_state.positions, beta)

        results[i, 0] = initial_hybrid_logP - initial_logP
        results[i, 1] = logP_reverse - logP_geometry_forward
        results[i, 2] = final_logP - final_hybrid_logP
        results[i, 3] = logP_work

    return results
    def run_rj_simple_system(self, configurations_initial, topology_proposal, n_replicates):
        """
        Function to execute reversibje jump MC

        Arguments
        ---------
        configurations_initial: openmm.Quantity
            n_replicate frames of equilibrium simulation of initial system
        topology_proposal: dict
            perses.topology_proposal object
        n_replicates: int
            number of replicates to simulate

        Returns
        -------
        logPs: numpy ndarray
            shape = (n_replicates, 4) where logPs[i] = (reduced potential of initial molecule, log proposal probability, reversed log proposal probability, reduced potential of proposed molecule)
        final_positions: list
            list of openmm position objects for final molecule proposal
        """
        import tqdm
        from perses.rjmc.geometry import FFAllAngleGeometryEngine
        final_positions = []
        logPs = np.zeros([n_replicates, 4])
        _geometry_engine = FFAllAngleGeometryEngine(metadata=None, use_sterics=False, n_bond_divisions=1000, n_angle_divisions=180, n_torsion_divisions=360, verbose=True, storage=None, bond_softening_constant=1.0, angle_softening_constant=1.0, neglect_angles = True)
        for _replicate_idx in tqdm.trange(n_replicates):
            _old_positions = configurations_initial[_replicate_idx, :, :]
            _new_positions, _lp = _geometry_engine.propose(topology_proposal, _old_positions, beta)
            _lp_reverse = _geometry_engine.logp_reverse(topology_proposal, _new_positions, _old_positions, beta)
            _initial_rp = self.compute_rp(topology_proposal.old_system, _old_positions)
            if not topology_proposal.unique_old_atoms: #the geometry engine doesn't run the backward proposal
                logPs[_replicate_idx, 0] = _geometry_engine.forward_atoms_with_positions_reduced_potential
                logPs[_replicate_idx, 3] = _geometry_engine.forward_final_context_reduced_potential
            elif not topology_proposal.unique_new_atoms: #the geometry engine doesn't run forward
                logPs[_replicate_idx, 0] = _geometry_engine.reverse_final_context_reduced_potential
                logPs[_replicate_idx, 3] = _geometry_engine.reverse_atoms_with_positions_reduced_potential
            else:
                logPs[_replicate_idx, 0] = _geometry_engine.reverse_final_context_reduced_potential
                logPs[_replicate_idx, 3] = _geometry_engine.forward_final_context_reduced_potential
            logPs[_replicate_idx, 1] = _lp
            logPs[_replicate_idx, 2] = _lp_reverse
            final_rp = self.compute_rp(topology_proposal.new_system, _new_positions)
            final_positions.append(_new_positions)
        return logPs, final_positions
Beispiel #4
0
    def __init__(self,
                 protein_filename,
                 mutation_chain_id,
                 mutation_residue_id,
                 proposed_residue,
                 phase='complex',
                 conduct_endstate_validation=True,
                 ligand_input=None,
                 ligand_index=0,
                 water_model='tip3p',
                 ionic_strength=0.15 * unit.molar,
                 forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                 barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
                 forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
                 periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
                 nonperiodic_forcefield_kwargs=None,
                 small_molecule_forcefields='gaff-2.11',
                 complex_box_dimensions=None,
                 apo_box_dimensions=None,
                 flatten_torsions=False,
                 flatten_exceptions=False,
                 repartitioned_endstate=None,
                 **kwargs):
        """
        arguments
            protein_filename : str
                path to protein (to mutate); .pdb
            mutation_chain_id : str
                name of the chain to be mutated
            mutation_residue_id : str
                residue id to change
            proposed_residue : str
                three letter code of the residue to mutate to
            phase : str, default complex
                if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
            conduct_endstate_validation : bool, default True
                whether to conduct an endstate validation of the HybridTopologyFactory. If using the RepartitionedHybridTopologyFactory,
                endstate validation cannot and will not be conducted.
            ligand_file : str, default None
                path to ligand of interest (i.e. small molecule or protein); .sdf or .pdb
            ligand_index : int, default 0
                which ligand to use
            water_model : str, default 'tip3p'
                solvent model to use for solvation
            ionic_strength : float * unit.molar, default 0.15 * unit.molar
                the total concentration of ions (both positive and negative) to add using Modeller.
                This does not include ions that are added to neutralize the system.
                Note that only monovalent ions are currently supported.
            forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
                forcefield files for proteins and solvent
            barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
                barostat to use
            forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
                forcefield kwargs for system parametrization
            periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
                periodic forcefield kwargs for system parametrization
            nonperiodic_forcefield_kwargs : dict, default None
                non-periodic forcefield kwargs for system parametrization
            small_molecule_forcefields : str, default 'gaff-2.11'
                the forcefield string for small molecule parametrization
            complex_box_dimensions : Vec3, default None
                define box dimensions of complex phase;
                if None, padding is 1nm
            apo_box_dimensions :  Vec3, default None
                define box dimensions of apo phase phase;
                if None, padding is 1nm
            flatten_torsions : bool, default False
                in the htf, flatten torsions involving unique new atoms at lambda = 0 and unique old atoms are lambda = 1
            flatten_exceptions : bool, default False
                in the htf, flatten exceptions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1
            repartitioned_endstate : int, default None
                the endstate (0 or 1) at which to build the RepartitionedHybridTopologyFactory. By default, this is None,
                meaning a vanilla HybridTopologyFactory will be built.
        TODO : allow argument for spectator ligands besides the 'ligand_file'

        """

        # First thing to do is load the apo protein to mutate...
        protein_pdbfile = open(protein_filename, 'r')
        protein_pdb = app.PDBFile(protein_pdbfile)
        protein_pdbfile.close()
        protein_positions, protein_topology, protein_md_topology = protein_pdb.positions, protein_pdb.topology, md.Topology.from_openmm(protein_pdb.topology)
        protein_topology = protein_md_topology.to_openmm()
        protein_n_atoms = protein_md_topology.n_atoms

        # Load the ligand, if present
        molecules = []
        if ligand_input:
            if isinstance(ligand_input, str):
                if ligand_input.endswith('.sdf'): # small molecule
                        ligand_mol = createOEMolFromSDF(ligand_input, index=ligand_index)
                        molecules.append(Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
                        ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_mol),  forcefield_generators.generateTopologyFromOEMol(ligand_mol)
                        ligand_md_topology = md.Topology.from_openmm(ligand_topology)
                        ligand_n_atoms = ligand_md_topology.n_atoms

                if ligand_input.endswith('pdb'): # protein
                    ligand_pdbfile = open(ligand_input, 'r')
                    ligand_pdb = app.PDBFile(ligand_pdbfile)
                    ligand_pdbfile.close()
                    ligand_positions, ligand_topology, ligand_md_topology = ligand_pdb.positions, ligand_pdb.topology, md.Topology.from_openmm(
                        ligand_pdb.topology)
                    ligand_n_atoms = ligand_md_topology.n_atoms

            elif isinstance(ligand_input, oechem.OEMol): # oemol object
                molecules.append(Molecule.from_openeye(ligand_input, allow_undefined_stereo=False))
                ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_input),  forcefield_generators.generateTopologyFromOEMol(ligand_input)
                ligand_md_topology = md.Topology.from_openmm(ligand_topology)
                ligand_n_atoms = ligand_md_topology.n_atoms

            else:
                _logger.warning(f'ligand filetype not recognised. Please provide a path to a .pdb or .sdf file')
                return

            # Now create a complex
            complex_md_topology = protein_md_topology.join(ligand_md_topology)
            complex_topology = complex_md_topology.to_openmm()
            complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers)
            complex_positions[:protein_n_atoms, :] = protein_positions
            complex_positions[protein_n_atoms:, :] = ligand_positions

        # Now for a system_generator
        self.system_generator = SystemGenerator(forcefields=forcefield_files,
                                                barostat=barostat,
                                                forcefield_kwargs=forcefield_kwargs,
                                                periodic_forcefield_kwargs=periodic_forcefield_kwargs,
                                                nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                                                small_molecule_forcefield=small_molecule_forcefields,
                                                molecules=molecules,
                                                cache=None)

        # Solvate apo and complex...
        apo_input = list(self._solvate(protein_topology, protein_positions, water_model, phase, ionic_strength, apo_box_dimensions))
        inputs = [apo_input]
        if ligand_input:
            inputs.append(self._solvate(complex_topology, complex_positions, water_model, phase, ionic_strength, complex_box_dimensions))

        geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                                use_sterics=False,
                                                n_bond_divisions=100,
                                                n_angle_divisions=180,
                                                n_torsion_divisions=360,
                                                verbose=True,
                                                storage=None,
                                                bond_softening_constant=1.0,
                                                angle_softening_constant=1.0,
                                                neglect_angles = False,
                                                use_14_nonbondeds = True)


        # Run pipeline...
        htfs = []
        for (top, pos, sys) in inputs:
            point_mutation_engine = PointMutationEngine(wildtype_topology=top,
                                                                 system_generator=self.system_generator,
                                                                 chain_id=mutation_chain_id, # Denote the chain id allowed to mutate (it's always a string variable)
                                                                 max_point_mutants=1,
                                                                 residues_allowed_to_mutate=[mutation_residue_id], # The residue ids allowed to mutate
                                                                 allowed_mutations=[(mutation_residue_id, proposed_residue)], # The residue ids allowed to mutate with the three-letter code allowed to change
                                                                 aggregate=True) # Always allow aggregation

            topology_proposal = point_mutation_engine.propose(sys, top)

            # Only validate energy bookkeeping if the WT and proposed residues do not involve rings
            old_res = [res for res in top.residues() if res.id == mutation_residue_id][0]
            validate_bool = False if old_res.name in ring_amino_acids or proposed_residue in ring_amino_acids else True
            new_positions, logp_proposal = geometry_engine.propose(topology_proposal, pos, beta,
                                                                   validate_energy_bookkeeping=validate_bool)
            logp_reverse = geometry_engine.logp_reverse(topology_proposal, new_positions, pos, beta,
                                                        validate_energy_bookkeeping=validate_bool)

            if repartitioned_endstate is None:
                factory = HybridTopologyFactory
            elif repartitioned_endstate in [0, 1]:
                factory = RepartitionedHybridTopologyFactory

            forward_htf = factory(topology_proposal=topology_proposal,
                                  current_positions=pos,
                                  new_positions=new_positions,
                                  use_dispersion_correction=False,
                                  functions=None,
                                  softcore_alpha=None,
                                  bond_softening_constant=1.0,
                                  angle_softening_constant=1.0,
                                  soften_only_new=False,
                                  neglected_new_angle_terms=[],
                                  neglected_old_angle_terms=[],
                                  softcore_LJ_v2=True,
                                  softcore_electrostatics=True,
                                  softcore_LJ_v2_alpha=0.85,
                                  softcore_electrostatics_alpha=0.3,
                                  softcore_sigma_Q=1.0,
                                  interpolate_old_and_new_14s=flatten_exceptions,
                                  omitted_terms=None,
                                  endstate=repartitioned_endstate,
                                  flatten_torsions=flatten_torsions)

            if not topology_proposal.unique_new_atoms:
                assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})"
                assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})"
            else:
                added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

            if not topology_proposal.unique_old_atoms:
                assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})"
                assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})"
                subtracted_valence_energy = 0.0
            else:
                subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential


            if conduct_endstate_validation and repartitioned_endstate is None:
                zero_state_error, one_state_error = validate_endstate_energies(forward_htf._topology_proposal, forward_htf, added_valence_energy, subtracted_valence_energy, beta=beta, ENERGY_THRESHOLD=ENERGY_THRESHOLD)
                if zero_state_error > ENERGY_THRESHOLD:
                    _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 0 state is above the threshold ({ENERGY_THRESHOLD}): {zero_state_error}")
                if one_state_error > ENERGY_THRESHOLD:
                    _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 1 state is above the threshold ({ENERGY_THRESHOLD}): {one_state_error}")
            else:
                pass

            htfs.append(forward_htf)

        self.apo_htf = htfs[0]
        self.complex_htf = htfs[1] if ligand_input else None
Beispiel #5
0
def generate_dipeptide_top_pos_sys(topology,
                                   new_res,
                                   system,
                                   positions,
                                   system_generator,
                                   conduct_geometry_prop=True,
                                   conduct_htf_prop=False):
    """generate point mutation engine, geometry_engine, and conduct topology proposal, geometry propsal, and hybrid factory generation"""
    from perses.tests.utils import validate_endstate_energies
    if conduct_htf_prop:
        assert conduct_geometry_prop, f"the htf prop can only be conducted if there is a geometry proposal"
    #create the point mutation engine
    from perses.rjmc.topology_proposal import PointMutationEngine
    point_mutation_engine = PointMutationEngine(
        wildtype_topology=topology,
        system_generator=system_generator,
        chain_id=
        '1',  #denote the chain id allowed to mutate (it's always a string variable)
        max_point_mutants=1,
        residues_allowed_to_mutate=['2'],  #the residue ids allowed to mutate
        allowed_mutations=[
            ('2', new_res)
        ],  #the residue ids allowed to mutate with the three-letter code allowed to change
        aggregate=True)  #always allow aggregation

    #create a top proposal
    print(f"making topology proposal")
    topology_proposal = point_mutation_engine.propose(
        current_system=system, current_topology=topology)

    if not conduct_geometry_prop:
        return topology_proposal

    if conduct_geometry_prop:
        #create a geometry engine
        print(f"generating geometry engine")
        from perses.rjmc.geometry import FFAllAngleGeometryEngine
        geometry_engine = FFAllAngleGeometryEngine(
            metadata=None,
            use_sterics=False,
            n_bond_divisions=100,
            n_angle_divisions=180,
            n_torsion_divisions=360,
            verbose=True,
            storage=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            neglect_angles=False,
            use_14_nonbondeds=True)

        #make a geometry proposal forward
        print(
            f"making geometry proposal from {list(topology.residues())[1].name} to {new_res}"
        )
        forward_new_positions, logp_proposal = geometry_engine.propose(
            topology_proposal, positions, beta)
        logp_reverse = geometry_engine.logp_reverse(topology_proposal,
                                                    forward_new_positions,
                                                    positions, beta)

    if not conduct_htf_prop:
        return (topology_proposal, forward_new_positions, logp_proposal,
                logp_reverse)

    if conduct_htf_prop:
        #create a hybrid topology factory
        from perses.annihilation.relative import HybridTopologyFactory
        forward_htf = HybridTopologyFactory(
            topology_proposal=topology_proposal,
            current_positions=positions,
            new_positions=forward_new_positions,
            use_dispersion_correction=False,
            functions=None,
            softcore_alpha=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            soften_only_new=False,
            neglected_new_angle_terms=[],
            neglected_old_angle_terms=[],
            softcore_LJ_v2=True,
            softcore_electrostatics=True,
            softcore_LJ_v2_alpha=0.85,
            softcore_electrostatics_alpha=0.3,
            softcore_sigma_Q=1.0,
            interpolate_old_and_new_14s=False,
            omitted_terms=None)

        if not topology_proposal.unique_new_atoms:
            assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})"
            assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})"
            vacuum_added_valence_energy = 0.0
        else:
            added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

        if not topology_proposal.unique_old_atoms:
            assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})"
            assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})"
            subtracted_valence_energy = 0.0
        else:
            subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

        zero_state_error, one_state_error = validate_endstate_energies(
            forward_htf._topology_proposal,
            forward_htf,
            added_valence_energy,
            subtracted_valence_energy,
            beta=1.0 / (kB * temperature),
            ENERGY_THRESHOLD=ENERGY_THRESHOLD,
            platform=openmm.Platform.getPlatformByName('Reference'))
        print(f"zero state error : {zero_state_error}")
        print(f"one state error : {one_state_error}")

        return forward_htf
def HybridTopologyFactory_energies(
        current_mol='toluene',
        proposed_mol='1,2-bis(trifluoromethyl) benzene'):
    """
    Test whether the difference in the nonalchemical zero and alchemical zero states is the forward valence energy.  Also test for the one states.
    """
    from perses.tests.utils import generate_solvated_hybrid_test_topology, generate_endpoint_thermodynamic_states
    import openmmtools.cache as cache

    #Just test the solvated system
    top_proposal, old_positions, _ = generate_solvated_hybrid_test_topology(
        current_mol_name=current_mol, proposed_mol_name=proposed_mol)

    #remove the dispersion correction
    top_proposal._old_system.getForce(3).setUseDispersionCorrection(False)
    top_proposal._new_system.getForce(3).setUseDispersionCorrection(False)

    # run geometry engine to generate old and new positions
    _geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                                use_sterics=False,
                                                n_bond_divisions=100,
                                                n_angle_divisions=180,
                                                n_torsion_divisions=360,
                                                verbose=True,
                                                storage=None,
                                                bond_softening_constant=1.0,
                                                angle_softening_constant=1.0,
                                                neglect_angles=False)
    _new_positions, _lp = _geometry_engine.propose(top_proposal, old_positions,
                                                   beta)
    _lp_rev = _geometry_engine.logp_reverse(top_proposal, _new_positions,
                                            old_positions, beta)

    # make the hybrid system, reset the CustomNonbondedForce cutoff
    HTF = HybridTopologyFactory(top_proposal, old_positions, _new_positions)
    hybrid_system = HTF.hybrid_system
    nonalch_zero, nonalch_one, alch_zero, alch_one = generate_endpoint_thermodynamic_states(
        hybrid_system, top_proposal)

    # compute reduced energies
    #for the nonalchemical systems...
    attrib_list = [(nonalch_zero, old_positions,
                    top_proposal._old_system.getDefaultPeriodicBoxVectors()),
                   (alch_zero, HTF._hybrid_positions,
                    hybrid_system.getDefaultPeriodicBoxVectors()),
                   (alch_one, HTF._hybrid_positions,
                    hybrid_system.getDefaultPeriodicBoxVectors()),
                   (nonalch_one, _new_positions,
                    top_proposal._new_system.getDefaultPeriodicBoxVectors())]

    rp_list = []
    for (state, pos, box_vectors) in attrib_list:
        context, integrator = cache.global_context_cache.get_context(state)
        samplerstate = SamplerState(positions=pos, box_vectors=box_vectors)
        samplerstate.apply_to_context(context)
        rp = state.reduced_potential(context)
        rp_list.append(rp)

    #valence energy definitions
    forward_added_valence_energy = _geometry_engine.forward_final_context_reduced_potential - _geometry_engine.forward_atoms_with_positions_reduced_potential
    reverse_subtracted_valence_energy = _geometry_engine.reverse_final_context_reduced_potential - _geometry_engine.reverse_atoms_with_positions_reduced_potential

    nonalch_zero_rp, alch_zero_rp, alch_one_rp, nonalch_one_rp = rp_list[
        0], rp_list[1], rp_list[2], rp_list[3]
    # print(f"Difference between zeros: {nonalch_zero_rp - alch_zero_rp}; forward added: {forward_added_valence_energy}")
    # print(f"Difference between ones: {nonalch_zero_rp - alch_zero_rp}; forward added: {forward_added_valence_energy}")

    assert abs(
        nonalch_zero_rp - alch_zero_rp + forward_added_valence_energy
    ) < ENERGY_THRESHOLD, f"The zero state alchemical and nonalchemical energy absolute difference {abs(nonalch_zero_rp - alch_zero_rp + forward_added_valence_energy)} is greater than the threshold of {ENERGY_THRESHOLD}."
    assert abs(
        nonalch_one_rp - alch_one_rp + reverse_subtracted_valence_energy
    ) < ENERGY_THRESHOLD, f"The one state alchemical and nonalchemical energy absolute difference {abs(nonalch_one_rp - alch_one_rp + reverse_subtracted_valence_energy)} is greater than the threshold of {ENERGY_THRESHOLD}."

    print(
        f"Abs difference in zero alchemical vs nonalchemical systems: {abs(nonalch_zero_rp - alch_zero_rp + forward_added_valence_energy)}"
    )
    print(
        f"Abs difference in one alchemical vs nonalchemical systems: {abs(nonalch_one_rp - alch_one_rp + reverse_subtracted_valence_energy)}"
    )
Beispiel #7
0
def validate_rjmc_work_variance(top_prop,
                                positions,
                                geometry_method=0,
                                num_iterations=10,
                                md_steps=250,
                                compute_timeseries=False,
                                md_system=None,
                                prespecified_conformers=None):
    """
    Arguments
    ----------
    top_prop : perses.rjmc.topology_proposal.TopologyProposal object
        topology_proposal
    md_system : openmm.System object, default None
        system from which md is conducted; the default is the top_prop._old_system
    geometry_method : int
        which geometry proposal method to use
            0: neglect_angles = True (this is supposed to be the zero-variance method)
            1: neglect_angles = False (this will accumulate variance)
            2: use_sterics = True (this is experimental)
    num_iterations: int
        number of times to run md_steps integrator
    md_steps: int
        number of md_steps to run in each num_iteration
    compute_timeseries = bool (default False)
        whether to use pymbar detectEquilibration and subsampleCorrelated data from the MD run (the potential energy is the data)
    prespecified_conformers = None or unit.Quantity(np.array([num_iterations, system.getNumParticles(), 3]), unit = unit.nanometers)
        whether to input a unit.Quantity of conformers and bypass the conformer_generation/pymbar stage; None will default conduct this phase

    Returns
    -------
    conformers : unit.Quantity(np.array([num_iterations, system.getNumParticles(), 3]), unit = unit.nanometers)
        decorrelated positions of the md run
    rj_works : list
        work from each conformer proposal
    """
    from openmmtools import integrators
    from perses.utils.openeye import smiles_to_oemol
    import simtk.unit as unit
    import simtk.openmm as openmm
    from openmmtools.constants import kB
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import tqdm

    temperature = 300.0 * unit.kelvin  # unit-bearing temperature
    kT = kB * temperature  # unit-bearing thermal energy
    beta = 1.0 / kT  # unit-bearing inverse thermal energy

    #first, we must extract the top_prop relevant quantities
    topology = top_prop._old_topology
    if md_system == None:
        system = top_prop._old_system
    else:
        system = md_system

    if prespecified_conformers == None:

        #now we can specify conformations from MD
        integrator = integrators.LangevinIntegrator(
            collision_rate=1.0 / unit.picosecond,
            timestep=4.0 * unit.femtosecond,
            temperature=temperature)
        context = openmm.Context(system, integrator)
        context.setPositions(positions)
        openmm.LocalEnergyMinimizer.minimize(context)
        minimized_positions = context.getState(getPositions=True).getPositions(
            asNumpy=True)
        print(f"completed initial minimization")
        context.setPositions(minimized_positions)

        zeros = np.zeros([num_iterations, int(system.getNumParticles()), 3])
        conformers = unit.Quantity(zeros, unit=unit.nanometers)
        rps = np.zeros((num_iterations))

        print(f"conducting md sampling")
        for iteration in tqdm.trange(num_iterations):
            integrator.step(md_steps)
            state = context.getState(getPositions=True, getEnergy=True)
            new_positions = state.getPositions(asNumpy=True)
            conformers[iteration, :, :] = new_positions

            rp = state.getPotentialEnergy() * beta
            rps[iteration] = rp

        del context, integrator

        if compute_timeseries:
            print(f"computing production and data correlation")
            from pymbar import timeseries
            t0, g, Neff = timeseries.detectEquilibration(rps)
            series = timeseries.subsampleCorrelatedData(np.arange(
                t0, num_iterations),
                                                        g=g)
            print(f"production starts at index {t0} of {num_iterations}")
            print(f"the number of effective samples is {Neff}")
            indices = t0 + series
            print(f"the filtered indices are {indices}")

        else:
            indices = range(num_iterations)
    else:
        conformers = prespecified_conformers
        indices = range(len(conformers))

    #now we can define a geometry_engine
    if geometry_method == 0:
        geometry_engine = FFAllAngleGeometryEngine(
            metadata=None,
            use_sterics=False,
            n_bond_divisions=1000,
            n_angle_divisions=180,
            n_torsion_divisions=360,
            verbose=True,
            storage=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            neglect_angles=True)
    elif geometry_method == 1:
        geometry_engine = FFAllAngleGeometryEngine(
            metadata=None,
            use_sterics=False,
            n_bond_divisions=1000,
            n_angle_divisions=180,
            n_torsion_divisions=360,
            verbose=True,
            storage=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            neglect_angles=False)
    elif geometry_method == 2:
        geometry_engine = FFAllAngleGeometryEngine(
            metadata=None,
            use_sterics=True,
            n_bond_divisions=1000,
            n_angle_divisions=180,
            n_torsion_divisions=360,
            verbose=True,
            storage=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            neglect_angles=False)
    else:
        raise Exception(f"there is no geometry method for {geometry_method}")

    rj_works = []
    print(f"conducting geometry proposals...")
    for indx in tqdm.trange(len(indices)):
        index = indices[indx]
        print(f"index {indx}")
        new_positions, logp_forward = geometry_engine.propose(
            top_prop, conformers[index], beta)
        logp_backward = geometry_engine.logp_reverse(top_prop, new_positions,
                                                     conformers[index], beta)
        print(
            f"\tlogp_forward, logp_backward: {logp_forward}, {logp_backward}")
        added_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential
        subtracted_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential
        print(
            f"\tadded_energy, subtracted_energy: {added_energy}, {subtracted_energy}"
        )
        work = logp_forward - logp_backward + added_energy - subtracted_energy
        rj_works.append(work)
        print(f"\ttotal work: {work}")

    return conformers, rj_works
Beispiel #8
0
def compare_energies(mol_name="naphthalene",
                     ref_mol_name="benzene",
                     atom_expression=['Hybridization'],
                     bond_expression=['Hybridization']):
    """
    Make an atom map where the molecule at either lambda endpoint is identical, and check that the energies are also the same.
    """
    from openmmtools.constants import kB
    from openmmtools import alchemy, states
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from perses.annihilation.relative import HybridTopologyFactory
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import simtk.openmm as openmm
    from perses.utils.openeye import iupac_to_oemol, extractPositionsFromOEMol, generate_conformers
    from perses.utils.openeye import generate_expression
    from openmmforcefields.generators import SystemGenerator
    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    from perses.tests.utils import validate_endstate_energies
    temperature = 300 * unit.kelvin
    # Compute kT and inverse temperature.
    kT = kB * temperature
    beta = 1.0 / kT
    ENERGY_THRESHOLD = 1e-6

    atom_expr, bond_expr = generate_expression(
        atom_expression), generate_expression(bond_expression)

    mol = iupac_to_oemol(mol_name)
    mol = generate_conformers(mol, max_confs=1)

    refmol = iupac_to_oemol(ref_mol_name)
    refmol = generate_conformers(refmol, max_confs=1)

    from openforcefield.topology import Molecule
    molecules = [Molecule.from_openeye(oemol) for oemol in [refmol, mol]]
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }

    system_generator = SystemGenerator(forcefields=forcefield_files,
                                       barostat=barostat,
                                       forcefield_kwargs=forcefield_kwargs,
                                       small_molecule_forcefield='gaff-2.11',
                                       molecules=molecules,
                                       cache=None)

    topology = generateTopologyFromOEMol(refmol)
    system = system_generator.create_system(topology)
    positions = extractPositionsFromOEMol(refmol)

    proposal_engine = SmallMoleculeSetProposalEngine([refmol, mol],
                                                     system_generator)
    proposal = proposal_engine.propose(system,
                                       topology,
                                       atom_expr=atom_expr,
                                       bond_expr=bond_expr)
    geometry_engine = FFAllAngleGeometryEngine()
    new_positions, _ = geometry_engine.propose(
        proposal, positions, beta=beta, validate_energy_bookkeeping=False)
    _ = geometry_engine.logp_reverse(proposal, new_positions, positions, beta)
    #make a topology proposal with the appropriate data:

    factory = HybridTopologyFactory(proposal, positions, new_positions)
    if not proposal.unique_new_atoms:
        assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})"
        assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})"
        vacuum_added_valence_energy = 0.0
    else:
        added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

    if not proposal.unique_old_atoms:
        assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})"
        assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})"
        subtracted_valence_energy = 0.0
    else:
        subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

    zero_state_error, one_state_error = validate_endstate_energies(
        factory._topology_proposal,
        factory,
        added_valence_energy,
        subtracted_valence_energy,
        beta=1.0 / (kB * temperature),
        ENERGY_THRESHOLD=ENERGY_THRESHOLD,
        platform=openmm.Platform.getPlatformByName('Reference'))
    return factory
    def __init__(
            self,
            receptor_filename,
            ligand_filename,
            mutation_chain_id,
            mutation_residue_id,
            proposed_residue,
            phase='complex',
            conduct_endstate_validation=False,
            ligand_index=0,
            forcefield_files=[
                'amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'
            ],
            barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                               temperature, 50),
            forcefield_kwargs={
                'removeCMMotion': False,
                'ewaldErrorTolerance': 1e-4,
                'nonbondedMethod': app.PME,
                'constraints': app.HBonds,
                'hydrogenMass': 4 * unit.amus
            },
            small_molecule_forcefields='gaff-2.11',
            **kwargs):
        """
        arguments
            receptor_filename : str
                path to receptor; .pdb
            ligand_filename : str
                path to ligand of interest; .sdf or .pdb
            mutation_chain_id : str
                name of the chain to be mutated
            mutation_residue_id : str
                residue id to change
            proposed_residue : str
                three letter code of the residue to mutate to
            phase : str, default complex
                if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
            conduct_endstate_validation : bool, default True
                whether to conduct an endstate validation of the hybrid topology factory
            ligand_index : int, default 0
                which ligand to use
            forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
                forcefield files for proteins and solvent
            barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
                barostat to use
            forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'nonbondedMethod': app.NoCutoff, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
                forcefield kwargs for system parametrization
            small_molecule_forcefields : str, default 'gaff-2.11'
                the forcefield string for small molecule parametrization

        TODO : allow argument for separate apo structure if it exists separately
               allow argument for specator ligands besides the 'ligand_filename'
        """
        from openforcefield.topology import Molecule
        from openmmforcefields.generators import SystemGenerator

        # first thing to do is make a complex and apo...
        pdbfile = open(receptor_filename, 'r')
        pdb = app.PDBFile(pdbfile)
        pdbfile.close()
        receptor_positions, receptor_topology, receptor_md_topology = pdb.positions, pdb.topology, md.Topology.from_openmm(
            pdb.topology)
        receptor_topology = receptor_md_topology.to_openmm()
        receptor_n_atoms = receptor_md_topology.n_atoms

        molecules = []
        ligand_mol = createOEMolFromSDF(ligand_filename, index=ligand_index)
        ligand_mol = generate_unique_atom_names(ligand_mol)
        molecules.append(
            Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
        ligand_positions, ligand_topology = extractPositionsFromOEMol(
            ligand_mol), forcefield_generators.generateTopologyFromOEMol(
                ligand_mol)
        ligand_md_topology = md.Topology.from_openmm(ligand_topology)
        ligand_n_atoms = ligand_md_topology.n_atoms

        #now create a complex
        complex_md_topology = receptor_md_topology.join(ligand_md_topology)
        complex_topology = complex_md_topology.to_openmm()
        complex_positions = unit.Quantity(np.zeros(
            [receptor_n_atoms + ligand_n_atoms, 3]),
                                          unit=unit.nanometers)
        complex_positions[:receptor_n_atoms, :] = receptor_positions
        complex_positions[receptor_n_atoms:, :] = ligand_positions

        #now for a system_generator
        self.system_generator = SystemGenerator(
            forcefields=forcefield_files,
            barostat=barostat,
            forcefield_kwargs=forcefield_kwargs,
            small_molecule_forcefield=small_molecule_forcefields,
            molecules=molecules,
            cache=None)

        #create complex and apo inputs...
        complex_topology, complex_positions, complex_system = self._solvate(
            complex_topology, complex_positions, 'tip3p', phase=phase)
        apo_topology, apo_positions, apo_system = self._solvate(
            receptor_topology, receptor_positions, 'tip3p', phase='phase')

        geometry_engine = FFAllAngleGeometryEngine(
            metadata=None,
            use_sterics=False,
            n_bond_divisions=100,
            n_angle_divisions=180,
            n_torsion_divisions=360,
            verbose=True,
            storage=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            neglect_angles=False,
            use_14_nonbondeds=True)

        #run pipeline...
        htfs = []
        for (top, pos, sys) in zip([complex_topology, apo_topology],
                                   [complex_positions, apo_positions],
                                   [complex_system, apo_system]):
            point_mutation_engine = PointMutationEngine(
                wildtype_topology=top,
                system_generator=self.system_generator,
                chain_id=
                mutation_chain_id,  #denote the chain id allowed to mutate (it's always a string variable)
                max_point_mutants=1,
                residues_allowed_to_mutate=[
                    mutation_residue_id
                ],  #the residue ids allowed to mutate
                allowed_mutations=[
                    (mutation_residue_id, proposed_residue)
                ],  #the residue ids allowed to mutate with the three-letter code allowed to change
                aggregate=True)  #always allow aggregation

            topology_proposal = point_mutation_engine.propose(sys, top)

            new_positions, logp_proposal = geometry_engine.propose(
                topology_proposal, pos, beta)
            logp_reverse = geometry_engine.logp_reverse(
                topology_proposal, new_positions, pos, beta)

            forward_htf = HybridTopologyFactory(
                topology_proposal=topology_proposal,
                current_positions=pos,
                new_positions=new_positions,
                use_dispersion_correction=False,
                functions=None,
                softcore_alpha=None,
                bond_softening_constant=1.0,
                angle_softening_constant=1.0,
                soften_only_new=False,
                neglected_new_angle_terms=[],
                neglected_old_angle_terms=[],
                softcore_LJ_v2=True,
                softcore_electrostatics=True,
                softcore_LJ_v2_alpha=0.85,
                softcore_electrostatics_alpha=0.3,
                softcore_sigma_Q=1.0,
                interpolate_old_and_new_14s=False,
                omitted_terms=None)

            if not topology_proposal.unique_new_atoms:
                assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})"
                assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})"
                vacuum_added_valence_energy = 0.0
            else:
                added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

            if not topology_proposal.unique_old_atoms:
                assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})"
                assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})"
                subtracted_valence_energy = 0.0
            else:
                subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

            if conduct_endstate_validation:
                zero_state_error, one_state_error = validate_endstate_energies(
                    forward_htf._topology_proposal,
                    forward_htf,
                    added_valence_energy,
                    subtracted_valence_energy,
                    beta=beta,
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD)
            else:
                pass

            htfs.append(forward_htf)

        self.complex_htf = htfs[0]
        self.apo_htf = htfs[1]
Beispiel #10
0
class ExpandedEnsembleSampler(object):
    """
    Method of expanded ensembles sampling engine.

    Properties
    ----------
    sampler : MCMCSampler
        The MCMC sampler used for updating positions.
    proposal_engine : ProposalEngine
        The ProposalEngine to use for proposing new sampler states and topologies.
    system_generator : SystemGenerator
        The SystemGenerator to use for creating System objects following proposals.
    state : hashable object
        The current sampler state. Can be any hashable object.
    states : set of hashable object
        All known states.
    iteration : int
        Iterations completed.
    naccepted : int
        Number of accepted thermodynamic/chemical state changes.
    nrejected : int
        Number of rejected thermodynamic/chemical state changes.
    number_of_state_visits : dict of state_key
        Cumulative counts of visited states.
    verbose : bool
        If True, verbose output is printed.

    References
    ----------
    [1] Lyubartsev AP, Martsinovski AA, Shevkunov SV, and Vorontsov-Velyaminov PN. New approach to Monte Carlo calculation of the free energy: Method of expanded ensembles. JCP 96:1776, 1992
    http://dx.doi.org/10.1063/1.462133

    Examples
    --------
    >>> # Create a test system
    >>> test = testsystems.AlanineDipeptideVacuum()
    >>> # Create a SystemGenerator and rebuild the System.
    >>> from perses.rjmc.topology_proposal import SystemGenerator
    >>> system_generator = SystemGenerator(['amber99sbildn.xml'], forcefield_kwargs={ 'nonbondedMethod' : app.NoCutoff, 'implicitSolvent' : None, 'constraints' : None })
    >>> test.system = system_generator.build_system(test.topology)
    >>> # Create a sampler state.
    >>> sampler_state = SamplerState(system=test.system, positions=test.positions)
    >>> # Create a thermodynamic state.
    >>> thermodynamic_state = ThermodynamicState(system=test.system, temperature=298.0*unit.kelvin)
    >>> # Create an MCMC sampler
    >>> mcmc_sampler = MCMCSampler(thermodynamic_state, sampler_state)
    >>> # Turn off verbosity
    >>> mcmc_sampler.verbose = False
    >>> # Create an Expanded Ensemble sampler
    >>> from perses.rjmc.topology_proposal import PointMutationEngine
    >>> allowed_mutations = [[('2','ALA')],[('2','VAL'),('2','LEU')]]
    >>> proposal_engine = PointMutationEngine(system_generator, max_point_mutants=1, chain_id='1', proposal_metadata=None, allowed_mutations=allowed_mutations)
    >>> exen_sampler = ExpandedEnsembleSampler(mcmc_sampler, test.topology, 'ACE-ALA-NME', proposal_engine)
    >>> # Run the sampler
    >>> exen_sampler.run()

    """
    def __init__(self, sampler, topology, state_key, proposal_engine, log_weights=None, scheme='ncmc-geometry-ncmc', options=dict(), platform=None):
        """
        Create an expanded ensemble sampler.

        p(x,k) \propto \exp[-u_k(x) + g_k]

        where g_k is the log weight.

        Parameters
        ----------
        sampler : MCMCSampler
            MCMCSampler initialized with current SamplerState
        topology : simtk.openmm.app.Topology
            Current topology
        state : hashable object
            Current chemical state
        proposal_engine : ProposalEngine
            ProposalEngine to use for proposing new chemical states
        log_weights : dict of object : float
            Log weights to use for expanded ensemble biases.
        scheme : str, optional, default='ncmc-geometry-ncmc'
            Update scheme. One of ['ncmc-geometry-ncmc', 'geometry-ncmc-geometry', 'geometry-ncmc']
        options : dict, optional, default=dict()
            Options for initializing switching scheme, such as 'timestep', 'nsteps', 'functions' for NCMC
        platform : simtk.openmm.Platform, optional, default=None
            Platform to use for NCMC switching.  If `None`, default (fastest) platform is used.

        """
        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self.topology = topology
        self.state_key = state_key
        self.proposal_engine = proposal_engine
        self.log_weights = log_weights
        self.scheme = scheme
        if self.log_weights is None: self.log_weights = dict()

        # Initialize
        self.iteration = 0
        option_names = ['timestep', 'nsteps', 'functions']
        for option_name in option_names:
            if option_name not in options:
                options[option_name] = None
        from perses.annihilation.ncmc_switching import NCMCEngine
        self.ncmc_engine = NCMCEngine(temperature=self.sampler.thermodynamic_state.temperature, timestep=options['timestep'], nsteps=options['nsteps'], functions=options['functions'], platform=platform)
        from perses.rjmc.geometry import FFAllAngleGeometryEngine
        self.geometry_engine = FFAllAngleGeometryEngine({'data': 0})
        self.naccepted = 0
        self.nrejected = 0
        self.number_of_state_visits = dict()
        self.verbose = False
        self.pdbfile = None # if not None, write PDB file
        self.geometry_pdbfile = None # if not None, write PDB file of geometry proposals
        self.accept_everything = False # if True, will accept anything that doesn't lead to NaNs


    @property
    def state_keys(self):
        return log_weights.keys()

    def get_log_weight(self, state_key):
        """
        Get the log weight of the specified state.

        Parameters
        ----------
        state_key : hashable object
            The state key (e.g. chemical state key) to look up.

        Returns
        -------
        log_weight : float
            The log weight of the provided state key.

        Note
        ----
        This adds the key to the self.log_weights dict.

        """
        if state_key not in self.log_weights:
            self.log_weights[state_key] = 0.0
        return self.log_weights[state_key]

    def update_positions(self):
        """
        Sample new positions.
        """
        self.sampler.update()

    def update_state(self):
        """
        Sample the thermodynamic state.
        """
        # Check that system and topology have same number of atoms.
        old_system = self.sampler.sampler_state.system
        old_topology = self.topology
        old_topology_natoms = sum([1 for atom in old_topology.atoms()]) # number of topology atoms
        old_system_natoms = old_system.getNumParticles()
        if old_topology_natoms != old_system_natoms:
            msg = 'ExpandedEnsembleSampler: topology has %d atoms, while system has %d atoms' % (old_topology_natoms, old_system_natoms)
            raise Exception(msg)

        if self.scheme == 'ncmc-geometry-ncmc':
            if self.verbose: print("Updating chemical state with ncmc-geometry-ncmc scheme...")

            # DEBUG: Check current topology can be built.
            try:
                self.proposal_engine._system_generator.build_system(self.topology)
            except Exception as e:
                msg = str(e)
                msg += '\n'
                msg += 'ExpandedEnsembleSampler.update_sampler: self.topology before ProposalEngine call cannot be built into a system'
                raise Exception(msg)

            # Propose new chemical state.
            if self.verbose: print("Proposing new topology...")
            [system, topology, positions] = [self.sampler.thermodynamic_state.system, self.topology, self.sampler.sampler_state.positions]
            topology_proposal = self.proposal_engine.propose(system, topology)
            if self.verbose: print("Proposed transformation: %s => %s" % (topology_proposal.old_chemical_state_key, topology_proposal.new_chemical_state_key))

            # DEBUG: Check current topology can be built.
            if self.verbose: print("Generating new system...")
            try:
                self.proposal_engine._system_generator.build_system(topology_proposal.new_topology)
            except Exception as e:
                msg = str(e)
                msg += '\n'
                msg += 'ExpandedEnsembleSampler.update_sampler: toology_proposal.new_topology before ProposalEngine call cannot be built into a system'
                raise Exception(msg)

            # Check to make sure no out-of-bounds atoms are present in new_to_old_atom_map
            natoms_old = topology_proposal.old_system.getNumParticles()
            natoms_new = topology_proposal.new_system.getNumParticles()
            if not set(topology_proposal.new_to_old_atom_map.values()).issubset(range(natoms_old)):
                msg = "Some old atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_old
                msg += str(topology_proposal.new_to_old_atom_map)
                raise Exception(msg)
            if not set(topology_proposal.new_to_old_atom_map.keys()).issubset(range(natoms_new)):
                msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_new
                msg += str(topology_proposal.new_to_old_atom_map)
                raise Exception(msg)

            # Determine state keys
            old_state_key = self.state_key
            new_state_key = topology_proposal.new_chemical_state_key

            # Determine log weight
            old_log_weight = self.get_log_weight(old_state_key)
            new_log_weight = self.get_log_weight(new_state_key)

            if self.verbose: print("Performing NCMC annihilation")
            # Alchemically eliminate atoms being removed.
            [ncmc_old_positions, ncmc_elimination_logp, potential_delete] = self.ncmc_engine.integrate(topology_proposal, positions, direction='delete')
            # Check that positions are not NaN
            if np.any(np.isnan(ncmc_old_positions)):
                raise Exception("Positions are NaN after NCMC delete with %d steps" % switching_nsteps)

            if self.verbose: print("Geometry engine proposal...")
            # Generate coordinates for new atoms and compute probability ratio of old and new probabilities.
            geometry_old_positions = ncmc_old_positions
            geometry_new_positions, geometry_logp_propose = self.geometry_engine.propose(topology_proposal, geometry_old_positions, self.sampler.thermodynamic_state.beta)

            if self.geometry_pdbfile is not None:
                print("Writing proposed geometry...")
                #self.geometry_pdbfile.write('MODEL     %4d\n' % (self.iteration+1)) # PyMOL doesn't render connectivity correctly this way
                from simtk.openmm.app import PDBFile
                PDBFile.writeFile(topology_proposal.new_topology, geometry_new_positions, file=self.geometry_pdbfile)
                #self.geometry_pdbfile.write('ENDMDL\n')
                self.geometry_pdbfile.flush()

            geometry_logp_reverse = self.geometry_engine.logp_reverse(topology_proposal, geometry_new_positions, geometry_old_positions, self.sampler.thermodynamic_state.beta)
            geometry_logp = geometry_logp_reverse - geometry_logp_propose

            if self.verbose: print("Performing NCMC insertion")
            # Alchemically introduce new atoms.
            [ncmc_new_positions, ncmc_introduction_logp, potential_insert] = self.ncmc_engine.integrate(topology_proposal, geometry_new_positions, direction='insert')
            # Check that positions are not NaN
            if np.any(np.isnan(ncmc_new_positions)):
                raise Exception("Positions are NaN after NCMC insert with %d steps" % switching_nsteps)

            # Compute change in eliminated potential contribution.
            switch_logp = - (potential_insert - potential_delete)
            if self.verbose:
                print('potential before geometry  : %12.3f kT' % potential_delete)
                print('potential after geometry   : %12.3f kT' % potential_insert)
                print('---------------------------------------------------------')
                print('switch_logp                : %12.3f' % switch_logp)
                print('geometry_logp_propose      : %12.3f' % geometry_logp_propose)
                print('geometry_logp_reverse      : %12.3f' % geometry_logp_reverse)

            # Compute total log acceptance probability, including all components.
            logp_accept = topology_proposal.logp_proposal + geometry_logp + switch_logp + ncmc_elimination_logp + ncmc_introduction_logp + new_log_weight - old_log_weight
            if self.verbose:
                print("logp_accept = %+10.4e [logp_proposal %+10.4e geometry_logp %+10.4e switch_logp %+10.4e ncmc_elimination_logp %+10.4e ncmc_introduction_logp %+10.4e old_log_weight %+10.4e new_log_weight %+10.4e]"
                    % (logp_accept, topology_proposal.logp_proposal, geometry_logp, switch_logp, ncmc_elimination_logp, ncmc_introduction_logp, old_log_weight, new_log_weight))

            # Accept or reject.
            if np.isnan(logp_accept):
                accept = False
                print('logp_accept = NaN')
            else:
                accept = ((logp_accept>=0.0) or (np.random.uniform() < np.exp(logp_accept)))
                if self.accept_everything:
                    print('accept_everything option is turned on; accepting')
                    accept = True

            if accept:
                self.sampler.thermodynamic_state.system = topology_proposal.new_system
                self.sampler.sampler_state.system = topology_proposal.new_system
                self.topology = topology_proposal.new_topology
                self.sampler.sampler_state.positions = ncmc_new_positions
                self.state_key = topology_proposal.new_chemical_state_key
                self.naccepted += 1
                if self.verbose: print("    accepted")
            else:
                self.nrejected += 1
                if self.verbose: print("    rejected")

        else:
            raise Exception("Expanded ensemble state proposal scheme '%s' unsupported" % self.scheme)

        # Update statistics.
        self.update_statistics()

    def update(self):
        """
        Update the sampler with one step of sampling.
        """
        if self.verbose:
            print("-" * 80)
            print("Expanded Ensemble sampler iteration %8d" % self.iteration)
        self.update_positions()
        self.update_state()
        self.iteration += 1
        if self.verbose:
            print("-" * 80)

        if self.pdbfile is not None:
            print("Writing frame...")
            from simtk.openmm.app import PDBFile
            PDBFile.writeModel(self.topology, self.sampler.sampler_state.positions, self.pdbfile, self.iteration)
            self.pdbfile.flush()

    def run(self, niterations=1):
        """
        Run the sampler for the specified number of iterations

        Parameters
        ----------
        niterations : int, optional, default=1
            Number of iterations to run the sampler for.
        """
        for iteration in range(niterations):
            self.update()

    def update_statistics(self):
        """
        Update sampler statistics.
        """
        if self.state_key not in self.number_of_state_visits:
            self.number_of_state_visits[self.state_key] = 0
        self.number_of_state_visits[self.state_key] += 1