Beispiel #1
0
def minimize(thermodynamic_state: states.ThermodynamicState,
             sampler_state: states.SamplerState,
             max_iterations: int = 20) -> states.SamplerState:
    """
    Minimize the given system and state, up to a maximum number of steps.

    Parameters
    ----------
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The state at which the system could be minimized
    sampler_state : openmmtools.states.SamplerState
        The starting state at which to minimize the system.
    max_iterations : int, optional, default 20
        The maximum number of minimization steps. Default is 20.

    Returns
    -------
    sampler_state : openmmtools.states.SamplerState
        The posititions and accompanying state following minimization
    """
    mc_move = mcmc.LangevinSplittingDynamicsMove()
    mcmc_sampler = mcmc.MCMCSampler(thermodynamic_state, sampler_state,
                                    mc_move)
    mcmc_sampler.minimize(max_iterations=max_iterations)
    return mcmc_sampler.sampler_state
Beispiel #2
0
def create_hss(pkl, suffix, selection, checkpoint_interval, n_states):
    with open(pkl, 'rb') as f:
        htf = pickle.load(f)
    lambda_protocol = LambdaProtocol(functions='default')
    reporter_file = pkl[:-3] + suffix + '.nc'
    reporter = MultiStateReporter(
        reporter_file,
        analysis_particle_indices=htf.hybrid_topology.select(selection),
        checkpoint_interval=checkpoint_interval)
    hss = HybridRepexSampler(mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
        timestep=4.0 * unit.femtoseconds,
        collision_rate=5.0 / unit.picosecond,
        n_steps=250,
        reassign_velocities=False,
        n_restart_attempts=20,
        splitting="V R R R O R R R V",
        constraint_tolerance=1e-06),
                             hybrid_factory=htf,
                             online_analysis_interval=10)
    hss.setup(n_states=n_states,
              temperature=300 * unit.kelvin,
              storage_file=reporter,
              lambda_protocol=lambda_protocol,
              endstates=False)
    return hss, reporter
Beispiel #3
0
def create_hss(reporter_name,
               hybrid_factory,
               selection_string='all',
               checkpoint_interval=1,
               n_states=13):
    lambda_protocol = LambdaProtocol(functions='default')
    reporter = MultiStateReporter(
        reporter_name,
        analysis_particle_indices=hybrid_factory.hybrid_topology.select(
            selection_string),
        checkpoint_interval=checkpoint_interval)
    hss = HybridRepexSampler(mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
        timestep=4.0 * unit.femtoseconds,
        collision_rate=5.0 / unit.picosecond,
        n_steps=250,
        reassign_velocities=False,
        n_restart_attempts=20,
        splitting="V R R R O R R R V",
        constraint_tolerance=1e-06),
                             hybrid_factory=hybrid_factory,
                             online_analysis_interval=10)
    hss.setup(n_states=n_states,
              temperature=300 * unit.kelvin,
              storage_file=reporter,
              lambda_protocol=lambda_protocol,
              endstates=False)
    return hss, reporter
Beispiel #4
0
def create_integrator(htf, constraint_tol):
    """
    create lambda alchemical states, thermodynamic states, sampler states, integrator, and return context, thermostate, sampler_state, integrator
    """
    fast_lambda_alchemical_state = RelativeAlchemicalState.from_system(
        htf.hybrid_system)
    fast_lambda_alchemical_state.set_alchemical_parameters(
        0.0, LambdaProtocol(functions='default'))

    fast_thermodynamic_state = CompoundThermodynamicState(
        ThermodynamicState(htf.hybrid_system, temperature=temperature),
        composable_states=[fast_lambda_alchemical_state])

    fast_sampler_state = SamplerState(
        positions=htf._hybrid_positions,
        box_vectors=htf.hybrid_system.getDefaultPeriodicBoxVectors())

    #     integrator_1 = integrators.LangevinIntegrator(temperature = temperature,
    #                                                      timestep = 0.5* unit.femtoseconds,
    #                                                      splitting = 'V R O R V',
    #                                                      measure_shadow_work = False,
    #                                                      measure_heat = False,
    #                                                      constraint_tolerance = constraint_tol,
    #                                                      collision_rate = 5.0 / unit.picoseconds)
    mcmc_moves = mcmc.LangevinSplittingDynamicsMove(
        timestep=4.0 * unit.femtoseconds,
        collision_rate=5.0 / unit.picosecond,
        n_steps=1,
        reassign_velocities=False,
        n_restart_attempts=20,
        splitting="V R O R V",
        constraint_tolerance=constraint_tol)

    #print(integrator_1.getConstraintTolerance())

    #     fast_context, fast_integrator = cache.global_context_cache.get_context(fast_thermodynamic_state, integrator_1)

    #     fast_sampler_state.apply_to_context(fast_context)

    return mcmc_moves, fast_thermodynamic_state, fast_sampler_state
def run_setup(setup_options, serialize_systems=True, build_samplers=True):
    """
    Run the setup pipeline and return the relevant setup objects based on a yaml input file.
    Parameters
    ----------
    setup_options : dict
        result of loading yaml input file
    Returns
    -------
    setup_dict: dict
        {'topology_proposals': top_prop, 'hybrid_topology_factories': htf, 'hybrid_samplers': hss}
        - 'topology_proposals':
    """
    phases = setup_options['phases']
    known_phases = ['complex', 'solvent', 'vacuum']
    for phase in phases:
        assert (
            phase in known_phases
        ), f"Unknown phase, {phase} provided. run_setup() can be used with {known_phases}"

    if 'use_given_geometries' not in list(setup_options.keys()):
        use_given_geometries = False
    else:
        assert type(setup_options['use_given_geometries']) == type(True)
        use_given_geometries = setup_options['use_given_geometries']

    if 'complex' in phases:
        _logger.info(f"\tPulling receptor (as pdb or mol2)...")
        # We'll need the protein PDB file (without missing atoms)
        try:
            protein_pdb_filename = setup_options['protein_pdb']
            assert protein_pdb_filename is not None
            receptor_mol2 = None
        except KeyError:
            try:
                receptor_mol2 = setup_options['receptor_mol2']
                assert receptor_mol2 is not None
                protein_pdb_filename = None
            except KeyError as e:
                print(
                    "Either protein_pdb or receptor_mol2 must be specified if running a complex simulation"
                )
                raise e
    else:
        protein_pdb_filename = None
        receptor_mol2 = None

    # And a ligand file containing the pair of ligands between which we will transform
    ligand_file = setup_options['ligand_file']
    _logger.info(f"\tdetected ligand file: {ligand_file}")

    # get the indices of ligands out of the file:
    old_ligand_index = setup_options['old_ligand_index']
    new_ligand_index = setup_options['new_ligand_index']
    _logger.info(
        f"\told ligand index: {old_ligand_index}; new ligand index: {new_ligand_index}"
    )

    _logger.info(f"\tsetting up forcefield files...")
    forcefield_files = setup_options['forcefield_files']

    if "timestep" in setup_options:
        if isinstance(setup_options['timestep'], float):
            timestep = setup_options['timestep'] * unit.femtoseconds
        else:
            timestep = setup_options['timestep']
        _logger.info(f"\ttimestep: {timestep}.")
    else:
        timestep = 1.0 * unit.femtoseconds
        _logger.info(f"\tno timestep detected: setting default as 1.0fs.")

    if "neq_splitting" in setup_options:
        neq_splitting = setup_options['neq_splitting']
        _logger.info(f"\tneq_splitting: {neq_splitting}")

        try:
            eq_splitting = setup_options['eq_splitting']
            _logger.info(f"\teq_splitting: {eq_splitting}")
        except KeyError as e:
            print(
                "If you specify a nonequilibrium splitting string, you must also specify an equilibrium one."
            )
            raise e

    else:
        eq_splitting = "V R O R V"
        neq_splitting = "V R O R V"
        _logger.info(
            f"\tno splitting strings specified: defaulting to neq: {neq_splitting}, eq: {eq_splitting}."
        )

    if "measure_shadow_work" in setup_options:
        measure_shadow_work = setup_options['measure_shadow_work']
        _logger.info(f"\tmeasuring shadow work: {measure_shadow_work}.")
    else:
        measure_shadow_work = False
        _logger.info(
            f"\tno measure_shadow_work specified: defaulting to False.")
    if isinstance(setup_options['pressure'], float):
        pressure = setup_options['pressure'] * unit.atmosphere
    else:
        pressure = setup_options['pressure']
    if isinstance(setup_options['temperature'], float):
        temperature = setup_options['temperature'] * unit.kelvin
    else:
        temperature = setup_options['temperature']
    if isinstance(setup_options['solvent_padding'], float):
        solvent_padding_angstroms = setup_options[
            'solvent_padding'] * unit.angstrom
    else:
        solvent_padding_angstroms = setup_options['solvent_padding']
    if isinstance(setup_options['ionic_strength'], float):
        ionic_strength = setup_options['ionic_strength'] * unit.molar
    else:
        ionic_strength = setup_options['ionic_strength']
    _logger.info(f"\tsetting pressure: {pressure}.")
    _logger.info(f"\tsetting temperature: {temperature}.")
    _logger.info(f"\tsetting solvent padding: {solvent_padding_angstroms}A.")
    _logger.info(f"\tsetting ionic strength: {ionic_strength}M.")

    setup_pickle_file = setup_options[
        'save_setup_pickle_as'] if 'save_setup_pickle_as' in list(
            setup_options) else None
    _logger.info(f"\tsetup pickle file: {setup_pickle_file}")
    trajectory_directory = setup_options['trajectory_directory']
    _logger.info(f"\ttrajectory directory: {trajectory_directory}")
    try:
        atom_map_file = setup_options['atom_map']
        with open(atom_map_file, 'r') as f:
            atom_map = {
                int(x.split()[0]): int(x.split()[1])
                for x in f.readlines()
            }
        _logger.info(f"\tsucceeded parsing atom map.")
    except Exception:
        atom_map = None
        _logger.info(f"\tno atom map specified: default to None.")

    if 'topology_proposal' not in list(setup_options.keys(
    )) or setup_options['topology_proposal'] is None:
        _logger.info(
            f"\tno topology_proposal specified; proceeding to RelativeFEPSetup...\n\n\n"
        )
        if 'set_solvent_box_dims_to_complex' in list(setup_options.keys(
        )) and setup_options['set_solvent_box_dims_to_complex']:
            set_solvent_box_dims_to_complex = True
        else:
            set_solvent_box_dims_to_complex = False

        _logger.info(
            f'Box dimensions: {setup_options["complex_box_dimensions"]} and {setup_options["solvent_box_dimensions"]}'
        )
        fe_setup = RelativeFEPSetup(
            ligand_file,
            old_ligand_index,
            new_ligand_index,
            forcefield_files,
            phases=phases,
            protein_pdb_filename=protein_pdb_filename,
            receptor_mol2_filename=receptor_mol2,
            pressure=pressure,
            temperature=temperature,
            solvent_padding=solvent_padding_angstroms,
            spectator_filenames=setup_options['spectators'],
            map_strength=setup_options['map_strength'],
            atom_expr=setup_options['atom_expr'],
            bond_expr=setup_options['bond_expr'],
            atom_map=atom_map,
            neglect_angles=setup_options['neglect_angles'],
            anneal_14s=setup_options['anneal_1,4s'],
            small_molecule_forcefield=setup_options[
                'small_molecule_forcefield'],
            small_molecule_parameters_cache=setup_options[
                'small_molecule_parameters_cache'],
            trajectory_directory=trajectory_directory,
            trajectory_prefix=setup_options['trajectory_prefix'],
            nonbonded_method=setup_options['nonbonded_method'],
            complex_box_dimensions=setup_options['complex_box_dimensions'],
            solvent_box_dimensions=setup_options['solvent_box_dimensions'],
            ionic_strength=ionic_strength,
            remove_constraints=setup_options['remove_constraints'],
            use_given_geometries=use_given_geometries)

        _logger.info(f"\twriting pickle output...")
        if setup_pickle_file is not None:
            with open(
                    os.path.join(os.getcwd(), trajectory_directory,
                                 setup_pickle_file), 'wb') as f:
                try:
                    pickle.dump(fe_setup, f)
                    _logger.info(f"\tsuccessfully dumped pickle.")
                except Exception as e:
                    print(e)
                    print("\tUnable to save setup object as a pickle")

            _logger.info(
                f"\tsetup is complete.  Writing proposals and positions for each phase to top_prop dict..."
            )
        else:
            _logger.info(
                f"\tsetup is complete.  Omitted writing proposals and positions for each phase to top_prop dict..."
            )

        top_prop = dict()
        for phase in phases:
            top_prop[f'{phase}_topology_proposal'] = getattr(
                fe_setup, f'{phase}_topology_proposal')
            top_prop[f'{phase}_geometry_engine'] = getattr(
                fe_setup, f'_{phase}_geometry_engine')
            top_prop[f'{phase}_old_positions'] = getattr(
                fe_setup, f'{phase}_old_positions')
            top_prop[f'{phase}_new_positions'] = getattr(
                fe_setup, f'{phase}_new_positions')
            top_prop[f'{phase}_added_valence_energy'] = getattr(
                fe_setup, f'_{phase}_added_valence_energy')
            top_prop[f'{phase}_subtracted_valence_energy'] = getattr(
                fe_setup, f'_{phase}_subtracted_valence_energy')
            top_prop[f'{phase}_logp_proposal'] = getattr(
                fe_setup, f'_{phase}_logp_proposal')
            top_prop[f'{phase}_logp_reverse'] = getattr(
                fe_setup, f'_{phase}_logp_reverse')
            top_prop[f'{phase}_forward_neglected_angles'] = getattr(
                fe_setup, f'_{phase}_forward_neglected_angles')
            top_prop[f'{phase}_reverse_neglected_angles'] = getattr(
                fe_setup, f'_{phase}_reverse_neglected_angles')

        top_prop['ligand_oemol_old'] = fe_setup._ligand_oemol_old
        top_prop['ligand_oemol_new'] = fe_setup._ligand_oemol_new
        top_prop[
            'non_offset_new_to_old_atom_map'] = fe_setup.non_offset_new_to_old_atom_map
        _logger.info(f"\twriting atom_mapping.png")
        atom_map_outfile = os.path.join(os.getcwd(), trajectory_directory,
                                        'atom_mapping.png')

        if 'render_atom_map' in list(
                setup_options.keys()) and setup_options['render_atom_map']:
            render_atom_mapping(atom_map_outfile, fe_setup._ligand_oemol_old,
                                fe_setup._ligand_oemol_new,
                                fe_setup.non_offset_new_to_old_atom_map)

    else:
        _logger.info(f"\tloading topology proposal from yaml setup options...")
        top_prop = np.load(setup_options['topology_proposal']).item()

    n_steps_per_move_application = setup_options[
        'n_steps_per_move_application']
    _logger.info(
        f"\t steps per move application: {n_steps_per_move_application}")
    trajectory_directory = setup_options['trajectory_directory']

    trajectory_prefix = setup_options['trajectory_prefix']
    _logger.info(f"\ttrajectory prefix: {trajectory_prefix}")

    if 'atom_selection' in setup_options:
        atom_selection = setup_options['atom_selection']
        _logger.info(f"\tatom selection detected: {atom_selection}")
    else:
        _logger.info(f"\tno atom selection detected: default to all.")
        atom_selection = 'all'

    if setup_options['fe_type'] == 'neq':
        _logger.info(f"\tInstantiating nonequilibrium switching FEP")
        n_equilibrium_steps_per_iteration = setup_options[
            'n_equilibrium_steps_per_iteration']
        ncmc_save_interval = setup_options['ncmc_save_interval']
        write_ncmc_configuration = setup_options['write_ncmc_configuration']
        if setup_options['LSF']:
            _internal_parallelism = {
                'library': ('dask', 'LSF'),
                'num_processes': setup_options['processes']
            }
        else:
            _internal_parallelism = None

        ne_fep = dict()
        for phase in phases:
            _logger.info(f"\t\tphase: {phase}")
            hybrid_factory = HybridTopologyFactory(
                top_prop['%s_topology_proposal' % phase],
                top_prop['%s_old_positions' % phase],
                top_prop['%s_new_positions' % phase],
                neglected_new_angle_terms=top_prop[
                    f"{phase}_forward_neglected_angles"],
                neglected_old_angle_terms=top_prop[
                    f"{phase}_reverse_neglected_angles"],
                softcore_LJ_v2=setup_options['softcore_v2'],
                interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

            if build_samplers:
                ne_fep[phase] = SequentialMonteCarlo(
                    factory=hybrid_factory,
                    lambda_protocol=setup_options['lambda_protocol'],
                    temperature=temperature,
                    trajectory_directory=trajectory_directory,
                    trajectory_prefix=f"{trajectory_prefix}_{phase}",
                    atom_selection=atom_selection,
                    timestep=timestep,
                    eq_splitting_string=eq_splitting,
                    neq_splitting_string=neq_splitting,
                    collision_rate=setup_options['ncmc_collision_rate_ps'],
                    ncmc_save_interval=ncmc_save_interval,
                    internal_parallelism=_internal_parallelism)

        print("Nonequilibrium switching driver class constructed")

        return {'topology_proposals': top_prop, 'ne_fep': ne_fep}

    else:
        _logger.info(f"\tno nonequilibrium detected.")
        htf = dict()
        hss = dict()
        _logger.info(f"\tcataloging HybridTopologyFactories...")

        for phase in phases:
            _logger.info(f"\t\tphase: {phase}:")
            #TODO write a SAMSFEP class that mirrors NonequilibriumSwitchingFEP
            _logger.info(
                f"\t\twriting HybridTopologyFactory for phase {phase}...")
            htf[phase] = HybridTopologyFactory(
                top_prop['%s_topology_proposal' % phase],
                top_prop['%s_old_positions' % phase],
                top_prop['%s_new_positions' % phase],
                neglected_new_angle_terms=top_prop[
                    f"{phase}_forward_neglected_angles"],
                neglected_old_angle_terms=top_prop[
                    f"{phase}_reverse_neglected_angles"],
                softcore_LJ_v2=setup_options['softcore_v2'],
                interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

        for phase in phases:
            # Define necessary vars to check energy bookkeeping
            _top_prop = top_prop['%s_topology_proposal' % phase]
            _htf = htf[phase]
            _forward_added_valence_energy = top_prop['%s_added_valence_energy'
                                                     % phase]
            _reverse_subtracted_valence_energy = top_prop[
                '%s_subtracted_valence_energy' % phase]

            if not use_given_geometries:
                zero_state_error, one_state_error = validate_endstate_energies(
                    _top_prop,
                    _htf,
                    _forward_added_valence_energy,
                    _reverse_subtracted_valence_energy,
                    beta=1.0 / (kB * temperature),
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD
                )  #, trajectory_directory=f'{xml_directory}{phase}')
                _logger.info(f"\t\terror in zero state: {zero_state_error}")
                _logger.info(f"\t\terror in one state: {one_state_error}")
            else:
                _logger.info(
                    f"'use_given_geometries' was passed to setup; skipping endstate validation"
                )

            #TODO expose more of these options in input
            if build_samplers:

                n_states = setup_options['n_states']
                _logger.info(f"\tn_states: {n_states}")
                if 'n_replicas' not in setup_options:
                    n_replicas = n_states
                else:
                    n_replicas = setup_options['n_replicas']

                checkpoint_interval = setup_options['checkpoint_interval']

                # generating lambda protocol
                lambda_protocol = LambdaProtocol(
                    functions=setup_options['protocol-type'])
                _logger.info(
                    f'Using lambda protocol : {setup_options["protocol-type"]}'
                )

                if atom_selection:
                    selection_indices = htf[phase].hybrid_topology.select(
                        atom_selection)
                else:
                    selection_indices = None

                storage_name = str(trajectory_directory) + '/' + str(
                    trajectory_prefix) + '-' + str(phase) + '.nc'
                _logger.info(f'\tstorage_name: {storage_name}')
                _logger.info(f'\tselection_indices {selection_indices}')
                _logger.info(f'\tcheckpoint interval {checkpoint_interval}')
                reporter = MultiStateReporter(
                    storage_name,
                    analysis_particle_indices=selection_indices,
                    checkpoint_interval=checkpoint_interval)

                if phase == 'vacuum':
                    endstates = False
                else:
                    endstates = True

                if setup_options['fe_type'] == 'fah':
                    _logger.info('SETUP FOR FAH DONE')
                    return {
                        'topology_proposals': top_prop,
                        'hybrid_topology_factories': htf
                    }

                if setup_options['fe_type'] == 'sams':
                    hss[phase] = HybridSAMSSampler(
                        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
                            timestep=timestep,
                            collision_rate=1.0 / unit.picosecond,
                            n_steps=n_steps_per_move_application,
                            reassign_velocities=False,
                            n_restart_attempts=20,
                            constraint_tolerance=1e-06),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'],
                        online_analysis_minimum_iterations=10,
                        flatness_criteria=setup_options['flatness-criteria'],
                        gamma0=setup_options['gamma0'])
                    hss[phase].setup(n_states=n_states,
                                     n_replicas=n_replicas,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
                elif setup_options['fe_type'] == 'repex':
                    hss[phase] = HybridRepexSampler(
                        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
                            timestep=timestep,
                            collision_rate=1.0 / unit.picosecond,
                            n_steps=n_steps_per_move_application,
                            reassign_velocities=False,
                            n_restart_attempts=20,
                            constraint_tolerance=1e-06),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'])
                    hss[phase].setup(n_states=n_states,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
            else:
                _logger.info(f"omitting sampler construction")

            if serialize_systems:
                # save the systems and the states
                pass

                _logger.info('WRITING OUT XML FILES')
                #old_thermodynamic_state, new_thermodynamic_state, hybrid_thermodynamic_state, _ = generate_endpoint_thermodynamic_states(htf[phase].hybrid_system, _top_prop)

                xml_directory = f'{setup_options["trajectory_directory"]}/xml/'
                if not os.path.exists(xml_directory):
                    os.makedirs(xml_directory)
                from perses.utils import data
                _logger.info('WRITING OUT XML FILES')
                _logger.info(f'Saving the hybrid, old and new system to disk')
                data.serialize(
                    htf[phase].hybrid_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-hybrid-system.gz'
                )
                data.serialize(
                    htf[phase]._old_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-old-system.gz'
                )
                data.serialize(
                    htf[phase]._new_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-new-system.gz'
                )

        return {
            'topology_proposals': top_prop,
            'hybrid_topology_factories': htf,
            'hybrid_samplers': hss
        }
Beispiel #6
0
def run_equilibrium(task):
    """
    Run n_iterations*nsteps_equil integration steps.  n_iterations mcmc moves are conducted in the initial equilibration, returning n_iterations
    reduced potentials.  This is the guess as to the burn-in time for a production.  After which, a single mcmc move of nsteps_equil
    will be conducted at a time, including a time-series (pymbar) analysis to determine whether the data are decorrelated.
    The loop will conclude when a single configuration yields an iid sample.  This will be saved.

    Arguments
    ---------
    task : EquilibriumFEPTask namedtuple
        The namedtuple should have an 'input' argument.  The 'input' argument is a dict characterized with at least the following keys and values:
        {
         thermodynamic_state: (<openmmtools.states.CompoundThermodynamicState>; compound thermodynamic state comprising state at lambda = 0 (1)),
         nsteps_equil: (<int>; The number of equilibrium steps that a move should make when apply is called),
         topology: (<mdtraj.Topology>; an MDTraj topology object used to construct the trajectory),
         n_iterations: (<int>; The number of times to apply the move. Note that this is not the number of steps of dynamics),
         splitting: (<str>; The splitting string for the dynamics),
         atom_indices_to_save: (<list of int, default None>; list of indices to save when excluding waters, for instance. If None, all indices are saved.),
         trajectory_filename: (<str, optional, default None>; Full filepath of trajectory files. If none, trajectory files are not written.),
         max_size: (<float>; maximum size of the trajectory numpy array allowable until it is written to disk),
         timer: (<bool, default False>; whether to time all parts of the equilibrium run),
         _minimize: (<bool, default False>; whether to minimize the sampler_state before conducting equilibration),
         file_iterator: (<int, default 0>; which index to begin writing files),
         timestep: (<unit.Quantity=float*unit.femtoseconds>; dynamical timestep)
         }

    Returns
    -------
    out_task : EquilibriumFEPTask namedtuple
        output EquilibriumFEPTask after equilibration
    """
    inputs = task.inputs

    timer = inputs['timer']  #bool
    timers = {}
    file_numsnapshots = []
    file_iterator = inputs['file_iterator']

    # creating copies in case computation is parallelized
    if timer: start = time.time()
    thermodynamic_state = copy.deepcopy(inputs['thermodynamic_state'])
    sampler_state = task.sampler_state
    if timer: timers['copy_state'] = time.time() - start

    if inputs['_minimize']:
        _logger.debug(f"conducting minimization")
        if timer: start = time.time()
        minimize(thermodynamic_state, sampler_state)
        if timer: timers['minimize'] = time.time() - start

    #get the atom indices we need to subset the topology and positions
    if timer: start = time.time()
    if not inputs['atom_indices_to_save']:
        atom_indices = list(range(inputs['topology'].n_atoms))
        subset_topology = inputs['topology']
    else:
        atom_indices = inputs['atom_indices_to_save']
        subset_topology = inputs['topology'].subset(atom_indices)
    if timer: timers['define_topology'] = time.time() - start

    n_atoms = subset_topology.n_atoms

    #construct the MCMove:
    mc_move = mcmc.LangevinSplittingDynamicsMove(
        n_steps=inputs['nsteps_equil'],
        splitting=inputs['splitting'],
        timestep=inputs['timestep'])
    mc_move.n_restart_attempts = 10

    #create a numpy array for the trajectory
    trajectory_positions, trajectory_box_lengths, trajectory_box_angles = list(
    ), list(), list()
    reduced_potentials = list()

    #loop through iterations and apply MCMove, then collect positions into numpy array
    _logger.debug(f"conducting {inputs['n_iterations']} of production")
    if timer: eq_times = []

    init_file_iterator = inputs['file_iterator']
    for iteration in tqdm.trange(inputs['n_iterations']):
        if timer: start = time.time()
        _logger.debug(f"\tconducting iteration {iteration}")
        mc_move.apply(thermodynamic_state, sampler_state)

        #add reduced potential to reduced_potential_final_frame_list
        reduced_potentials.append(
            thermodynamic_state.reduced_potential(sampler_state))

        #trajectory_positions[iteration, :,:] = sampler_state.positions[atom_indices, :].value_in_unit_system(unit.md_unit_system)
        trajectory_positions.append(
            sampler_state.positions[atom_indices, :].value_in_unit_system(
                unit.md_unit_system))

        #get the box lengths and angles
        a, b, c, alpha, beta, gamma = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(
            *sampler_state.box_vectors)
        trajectory_box_lengths.append([a, b, c])
        trajectory_box_angles.append([alpha, beta, gamma])

        #if tajectory positions is too large, we have to write it to disk and start fresh
        if np.array(trajectory_positions).nbytes > inputs['max_size']:
            trajectory = md.Trajectory(
                np.array(trajectory_positions),
                subset_topology,
                unitcell_lengths=np.array(trajectory_box_lengths),
                unitcell_angles=np.array(trajectory_box_angles))
            if inputs['trajectory_filename'] is not None:
                new_filename = inputs[
                    'trajectory_filename'][:-2] + f'{file_iterator:04}' + '.h5'
                file_numsnapshots.append(
                    (new_filename, len(trajectory_positions)))
                file_iterator += 1
                write_equilibrium_trajectory(trajectory, new_filename)

                #re_initialize the trajectory positions, box_lengths, box_angles
                trajectory_positions, trajectory_box_lengths, trajectory_box_angles = list(
                ), list(), list()

        if timer: eq_times.append(time.time() - start)

    if timer: timers['run_eq'] = eq_times
    _logger.debug(f"production done")

    #If there is a trajectory filename passed, write out the results here:
    if timer: start = time.time()
    if inputs['trajectory_filename'] is not None:
        #construct trajectory object:
        if trajectory_positions != list():
            #if it is an empty list, then the last iteration satistifed max_size and wrote the trajectory to disk;
            #in this case, we can just skip this
            trajectory = md.Trajectory(
                np.array(trajectory_positions),
                subset_topology,
                unitcell_lengths=np.array(trajectory_box_lengths),
                unitcell_angles=np.array(trajectory_box_angles))
            if file_iterator == init_file_iterator:  #this means that no files have been written yet
                new_filename = inputs[
                    'trajectory_filename'][:-2] + f'{file_iterator:04}' + '.h5'
                file_numsnapshots.append(
                    (new_filename, len(trajectory_positions)))
            else:
                new_filename = inputs[
                    'trajectory_filename'][:-2] + f'{file_iterator+1:04}' + '.h5'
                file_numsnapshots.append(
                    (new_filename, len(trajectory_positions)))
            write_equilibrium_trajectory(trajectory, new_filename)

    if timer: timers['write_traj'] = time.time() - start

    if not timer:
        timers = {}

    out_task = EquilibriumFEPTask(sampler_state=sampler_state,
                                  inputs=task.inputs,
                                  outputs={
                                      'reduced_potentials': reduced_potentials,
                                      'files': file_numsnapshots,
                                      'timers': timers
                                  })
    return out_task
Beispiel #7
0
def run_equilibrium(
        equilibrium_result: EquilibriumResult,
        thermodynamic_state: states.ThermodynamicState,
        nsteps_equil: int,
        topology: md.Topology,
        n_iterations: int,
        atom_indices_to_save: List[int] = None,
        trajectory_filename: str = None,
        splitting: str = "V R O R V",
        timestep: unit.Quantity = 1.0 * unit.femtoseconds
) -> EquilibriumResult:
    """
    Run nsteps of equilibrium sampling at the specified thermodynamic state and return the final sampler state
    as well as a trajectory of the positions after each application of an MCMove. This means that if the MCMove
    is configured to run 1000 steps of dynamics, and n_iterations is 100, there will be 100 frames in the resulting
    trajectory; these are the result of 100,000 steps (1000*100) of dynamics.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult
       EquilibriumResult namedtuple containing the information necessary to resume
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state (including context parameters) that should be used
    nsteps_equil : int
        The number of equilibrium steps that a move should make when apply is called
    topology : mdtraj.Topology
        an MDTraj topology object used to construct the trajectory
    n_iterations : int
        The number of times to apply the move. Note that this is not the number of steps of dynamics; it is
        n_iterations*n_steps (which is set in the MCMove).
    splitting: str, default "V R O H R V"
        The splitting string for the dynamics
    atom_indices_to_save : list of int, default None
        list of indices to save (when excluding waters, for instance). If None, all indices are saved.
    trajectory_filename : str, optional, default None
        Full filepath of trajectory files. If none, trajectory files are not written.
    splitting: str, default "V R O H R V"
        The splitting string for the dynamics
    Returns
    -------
    equilibrium_result : EquilibriumResult
        Container namedtuple that has the SamplerState for resuming, an MDTraj trajectory, and the reduced potential of the
        final frame.
    """
    sampler_state = equilibrium_result.sampler_state
    #get the atom indices we need to subset the topology and positions
    if atom_indices_to_save is None:
        atom_indices = list(range(topology.n_atoms))
        subset_topology = topology
    else:
        subset_topology = topology.subset(atom_indices_to_save)
        atom_indices = atom_indices_to_save

    n_atoms = subset_topology.n_atoms

    #construct the MCMove:
    mc_move = mcmc.LangevinSplittingDynamicsMove(n_steps=nsteps_equil,
                                                 splitting=splitting)
    mc_move.n_restart_attempts = 10

    #create a numpy array for the trajectory
    trajectory_positions = np.zeros([n_iterations, n_atoms, 3])
    trajectory_box_lengths = np.zeros([n_iterations, 3])
    trajectory_box_angles = np.zeros([n_iterations, 3])

    #loop through iterations and apply MCMove, then collect positions into numpy array
    for iteration in range(n_iterations):
        mc_move.apply(thermodynamic_state, sampler_state)

        trajectory_positions[iteration, :] = sampler_state.positions[
            atom_indices, :].value_in_unit_system(unit.md_unit_system)

        #get the box lengths and angles
        a, b, c, alpha, beta, gamma = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(
            *sampler_state.box_vectors)
        trajectory_box_lengths[iteration, :] = [a, b, c]
        trajectory_box_angles[iteration, :] = [alpha, beta, gamma]

    #construct trajectory object:
    trajectory = md.Trajectory(trajectory_positions,
                               subset_topology,
                               unitcell_lengths=trajectory_box_lengths,
                               unitcell_angles=trajectory_box_angles)

    #get the reduced potential from the final frame for endpoint perturbations
    reduced_potential_final_frame = thermodynamic_state.reduced_potential(
        sampler_state)

    #construct equilibrium result object
    equilibrium_result = EquilibriumResult(sampler_state,
                                           reduced_potential_final_frame)

    #If there is a trajectory filename passed, write out the results here:
    if trajectory_filename is not None:
        write_equilibrium_trajectory(equilibrium_result, trajectory,
                                     trajectory_filename)

    return equilibrium_result
Beispiel #8
0
    def __init__(self, molecules: List[str], output_filename: str, ncmc_switching_times: Dict[str, int], equilibrium_steps: Dict[str, int], timestep: unit.Quantity, initial_molecule: str=None, geometry_options: Dict=None):
        self._molecules = [SmallMoleculeSetProposalEngine.canonicalize_smiles(molecule) for molecule in molecules]
        environments = ['explicit', 'vacuum']
        temperature = 298.15 * unit.kelvin
        pressure = 1.0 * unit.atmospheres
        constraints = app.HBonds
        self._storage = NetCDFStorage(output_filename)
        self._ncmc_switching_times = ncmc_switching_times
        self._n_equilibrium_steps = equilibrium_steps
        self._geometry_options = geometry_options

        # Create a system generator for our desired forcefields.
        from perses.rjmc.topology_proposal import SystemGenerator
        system_generators = dict()
        from pkg_resources import resource_filename
        gaff_xml_filename = resource_filename('perses', 'data/gaff.xml')
        barostat = openmm.MonteCarloBarostat(pressure, temperature)
        system_generators['explicit'] = SystemGenerator([gaff_xml_filename, 'tip3p.xml'],
                                                        forcefield_kwargs={'nonbondedCutoff': 9.0 * unit.angstrom,
                                                                           'implicitSolvent': None,
                                                                           'constraints': constraints,
                                                                           'ewaldErrorTolerance': 1e-5,
                                                                           'hydrogenMass': 3.0*unit.amu}, periodic_forcefield_kwargs = {'nonbondedMethod': app.PME}
                                                        barostat=barostat)
        system_generators['vacuum'] = SystemGenerator([gaff_xml_filename],
                                                      forcefield_kwargs={'implicitSolvent': None,
                                                                         'constraints': constraints,
                                                                         'hydrogenMass': 3.0*unit.amu}, nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff})

        #
        # Create topologies and positions
        #
        topologies = dict()
        positions = dict()

        from openmoltools import forcefield_generators
        forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml')
        forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)

        # Create molecule in vacuum.
        from perses.utils.openeye import extractPositionsFromOEMol
        from openmoltools.openeye import smiles_to_oemol, generate_conformers
        if initial_molecule:
            smiles = initial_molecule
        else:
            smiles = np.random.choice(molecules)
        molecule = smiles_to_oemol(smiles)
        molecule = generate_conformers(molecule, max_confs=1)
        topologies['vacuum'] = forcefield_generators.generateTopologyFromOEMol(molecule)
        positions['vacuum'] = extractPositionsFromOEMol(molecule)

        # Create molecule in solvent.
        modeller = app.Modeller(topologies['vacuum'], positions['vacuum'])
        modeller.addSolvent(forcefield, model='tip3p', padding=9.0 * unit.angstrom)
        topologies['explicit'] = modeller.getTopology()
        positions['explicit'] = modeller.getPositions()

        # Set up the proposal engines.
        proposal_metadata = {}
        proposal_engines = dict()

        for environment in environments:
            proposal_engines[environment] = SmallMoleculeSetProposalEngine(self._molecules,
                                                                               system_generators[environment])

        # Generate systems
        systems = dict()
        for environment in environments:
            systems[environment] = system_generators[environment].build_system(topologies[environment])

        # Define thermodynamic state of interest.

        thermodynamic_states = dict()
        thermodynamic_states['explicit'] = states.ThermodynamicState(system=systems['explicit'],
                                                                     temperature=temperature, pressure=pressure)
        thermodynamic_states['vacuum'] = states.ThermodynamicState(system=systems['vacuum'], temperature=temperature)

        # Create SAMS samplers
        from perses.samplers.samplers import ExpandedEnsembleSampler, SAMSSampler
        mcmc_samplers = dict()
        exen_samplers = dict()
        sams_samplers = dict()
        for environment in environments:
            storage = NetCDFStorageView(self._storage, envname=environment)

            if self._geometry_options:
                n_torsion_divisions = self._geometry_options['n_torsion_divsions'][environment]
                use_sterics = self._geometry_options['use_sterics'][environment]

            else:
                n_torsion_divisions = 180
                use_sterics = False

            geometry_engine = geometry.FFAllAngleGeometryEngine(storage=storage, n_torsion_divisions=n_torsion_divisions, use_sterics=use_sterics)
            move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep, splitting="V R O R V",
                                                       n_restart_attempts=10)
            chemical_state_key = proposal_engines[environment].compute_state_key(topologies[environment])
            if environment == 'explicit':
                sampler_state = states.SamplerState(positions=positions[environment],
                                                    box_vectors=systems[environment].getDefaultPeriodicBoxVectors())
            else:
                sampler_state = states.SamplerState(positions=positions[environment])
            mcmc_samplers[environment] = mcmc.MCMCSampler(thermodynamic_states[environment], sampler_state, move)


            exen_samplers[environment] = ExpandedEnsembleSampler(mcmc_samplers[environment], topologies[environment],
                                                                 chemical_state_key, proposal_engines[environment],
                                                                 geometry_engine,
                                                                 options={'nsteps': self._ncmc_switching_times[environment]}, storage=storage, ncmc_write_interval=self._ncmc_switching_times[environment])
            exen_samplers[environment].verbose = True
            sams_samplers[environment] = SAMSSampler(exen_samplers[environment], storage=storage)
            sams_samplers[environment].verbose = True

        # Create test MultiTargetDesign sampler.
        from perses.samplers.samplers import MultiTargetDesign
        target_samplers = {sams_samplers['explicit']: 1.0, sams_samplers['vacuum']: -1.0}
        designer = MultiTargetDesign(target_samplers, storage=self._storage)

        # Store things.
        self.molecules = molecules
        self.environments = environments
        self.topologies = topologies
        self.positions = positions
        self.system_generators = system_generators
        self.proposal_engines = proposal_engines
        self.thermodynamic_states = thermodynamic_states
        self.mcmc_samplers = mcmc_samplers
        self.exen_samplers = exen_samplers
        self.sams_samplers = sams_samplers
        self.designer = designer
Beispiel #9
0
    def __init__(self,
                 topology_proposal,
                 pos_old,
                 new_positions,
                 use_dispersion_correction=False,
                 forward_functions=None,
                 ncmc_nsteps=100,
                 nsteps_per_iteration=1,
                 concurrency=4,
                 platform_name="OpenCL",
                 temperature=300.0 * unit.kelvin,
                 trajectory_directory=None,
                 trajectory_prefix=None):

        #construct the hybrid topology factory object
        self._factory = HybridTopologyFactory(
            topology_proposal,
            pos_old,
            new_positions,
            use_dispersion_correction=use_dispersion_correction)

        #use default functions if none specified
        if forward_functions == None:
            self._forward_functions = self.default_forward_functions
        else:
            self._forward_functions = forward_functions

        #reverse functions to get a symmetric protocol
        self._reverse_functions = {
            param: param_formula.replace("lambda", "(1-lambda)")
            for param, param_formula in self._forward_functions.items()
        }

        #set up some class attributes
        self._hybrid_system = self._factory.hybrid_system
        self._initial_hybrid_positions = self._factory.hybrid_positions
        self._concurrency = concurrency
        self._ncmc_nsteps = ncmc_nsteps
        self._nsteps_per_iteration = nsteps_per_iteration
        self._trajectory_prefix = trajectory_prefix
        self._trajectory_directory = trajectory_directory
        self._zero_endpoint_n_atoms = topology_proposal.n_atoms_old
        self._one_endpoint_n_atoms = topology_proposal.n_atoms_new

        #initialize lists for results
        self._forward_nonequilibrium_trajectories = []
        self._reverse_nonequilibrium_trajectories = []
        self._forward_nonequilibrium_cumulative_works = []
        self._reverse_nonequilibrium_cumulative_works = []
        self._forward_nonequilibrium_results = []
        self._reverse_nonequilibrium_results = []
        self._forward_total_work = []
        self._reverse_total_work = []
        self._lambda_zero_reduced_potentials = []
        self._lambda_one_reduced_potentials = []
        self._nonalchemical_zero_endpt_reduced_potentials = []
        self._nonalchemical_one_endpt_reduced_potentials = []
        self._nonalchemical_zero_results = []
        self._nonalchemical_one_results = []

        #Set the number of times that the nonequilbrium move will have to be run in order to complete a protocol:
        if self._ncmc_nsteps % self._nsteps_per_iteration != 0:
            logging.warning(
                "The number of ncmc steps is not divisible by the number of steps per iteration. You may not have a full protocol."
            )
        self._n_iterations_per_call = self._ncmc_nsteps // self._nsteps_per_iteration

        #create the thermodynamic state
        lambda_zero_alchemical_state = alchemy.AlchemicalState.from_system(
            self._hybrid_system)
        lambda_one_alchemical_state = copy.deepcopy(
            lambda_zero_alchemical_state)

        #ensure their states are set appropriately
        lambda_zero_alchemical_state.set_alchemical_parameters(0.0)
        lambda_one_alchemical_state.set_alchemical_parameters(0.0)

        #create the base thermodynamic state with the hybrid system
        self._thermodynamic_state = ThermodynamicState(self._hybrid_system,
                                                       temperature=temperature)

        #Create thermodynamic states for the nonalchemical endpoints
        self._nonalchemical_zero_thermodynamic_state = ThermodynamicState(
            topology_proposal.old_system, temperature=temperature)
        self._nonalchemical_one_thermodynamic_state = ThermodynamicState(
            topology_proposal.new_system, temperature=temperature)

        #Now create the compound states with different alchemical states
        self._lambda_zero_thermodynamic_state = CompoundThermodynamicState(
            self._thermodynamic_state,
            composable_states=[lambda_zero_alchemical_state])
        self._lambda_one_thermodynamic_state = CompoundThermodynamicState(
            self._thermodynamic_state,
            composable_states=[lambda_one_alchemical_state])

        #create the forward and reverse integrators
        self._forward_integrator = AlchemicalNonequilibriumLangevinIntegrator(
            alchemical_functions=self._forward_functions,
            nsteps_neq=ncmc_nsteps,
            temperature=temperature)
        self._reverse_integrator = AlchemicalNonequilibriumLangevinIntegrator(
            alchemical_functions=self._reverse_functions,
            nsteps_neq=ncmc_nsteps,
            temperature=temperature)

        #create the forward and reverse MCMoves
        self._forward_ne_mc_move = NonequilibriumSwitchingMove(
            self._forward_integrator, self._nsteps_per_iteration)
        self._reverse_ne_mc_move = NonequilibriumSwitchingMove(
            self._reverse_integrator, self._nsteps_per_iteration)

        #create the equilibrium MCMove
        self._equilibrium_mc_move = mcmc.LangevinSplittingDynamicsMove()

        #set the SamplerState for the lambda 0 and 1 equilibrium simulations
        self._lambda_one_sampler_state = SamplerState(
            self._initial_hybrid_positions,
            box_vectors=self._hybrid_system.getDefaultPeriodicBoxVectors())
        self._lambda_zero_sampler_state = copy.deepcopy(
            self._lambda_one_sampler_state)

        #initialize by minimizing
        self.minimize()

        #initialize the trajectories for the lambda 0 and 1 equilibrium simulations
        a_0, b_0, c_0, alpha_0, beta_0, gamma_0 = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(
            *self._lambda_zero_sampler_state.box_vectors)
        a_1, b_1, c_1, alpha_1, beta_1, gamma_1 = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(
            *self._lambda_one_sampler_state.box_vectors)

        self._lambda_zero_traj = md.Trajectory(
            np.array(self._lambda_zero_sampler_state.positions),
            self._factory.hybrid_topology,
            unitcell_lengths=[a_0, b_0, c_0],
            unitcell_angles=[alpha_0, beta_0, gamma_0])
        self._lambda_one_traj = md.Trajectory(
            np.array(self._lambda_one_sampler_state.positions),
            self._factory.hybrid_topology,
            unitcell_lengths=[a_1, b_1, c_1],
            unitcell_angles=[alpha_1, beta_1, gamma_1])