예제 #1
0
def run_setup(setup_options, serialize_systems=True, build_samplers=True):
    """
    Run the setup pipeline and return the relevant setup objects based on a yaml input file.
    Parameters
    ----------
    setup_options : dict
        result of loading yaml input file
    Returns
    -------
    setup_dict: dict
        {'topology_proposals': top_prop, 'hybrid_topology_factories': htf, 'hybrid_samplers': hss}
        - 'topology_proposals':
    """
    phases = setup_options['phases']
    known_phases = ['complex', 'solvent', 'vacuum']
    for phase in phases:
        assert (
            phase in known_phases
        ), f"Unknown phase, {phase} provided. run_setup() can be used with {known_phases}"

    if 'use_given_geometries' not in list(setup_options.keys()):
        use_given_geometries = False
    else:
        assert type(setup_options['use_given_geometries']) == type(True)
        use_given_geometries = setup_options['use_given_geometries']

    if 'complex' in phases:
        _logger.info(f"\tPulling receptor (as pdb or mol2)...")
        # We'll need the protein PDB file (without missing atoms)
        try:
            protein_pdb_filename = setup_options['protein_pdb']
            assert protein_pdb_filename is not None
            receptor_mol2 = None
        except KeyError:
            try:
                receptor_mol2 = setup_options['receptor_mol2']
                assert receptor_mol2 is not None
                protein_pdb_filename = None
            except KeyError as e:
                print(
                    "Either protein_pdb or receptor_mol2 must be specified if running a complex simulation"
                )
                raise e
    else:
        protein_pdb_filename = None
        receptor_mol2 = None

    # And a ligand file containing the pair of ligands between which we will transform
    ligand_file = setup_options['ligand_file']
    _logger.info(f"\tdetected ligand file: {ligand_file}")

    # get the indices of ligands out of the file:
    old_ligand_index = setup_options['old_ligand_index']
    new_ligand_index = setup_options['new_ligand_index']
    _logger.info(
        f"\told ligand index: {old_ligand_index}; new ligand index: {new_ligand_index}"
    )

    _logger.info(f"\tsetting up forcefield files...")
    forcefield_files = setup_options['forcefield_files']

    if "timestep" in setup_options:
        if isinstance(setup_options['timestep'], float):
            timestep = setup_options['timestep'] * unit.femtoseconds
        else:
            timestep = setup_options['timestep']
        _logger.info(f"\ttimestep: {timestep}.")
    else:
        timestep = 1.0 * unit.femtoseconds
        _logger.info(f"\tno timestep detected: setting default as 1.0fs.")

    if "neq_splitting" in setup_options:
        neq_splitting = setup_options['neq_splitting']
        _logger.info(f"\tneq_splitting: {neq_splitting}")

        try:
            eq_splitting = setup_options['eq_splitting']
            _logger.info(f"\teq_splitting: {eq_splitting}")
        except KeyError as e:
            print(
                "If you specify a nonequilibrium splitting string, you must also specify an equilibrium one."
            )
            raise e

    else:
        eq_splitting = "V R O R V"
        neq_splitting = "V R O R V"
        _logger.info(
            f"\tno splitting strings specified: defaulting to neq: {neq_splitting}, eq: {eq_splitting}."
        )

    if "measure_shadow_work" in setup_options:
        measure_shadow_work = setup_options['measure_shadow_work']
        _logger.info(f"\tmeasuring shadow work: {measure_shadow_work}.")
    else:
        measure_shadow_work = False
        _logger.info(
            f"\tno measure_shadow_work specified: defaulting to False.")
    if isinstance(setup_options['pressure'], float):
        pressure = setup_options['pressure'] * unit.atmosphere
    else:
        pressure = setup_options['pressure']
    if isinstance(setup_options['temperature'], float):
        temperature = setup_options['temperature'] * unit.kelvin
    else:
        temperature = setup_options['temperature']
    if isinstance(setup_options['solvent_padding'], float):
        solvent_padding_angstroms = setup_options[
            'solvent_padding'] * unit.angstrom
    else:
        solvent_padding_angstroms = setup_options['solvent_padding']
    if isinstance(setup_options['ionic_strength'], float):
        ionic_strength = setup_options['ionic_strength'] * unit.molar
    else:
        ionic_strength = setup_options['ionic_strength']
    _logger.info(f"\tsetting pressure: {pressure}.")
    _logger.info(f"\tsetting temperature: {temperature}.")
    _logger.info(f"\tsetting solvent padding: {solvent_padding_angstroms}A.")
    _logger.info(f"\tsetting ionic strength: {ionic_strength}M.")

    setup_pickle_file = setup_options[
        'save_setup_pickle_as'] if 'save_setup_pickle_as' in list(
            setup_options) else None
    _logger.info(f"\tsetup pickle file: {setup_pickle_file}")
    trajectory_directory = setup_options['trajectory_directory']
    _logger.info(f"\ttrajectory directory: {trajectory_directory}")
    try:
        atom_map_file = setup_options['atom_map']
        with open(atom_map_file, 'r') as f:
            atom_map = {
                int(x.split()[0]): int(x.split()[1])
                for x in f.readlines()
            }
        _logger.info(f"\tsucceeded parsing atom map.")
    except Exception:
        atom_map = None
        _logger.info(f"\tno atom map specified: default to None.")

    if 'topology_proposal' not in list(setup_options.keys(
    )) or setup_options['topology_proposal'] is None:
        _logger.info(
            f"\tno topology_proposal specified; proceeding to RelativeFEPSetup...\n\n\n"
        )
        if 'set_solvent_box_dims_to_complex' in list(setup_options.keys(
        )) and setup_options['set_solvent_box_dims_to_complex']:
            set_solvent_box_dims_to_complex = True
        else:
            set_solvent_box_dims_to_complex = False

        _logger.info(
            f'Box dimensions: {setup_options["complex_box_dimensions"]} and {setup_options["solvent_box_dimensions"]}'
        )
        fe_setup = RelativeFEPSetup(
            ligand_file,
            old_ligand_index,
            new_ligand_index,
            forcefield_files,
            phases=phases,
            protein_pdb_filename=protein_pdb_filename,
            receptor_mol2_filename=receptor_mol2,
            pressure=pressure,
            temperature=temperature,
            solvent_padding=solvent_padding_angstroms,
            spectator_filenames=setup_options['spectators'],
            map_strength=setup_options['map_strength'],
            atom_expr=setup_options['atom_expr'],
            bond_expr=setup_options['bond_expr'],
            atom_map=atom_map,
            neglect_angles=setup_options['neglect_angles'],
            anneal_14s=setup_options['anneal_1,4s'],
            small_molecule_forcefield=setup_options[
                'small_molecule_forcefield'],
            small_molecule_parameters_cache=setup_options[
                'small_molecule_parameters_cache'],
            trajectory_directory=trajectory_directory,
            trajectory_prefix=setup_options['trajectory_prefix'],
            nonbonded_method=setup_options['nonbonded_method'],
            complex_box_dimensions=setup_options['complex_box_dimensions'],
            solvent_box_dimensions=setup_options['solvent_box_dimensions'],
            ionic_strength=ionic_strength,
            remove_constraints=setup_options['remove_constraints'],
            use_given_geometries=use_given_geometries)

        _logger.info(f"\twriting pickle output...")
        if setup_pickle_file is not None:
            with open(
                    os.path.join(os.getcwd(), trajectory_directory,
                                 setup_pickle_file), 'wb') as f:
                try:
                    pickle.dump(fe_setup, f)
                    _logger.info(f"\tsuccessfully dumped pickle.")
                except Exception as e:
                    print(e)
                    print("\tUnable to save setup object as a pickle")

            _logger.info(
                f"\tsetup is complete.  Writing proposals and positions for each phase to top_prop dict..."
            )
        else:
            _logger.info(
                f"\tsetup is complete.  Omitted writing proposals and positions for each phase to top_prop dict..."
            )

        top_prop = dict()
        for phase in phases:
            top_prop[f'{phase}_topology_proposal'] = getattr(
                fe_setup, f'{phase}_topology_proposal')
            top_prop[f'{phase}_geometry_engine'] = getattr(
                fe_setup, f'_{phase}_geometry_engine')
            top_prop[f'{phase}_old_positions'] = getattr(
                fe_setup, f'{phase}_old_positions')
            top_prop[f'{phase}_new_positions'] = getattr(
                fe_setup, f'{phase}_new_positions')
            top_prop[f'{phase}_added_valence_energy'] = getattr(
                fe_setup, f'_{phase}_added_valence_energy')
            top_prop[f'{phase}_subtracted_valence_energy'] = getattr(
                fe_setup, f'_{phase}_subtracted_valence_energy')
            top_prop[f'{phase}_logp_proposal'] = getattr(
                fe_setup, f'_{phase}_logp_proposal')
            top_prop[f'{phase}_logp_reverse'] = getattr(
                fe_setup, f'_{phase}_logp_reverse')
            top_prop[f'{phase}_forward_neglected_angles'] = getattr(
                fe_setup, f'_{phase}_forward_neglected_angles')
            top_prop[f'{phase}_reverse_neglected_angles'] = getattr(
                fe_setup, f'_{phase}_reverse_neglected_angles')

        top_prop['ligand_oemol_old'] = fe_setup._ligand_oemol_old
        top_prop['ligand_oemol_new'] = fe_setup._ligand_oemol_new
        top_prop[
            'non_offset_new_to_old_atom_map'] = fe_setup.non_offset_new_to_old_atom_map
        _logger.info(f"\twriting atom_mapping.png")
        atom_map_outfile = os.path.join(os.getcwd(), trajectory_directory,
                                        'atom_mapping.png')

        if 'render_atom_map' in list(
                setup_options.keys()) and setup_options['render_atom_map']:
            render_atom_mapping(atom_map_outfile, fe_setup._ligand_oemol_old,
                                fe_setup._ligand_oemol_new,
                                fe_setup.non_offset_new_to_old_atom_map)

    else:
        _logger.info(f"\tloading topology proposal from yaml setup options...")
        top_prop = np.load(setup_options['topology_proposal']).item()

    n_steps_per_move_application = setup_options[
        'n_steps_per_move_application']
    _logger.info(
        f"\t steps per move application: {n_steps_per_move_application}")
    trajectory_directory = setup_options['trajectory_directory']

    trajectory_prefix = setup_options['trajectory_prefix']
    _logger.info(f"\ttrajectory prefix: {trajectory_prefix}")

    if 'atom_selection' in setup_options:
        atom_selection = setup_options['atom_selection']
        _logger.info(f"\tatom selection detected: {atom_selection}")
    else:
        _logger.info(f"\tno atom selection detected: default to all.")
        atom_selection = 'all'

    if setup_options['fe_type'] == 'neq':
        _logger.info(f"\tInstantiating nonequilibrium switching FEP")
        n_equilibrium_steps_per_iteration = setup_options[
            'n_equilibrium_steps_per_iteration']
        ncmc_save_interval = setup_options['ncmc_save_interval']
        write_ncmc_configuration = setup_options['write_ncmc_configuration']
        if setup_options['LSF']:
            _internal_parallelism = {
                'library': ('dask', 'LSF'),
                'num_processes': setup_options['processes']
            }
        else:
            _internal_parallelism = None

        ne_fep = dict()
        for phase in phases:
            _logger.info(f"\t\tphase: {phase}")
            hybrid_factory = HybridTopologyFactory(
                top_prop['%s_topology_proposal' % phase],
                top_prop['%s_old_positions' % phase],
                top_prop['%s_new_positions' % phase],
                neglected_new_angle_terms=top_prop[
                    f"{phase}_forward_neglected_angles"],
                neglected_old_angle_terms=top_prop[
                    f"{phase}_reverse_neglected_angles"],
                softcore_LJ_v2=setup_options['softcore_v2'],
                interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

            if build_samplers:
                ne_fep[phase] = SequentialMonteCarlo(
                    factory=hybrid_factory,
                    lambda_protocol=setup_options['lambda_protocol'],
                    temperature=temperature,
                    trajectory_directory=trajectory_directory,
                    trajectory_prefix=f"{trajectory_prefix}_{phase}",
                    atom_selection=atom_selection,
                    timestep=timestep,
                    eq_splitting_string=eq_splitting,
                    neq_splitting_string=neq_splitting,
                    collision_rate=setup_options['ncmc_collision_rate_ps'],
                    ncmc_save_interval=ncmc_save_interval,
                    internal_parallelism=_internal_parallelism)

        print("Nonequilibrium switching driver class constructed")

        return {'topology_proposals': top_prop, 'ne_fep': ne_fep}

    else:
        _logger.info(f"\tno nonequilibrium detected.")
        htf = dict()
        hss = dict()
        _logger.info(f"\tcataloging HybridTopologyFactories...")

        for phase in phases:
            _logger.info(f"\t\tphase: {phase}:")
            #TODO write a SAMSFEP class that mirrors NonequilibriumSwitchingFEP
            _logger.info(
                f"\t\twriting HybridTopologyFactory for phase {phase}...")
            htf[phase] = HybridTopologyFactory(
                top_prop['%s_topology_proposal' % phase],
                top_prop['%s_old_positions' % phase],
                top_prop['%s_new_positions' % phase],
                neglected_new_angle_terms=top_prop[
                    f"{phase}_forward_neglected_angles"],
                neglected_old_angle_terms=top_prop[
                    f"{phase}_reverse_neglected_angles"],
                softcore_LJ_v2=setup_options['softcore_v2'],
                interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

        for phase in phases:
            # Define necessary vars to check energy bookkeeping
            _top_prop = top_prop['%s_topology_proposal' % phase]
            _htf = htf[phase]
            _forward_added_valence_energy = top_prop['%s_added_valence_energy'
                                                     % phase]
            _reverse_subtracted_valence_energy = top_prop[
                '%s_subtracted_valence_energy' % phase]

            if not use_given_geometries:
                zero_state_error, one_state_error = validate_endstate_energies(
                    _top_prop,
                    _htf,
                    _forward_added_valence_energy,
                    _reverse_subtracted_valence_energy,
                    beta=1.0 / (kB * temperature),
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD
                )  #, trajectory_directory=f'{xml_directory}{phase}')
                _logger.info(f"\t\terror in zero state: {zero_state_error}")
                _logger.info(f"\t\terror in one state: {one_state_error}")
            else:
                _logger.info(
                    f"'use_given_geometries' was passed to setup; skipping endstate validation"
                )

            #TODO expose more of these options in input
            if build_samplers:

                n_states = setup_options['n_states']
                _logger.info(f"\tn_states: {n_states}")
                if 'n_replicas' not in setup_options:
                    n_replicas = n_states
                else:
                    n_replicas = setup_options['n_replicas']

                checkpoint_interval = setup_options['checkpoint_interval']

                # generating lambda protocol
                lambda_protocol = LambdaProtocol(
                    functions=setup_options['protocol-type'])
                _logger.info(
                    f'Using lambda protocol : {setup_options["protocol-type"]}'
                )

                if atom_selection:
                    selection_indices = htf[phase].hybrid_topology.select(
                        atom_selection)
                else:
                    selection_indices = None

                storage_name = str(trajectory_directory) + '/' + str(
                    trajectory_prefix) + '-' + str(phase) + '.nc'
                _logger.info(f'\tstorage_name: {storage_name}')
                _logger.info(f'\tselection_indices {selection_indices}')
                _logger.info(f'\tcheckpoint interval {checkpoint_interval}')
                reporter = MultiStateReporter(
                    storage_name,
                    analysis_particle_indices=selection_indices,
                    checkpoint_interval=checkpoint_interval)

                if phase == 'vacuum':
                    endstates = False
                else:
                    endstates = True

                if setup_options['fe_type'] == 'fah':
                    _logger.info('SETUP FOR FAH DONE')
                    return {
                        'topology_proposals': top_prop,
                        'hybrid_topology_factories': htf
                    }

                if setup_options['fe_type'] == 'sams':
                    hss[phase] = HybridSAMSSampler(
                        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
                            timestep=timestep,
                            collision_rate=1.0 / unit.picosecond,
                            n_steps=n_steps_per_move_application,
                            reassign_velocities=False,
                            n_restart_attempts=20,
                            constraint_tolerance=1e-06),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'],
                        online_analysis_minimum_iterations=10,
                        flatness_criteria=setup_options['flatness-criteria'],
                        gamma0=setup_options['gamma0'])
                    hss[phase].setup(n_states=n_states,
                                     n_replicas=n_replicas,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
                elif setup_options['fe_type'] == 'repex':
                    hss[phase] = HybridRepexSampler(
                        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
                            timestep=timestep,
                            collision_rate=1.0 / unit.picosecond,
                            n_steps=n_steps_per_move_application,
                            reassign_velocities=False,
                            n_restart_attempts=20,
                            constraint_tolerance=1e-06),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'])
                    hss[phase].setup(n_states=n_states,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
            else:
                _logger.info(f"omitting sampler construction")

            if serialize_systems:
                # save the systems and the states
                pass

                _logger.info('WRITING OUT XML FILES')
                #old_thermodynamic_state, new_thermodynamic_state, hybrid_thermodynamic_state, _ = generate_endpoint_thermodynamic_states(htf[phase].hybrid_system, _top_prop)

                xml_directory = f'{setup_options["trajectory_directory"]}/xml/'
                if not os.path.exists(xml_directory):
                    os.makedirs(xml_directory)
                from perses.utils import data
                _logger.info('WRITING OUT XML FILES')
                _logger.info(f'Saving the hybrid, old and new system to disk')
                data.serialize(
                    htf[phase].hybrid_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-hybrid-system.gz'
                )
                data.serialize(
                    htf[phase]._old_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-old-system.gz'
                )
                data.serialize(
                    htf[phase]._new_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-new-system.gz'
                )

        return {
            'topology_proposals': top_prop,
            'hybrid_topology_factories': htf,
            'hybrid_samplers': hss
        }
예제 #2
0
def run_neq_fah_setup(ligand_file,
                      old_ligand_index,
                      new_ligand_index,
                      forcefield_files,
                      trajectory_directory,
                      complex_box_dimensions=(9.8, 9.8, 9.8),
                      solvent_box_dimensions=(3.5, 3.5, 3.5),
                      timestep=4.0 * unit.femtosecond,
                      eq_splitting='V R O R V',
                      neq_splitting='V R H O R V',
                      measure_shadow_work=False,
                      pressure=1.0,
                      temperature=300,
                      solvent_padding=9 * unit.angstroms,
                      phases=['complex', 'solvent', 'vacuum'],
                      protein_pdb=None,
                      receptor_mol2=None,
                      small_molecule_forcefield='openff-1.0.0',
                      small_molecule_parameters_cache=None,
                      atom_expression=['IntType'],
                      bond_expression=['DefaultBonds'],
                      spectators=None,
                      neglect_angles=False,
                      anneal_14s=False,
                      nonbonded_method='PME',
                      map_strength=None,
                      softcore_v2=False,
                      save_setup_pickle_as=None,
                      render_atom_map=False,
                      alchemical_functions=DEFAULT_ALCHEMICAL_FUNCTIONS,
                      num_equilibration_iterations=1000,
                      num_equilibration_steps_per_iteration=250,
                      nsteps_eq=250000,
                      nsteps_neq=250000,
                      fe_type='fah',
                      collision_rate=1. / unit.picoseconds,
                      collision_rate_setup=90. / unit.picoseconds,
                      constraint_tolerance=1e-6,
                      n_steps_per_move_application=250,
                      globalVarFreq=250,
                      **kwargs):
    """
    main execution function that will:
        - create a directory for each phase according to the `trajectory_directory` argument
        - make a subdirectory named f"RUN_{old_ligand_index}_{new_ligand_index}" given the specified ligand indices of the `ligand_file`
        - create topology proposals for all phases
        - create/serialize hybrid factories or all phases (and validate endstates)
        - create/serialize an openmmtools.integrators.PeriodicNonequilibriumIntegrator for all phases
        - relax generated structures with a minimizer and LangevinIntegrator for all phases
        - create/serialize a state associated with the relaxed structures
        - create/serialize a `core.xml` object for all phases

    arguments
        ligand_file : str
            .sdf (or any openeye-readable) file containing ligand labeled indices and structures
        old_ligand_index : int
            index of the old ligand
        new_ligand_index : int
            inded of the new ligand
        forcefield_files : list of str
            list of forcefields to use for complex/solvent parameterization
        trajectory_directory : str
            RUNXXX for FAH deployment
        complex_box_dimensions : Vec3, default=(9.8, 9.8, 9.8)
            define box dimensions of complex phase
        solvent_box_dimensions : Vec3, default=(3.5, 3.5, 3.5)
            define box dimensions of solvent phase
        timestep : simtk.unit.Quantity, default=4.*unit.femtosecond
            step size of nonequilibrium integration
        eq_splitting : str, default = 'V R O R V'
            splitting string of relaxation dynamics
        neq_splitting : str, default = 'V R H O R V'
            splitting string of nonequilibrium dynamics
        measure_shadow_work : bool, default=False
            True/False to measure shadow work
        pressure: float, default=1.
            pressure in atms for simulation
        temperature: float, default=300.,
            temperature in K for simulation
        phases: list, default = ['complex','solvent','vacuum']
            phases to run, where allowed phases are 'complex','solvent','vacuum'
        protein_pdb : str, default=None
            name of protein file
        receptor_mol2 : str, default=None
            name of receptor file if protein_pdb not provided
        small_molecule_forcefield : str, default='openff-1.0.0'
            small molecule forcefield filename
        small_molecule_parameters_cache : str, default=None
            cache file containing small molecule forcefield files
        atom_expression : list default=['IntType']
            list of string for atom mapping criteria. see oechem.OEExprOpts for options
        bond_expression : list default=['DefaultBonds']
            list of string for bond mapping criteria. see oechem.OEExprOpts for options
        map_strength : 'str', default=None
            atom and bond expressions will be ignored, and either a 'weak', 'default' or 'strong' map_strength will be used.
        spectators : str, default=None
            path to any non-alchemical atoms in simulation
        neglect_angles : bool, default=False
            wether to use angle terms in building of unique-new groups. False is strongly recommended
        anneal_14s : bool, default False
            Whether to anneal 1,4 interactions over the protocol;
        nonbonded_method : str, default='PME'
            nonbonded method to use
        softcore_v2=bool, default=False
            wether to use v2 softcore
        alchemical_functions : dict, default=DEFAULT_ALCHEMICAL_FUNCTIONS
            alchemical functions for transformation
        num_equilibration_iterations: int, default=1000
            number of equilibration steps to do during set up
        num_equilibration_steps_per_iteration: int, default=250,
            number of steps per iteration. default is 250 steps of 2fs, 1000 times which is 500ps of equilibration for SETUP
        nsteps_eq : int, default=250000
            number of normal MD steps to take for FAH integrator for PRODUCTION
        nsteps_neq : int, default=250000
            number of nonequilibrium steps to take for FAH integrator for PRODUCTION
        fe_type : str, default='fah'
            tells setup_relative_calculation() to use the fah pipeline
        collision_rate : simtk.unit.Quantity, default=1./unit.picosecond
            collision_rate for PRODUCTION
        collision_rate_setup : simtk.unit.Quantity, default=90./unit.picosecond
        constraint_tolerance : float, default=1e-6
            tolerance to use for constraints
        n_steps_per_move_application : int default=250
            number of equilibrium steps to take per move
    """
    from perses.app.setup_relative_calculation import run_setup
    from perses.utils import data
    #turn all of the args into a dict for passing to run_setup
    setup_options = locals()
    if 'kwargs' in setup_options.keys():
        setup_options.update(setup_options['kwargs'])

    #some modification for fah-specific functionality:
    setup_options['trajectory_prefix'] = None
    setup_options['anneal_1,4s'] = False
    from perses.utils.openeye import generate_expression
    setup_options['atom_expr'] = generate_expression(
        setup_options['atom_expression'])
    setup_options['bond_expr'] = generate_expression(
        setup_options['bond_expression'])

    #run the run_setup to generate topology proposals and htfs
    _logger.info(f"spectators: {setup_options['spectators']}")
    setup_dict = run_setup(setup_options,
                           serialize_systems=False,
                           build_samplers=False)
    topology_proposals = setup_dict['topology_proposals']
    htfs = setup_dict['hybrid_topology_factories']

    #create solvent and complex directories
    for phase in htfs.keys():
        _logger.info(f'PHASE RUNNING: {phase}')
        _logger.info(f'Setting up phase {phase}')
        if phase == 'solvent':
            phase_dir = f"{setup_options['solvent_projid']}/RUNS"
        if phase == 'complex':
            phase_dir = f"{setup_options['complex_projid']}/RUNS"
        if phase == 'vacuum':
            phase_dir = 'VACUUM/RUNS'
        dir = os.path.join(os.getcwd(), phase_dir, trajectory_directory)
        if not os.path.exists(dir):
            os.mkdir(dir)

        np.savez_compressed(f'{dir}/htf', htfs[phase])

        #serialize the hybrid_system
        data.serialize(htfs[phase].hybrid_system, f"{dir}/system.xml.bz2")

        #make and serialize an integrator
        integrator = make_neq_integrator(**setup_options)
        data.serialize(integrator, f"{dir}/integrator.xml")

        #create and serialize a state
        try:
            state = relax_structure(
                temperature=temperature,
                system=htfs[phase].hybrid_system,
                positions=htfs[phase].hybrid_positions,
                nequil=num_equilibration_iterations,
                n_steps_per_iteration=num_equilibration_steps_per_iteration,
                collision_rate=collision_rate_setup)

            data.serialize(state, f"{dir}/state.xml.bz2")
        except Exception as e:
            print(e)
            passed = False
        else:
            passed = True

        pos = state.getPositions(asNumpy=True)
        pos = np.asarray(pos)

        import mdtraj as md
        top = htfs[phase].hybrid_topology
        np.save(f'{dir}/hybrid_topology', top)
        traj = md.Trajectory(pos, top)
        traj.remove_solvent(exclude=['CL', 'NA'], inplace=True)
        traj.save(f'{dir}/hybrid_{phase}.pdb')

        #lastly, make a core.xml
        nsteps_per_cycle = 2 * nsteps_eq + 2 * nsteps_neq
        ncycles = 1
        nsteps_per_ps = 250
        core_parameters = {
            'numSteps': ncycles * nsteps_per_cycle,
            'xtcFreq': 1000 * nsteps_per_ps,  # once per ns
            'xtcAtoms': 'solute',
            'precision': 'mixed',
            'globalVarFilename': 'globals.csv',
            'globalVarFreq': 10 * nsteps_per_ps,
        }
        # Serialize core.xml
        import dicttoxml
        with open(f'{dir}/core.xml', 'wt') as outfile:
            #core_parameters = create_core_parameters(phase)
            xml = dicttoxml.dicttoxml(core_parameters,
                                      custom_root='config',
                                      attr_type=False)
            from xml.dom.minidom import parseString
            dom = parseString(xml)
            outfile.write(dom.toprettyxml())

        #create a logger for reference
        references = {
            'start_ligand': old_ligand_index,
            'end_ligand': new_ligand_index,
            'protein_pdb': protein_pdb,
            'passed_strucutre_relax': passed
        }

        np.save(f'{dir}/references', references)

        tp = topology_proposals
        from perses.utils.smallmolecules import render_atom_mapping
        render_atom_mapping(f'{dir}/atom_map.png', tp['ligand_oemol_old'],
                            tp['ligand_oemol_new'],
                            tp['non_offset_new_to_old_atom_map'])
예제 #3
0
def run_neq_fah_setup(ligand_file,
                      old_ligand_index,
                      new_ligand_index,
                      forcefield_files,
                      trajectory_directory,
                      complex_box_dimensions=(9.8, 9.8, 9.8),
                      solvent_box_dimensions=(3.5, 3.5, 3.5),
                      timestep=4.0,
                      eq_splitting='V R O R V',
                      neq_splitting='V R H O R V',
                      measure_shadow_work=False,
                      pressure=1.0,
                      temperature=300. * unit.kelvin,
                      solvent_padding=9 * unit.angstroms,
                      phases=['complex', 'solvent', 'vacuum'],
                      phase_project_ids=None,
                      protein_pdb=None,
                      receptor_mol2=None,
                      small_molecule_forcefield='openff-1.2.0',
                      small_molecule_parameters_cache=None,
                      atom_expression=['IntType'],
                      bond_expression=['DefaultBonds'],
                      spectators=None,
                      neglect_angles=False,
                      anneal_14s=False,
                      nonbonded_method='PME',
                      map_strength=None,
                      softcore_v2=False,
                      save_setup_pickle_as=None,
                      render_atom_map=False,
                      alchemical_functions=DEFAULT_ALCHEMICAL_FUNCTIONS,
                      num_equilibration_iterations=1000,
                      num_equilibration_steps_per_iteration=250,
                      nsteps_eq=250000,
                      nsteps_neq=250000,
                      fe_type='fah',
                      collision_rate=1. / unit.picoseconds,
                      collision_rate_setup=90. / unit.picoseconds,
                      constraint_tolerance=1e-6,
                      n_steps_per_move_application=250,
                      globalVarFreq=250,
                      setup='small_molecule',
                      protein_kwargs=None,
                      ionic_strength=0.15 * unit.molar,
                      remove_constraints='not water',
                      **kwargs):
    """
    main execution function that will:
        - create a directory for each phase according to the `trajectory_directory` argument
        - make a subdirectory named f"RUN_{old_ligand_index}_{new_ligand_index}" given the specified ligand indices of the `ligand_file`
        - create topology proposals for all phases
        - create/serialize hybrid factories or all phases (and validate endstates)
        - create/serialize an openmmtools.integrators.PeriodicNonequilibriumIntegrator for all phases
        - relax generated structures with a minimizer and LangevinIntegrator for all phases
        - create/serialize a state associated with the relaxed structures
        - create/serialize a `core.xml` object for all phases


    >>> run_neq_fah_setup('ligand.sdf', 0, 1,['amber/ff14SB.xml','amber/tip3p_standard.xml','amber/tip3p_HFE_multivalent.xml'],'RUN0',protein_pdb='protein.pdb', phases=['complex','solvent','vacuum'],phase_project_ids={'complex':14320,'solvent':14321,'vacuum':'vacuum'})

    arguments
        ligand_file : str
            .sdf (or any openeye-readable) file containing ligand labeled indices and structures
        old_ligand_index : int
            index of the old ligand
        new_ligand_index : int
            inded of the new ligand
        forcefield_files : list of str
            list of forcefields to use for complex/solvent parameterization
        trajectory_directory : str
            RUNXXX for FAH deployment
        complex_box_dimensions : Vec3, default=(9.8, 9.8, 9.8)
            define box dimensions of complex phase (in nm)
        solvent_box_dimensions : Vec3, default=(3.5, 3.5, 3.5)
            define box dimensions of solvent phase (in nm)
        timestep : float, default=4.
            step size of nonequilibrium integration
        eq_splitting : str, default = 'V R O R V'
            splitting string of relaxation dynamics
        neq_splitting : str, default = 'V R H O R V'
            splitting string of nonequilibrium dynamics
        measure_shadow_work : bool, default=False
            True/False to measure shadow work
        pressure: float, default=1.
            pressure in atms for simulation
        temperature: simtk.unit.Quantity, default=300.*unit.kelvin,
            temperature in K for simulation
        phases: list, default = ['complex','solvent','vacuum','apo']
            phases to run, where allowed phases are:
            'complex','solvent','vacuum','apo'
        protein_pdb : str, default=None
            name of protein file
        receptor_mol2 : str, default=None
            name of receptor file if protein_pdb not provided
        small_molecule_forcefield : str, default='openff-1.0.0'
            small molecule forcefield filename
        small_molecule_parameters_cache : str, default=None
            cache file containing small molecule forcefield files
        atom_expression : list default=['IntType']
            list of string for atom mapping criteria. see oechem.OEExprOpts for options
        bond_expression : list default=['DefaultBonds']
            list of string for bond mapping criteria. see oechem.OEExprOpts for options
        map_strength : 'str', default=None
            atom and bond expressions will be ignored, and either a 'weak', 'default' or 'strong' map_strength will be used.
        spectators : str, default=None
            path to any non-alchemical atoms in simulation
        neglect_angles : bool, default=False
            wether to use angle terms in building of unique-new groups. False is strongly recommended
        anneal_14s : bool, default False
            Whether to anneal 1,4 interactions over the protocol;
        nonbonded_method : str, default='PME'
            nonbonded method to use
        softcore_v2=bool, default=False
            wether to use v2 softcore
        alchemical_functions : dict, default=DEFAULT_ALCHEMICAL_FUNCTIONS
            alchemical functions for transformation
        num_equilibration_iterations: int, default=1000
            number of equilibration steps to do during set up
        num_equilibration_steps_per_iteration: int, default=250,
            number of steps per iteration. default is 250 steps of 2fs, 1000 times which is 500ps of equilibration for SETUP
        nsteps_eq : int, default=250000
            number of normal MD steps to take for FAH integrator for PRODUCTION
        nsteps_neq : int, default=250000
            number of nonequilibrium steps to take for FAH integrator for PRODUCTION
        fe_type : str, default='fah'
            tells setup_relative_calculation() to use the fah pipeline
        collision_rate : simtk.unit.Quantity, default=1./unit.picosecond
            collision_rate for PRODUCTION
        collision_rate_setup : simtk.unit.Quantity, default=90./unit.picosecond
        constraint_tolerance : float, default=1e-6
            tolerance to use for constraints
        n_steps_per_move_application : int default=250
            number of equilibrium steps to take per move
    """
    from perses.utils import data
    if isinstance(temperature, float) or isinstance(temperature, int):
        temperature = temperature * unit.kelvin

    if isinstance(timestep, float) or isinstance(timestep, int):
        timestep = timestep * unit.femtosecond

    if isinstance(pressure, float) or isinstance(pressure, int):
        pressure = pressure * unit.atmosphere

    #turn all of the args into a dict for passing to run_setup
    # HBM - this doesn't feel particularly safe
    # Also, this means that the function can't run without being called by run(), as we are requiring things that aren't arguments to this function, like 'solvent_projid'...etc
    setup_options = locals()
    if 'kwargs' in setup_options.keys(
    ):  #update the setup options w.r.t. kwargs
        setup_options.update(setup_options['kwargs'])
    if protein_kwargs is not None:  #update the setup options w.r.t. the protein kwargs
        setup_options.update(setup_options['protein_kwargs'])
        if 'apo_box_dimensions' not in list(setup_options.keys()):
            setup_options['apo_box_dimensions'] = setup_options[
                'complex_box_dimensions']

    #setups_allowed
    setups_allowed = ['small_molecule', 'protein']
    assert setup in setups_allowed, f"setup {setup} not in setups_allowed: {setups_allowed}"

    # check there is a project_id for each phase
    for phase in phases:
        assert (
            phase in phase_project_ids
        ), f"Phase {phase} requested, but not in phase_project_ids {phase_project_ids.keys()}"

    #some modification for fah-specific functionality:
    setup_options['trajectory_prefix'] = None
    setup_options['anneal_1,4s'] = False
    from perses.utils.openeye import generate_expression
    setup_options['atom_expr'] = generate_expression(
        setup_options['atom_expression'])
    setup_options['bond_expr'] = generate_expression(
        setup_options['bond_expression'])

    #run the run_setup to generate topology proposals and htfs
    _logger.info(f"spectators: {setup_options['spectators']}")
    if setup == 'small_molecule':
        from perses.app.setup_relative_calculation import run_setup
        setup_dict = run_setup(setup_options,
                               serialize_systems=False,
                               build_samplers=False)
        topology_proposals = setup_dict['topology_proposals']
        htfs = setup_dict['hybrid_topology_factories']
    elif setup == 'protein':
        from perses.app.relative_point_mutation_setup import PointMutationExecutor
        setup_engine = PointMutationExecutor(**setup_options)
        topology_proposals = {
            'complex': setup_engine.get_complex_htf()._topology_proposal,
            'apo': setup_engine.get_apo_htf()._topology_proposal
        }
        htfs = {
            'complex': setup_engine.get_complex_htf(),
            'apo': setup_engine.get_apo_htf()
        }

    #create solvent and complex directories
    for phase in htfs.keys():
        _logger.info(f'Setting up phase {phase}')
        phase_dir = f"{phase_project_ids[phase]}/RUNS"
        dir = os.path.join(os.getcwd(), phase_dir, trajectory_directory)
        if not os.path.exists(dir):
            os.makedirs(dir)

        # TODO - replace this with actually saving the importand part of the HTF
        np.savez_compressed(f'{dir}/htf', htfs[phase])

        #serialize the hybrid_system
        data.serialize(htfs[phase].hybrid_system, f"{dir}/system.xml.bz2")

        #make and serialize an integrator
        integrator = make_neq_integrator(**setup_options)
        data.serialize(integrator, f"{dir}/integrator.xml")

        #create and serialize a state
        try:
            state = relax_structure(
                temperature=temperature,
                system=htfs[phase].hybrid_system,
                positions=htfs[phase].hybrid_positions,
                nequil=num_equilibration_iterations,
                n_steps_per_iteration=num_equilibration_steps_per_iteration,
                collision_rate=collision_rate_setup,
                **kwargs)

            data.serialize(state, f"{dir}/state.xml.bz2")
        except Exception as e:
            _logger.warning(e)
            passed = False
        else:
            passed = True

        pos = state.getPositions(asNumpy=True)
        pos = np.asarray(pos)

        import mdtraj as md
        top = htfs[phase].hybrid_topology
        np.save(f'{dir}/hybrid_topology', top)
        traj = md.Trajectory(pos, top)
        traj.remove_solvent(exclude=['CL', 'NA'], inplace=True)
        traj.save(f'{dir}/hybrid_{phase}.pdb')

        #lastly, make a core.xml
        ###
        nsteps_per_cycle = 2 * nsteps_eq + 2 * nsteps_neq
        ncycles = 1
        nsteps_per_ps = 250
        nsteps = ncycles * nsteps_per_cycle
        make_core_file(numSteps=nsteps,
                       xtcFreq=1000 * nsteps_per_ps,
                       globalVarFreq=10 * nsteps_per_ps,
                       directory=dir)

        #create a logger for reference
        # TODO - add more details to this
        references = {
            'start_ligand': old_ligand_index,
            'end_ligand': new_ligand_index,
            'protein_pdb': protein_pdb,
            'passed_strucutre_relax': passed
        }

        np.save(f'{dir}/references', references)

        tp = topology_proposals
        from perses.utils.smallmolecules import render_atom_mapping
        atom_map_filename = f'{dir}/atom_map.png'
        if setup == 'protein':
            from perses.utils.smallmolecules import render_protein_residue_atom_mapping
            render_protein_residue_atom_mapping(tp['apo'], atom_map_filename)
        else:
            old_ligand_oemol, new_ligand_oemol = tp['ligand_oemol_old'], tp[
                'ligand_oemol_new']
            _map = tp['non_offset_new_to_old_atom_map']
            render_atom_mapping(atom_map_filename, old_ligand_oemol,
                                new_ligand_oemol, _map)
예제 #4
0
파일: utils.py 프로젝트: robbason/perses
def validate_endstate_energies(topology_proposal,
                               htf,
                               added_energy,
                               subtracted_energy,
                               beta=1.0 / kT,
                               ENERGY_THRESHOLD=1e-6,
                               platform=DEFAULT_PLATFORM,
                               trajectory_directory=None):
    """
    Function to validate that the difference between the nonalchemical versus alchemical state at lambda = 0,1 is
    equal to the difference in valence energy (forward and reverse).

    Parameters
    ----------
    topology_proposal : perses.topology_proposal.TopologyProposal object
        top_proposal for relevant transformation
    htf : perses.new_relative.HybridTopologyFactory object
        hybrid top factory for setting alchemical hybrid states
    added_energy : float
        reduced added valence energy
    subtracted_energy: float
        reduced subtracted valence energy

    Returns
    -------
    zero_state_energy_difference : float
        reduced potential difference of the nonalchemical and alchemical lambda = 0 state (corrected for valence energy).
    one_state_energy_difference : float
        reduced potential difference of the nonalchemical and alchemical lambda = 1 state (corrected for valence energy).
    """
    import copy
    #import openmmtools.cache as cache
    #context_cache = cache.global_context_cache
    from perses.dispersed.utils import configure_platform
    from perses.utils import data
    platform = configure_platform(platform.getName(),
                                  fallback_platform_name='Reference',
                                  precision='double')

    #create copies of old/new systems and set the dispersion correction
    top_proposal = copy.deepcopy(topology_proposal)
    forces = {
        top_proposal._old_system.getForce(index).__class__.__name__:
        top_proposal._old_system.getForce(index)
        for index in range(top_proposal._old_system.getNumForces())
    }
    forces['NonbondedForce'].setUseDispersionCorrection(False)
    forces = {
        top_proposal._new_system.getForce(index).__class__.__name__:
        top_proposal._new_system.getForce(index)
        for index in range(top_proposal._new_system.getNumForces())
    }
    forces['NonbondedForce'].setUseDispersionCorrection(False)

    #create copy of hybrid system, define old and new positions, and turn off dispersion correction
    hybrid_system = copy.deepcopy(htf.hybrid_system)
    hybrid_system_n_forces = hybrid_system.getNumForces()
    for force_index in range(hybrid_system_n_forces):
        forcename = hybrid_system.getForce(force_index).__class__.__name__
        if forcename == 'NonbondedForce':
            hybrid_system.getForce(force_index).setUseDispersionCorrection(
                False)

    old_positions, new_positions = htf._old_positions, htf._new_positions

    #generate endpoint thermostates
    nonalch_zero, nonalch_one, alch_zero, alch_one = generate_endpoint_thermodynamic_states(
        hybrid_system, top_proposal)

    # compute reduced energies
    #for the nonalchemical systems...
    attrib_list = [('real-old', nonalch_zero, old_positions,
                    top_proposal._old_system.getDefaultPeriodicBoxVectors()),
                   ('hybrid-old', alch_zero, htf._hybrid_positions,
                    hybrid_system.getDefaultPeriodicBoxVectors()),
                   ('hybrid-new', alch_one, htf._hybrid_positions,
                    hybrid_system.getDefaultPeriodicBoxVectors()),
                   ('real-new', nonalch_one, new_positions,
                    top_proposal._new_system.getDefaultPeriodicBoxVectors())]

    rp_list = []
    for (state_name, state, pos, box_vectors) in attrib_list:
        integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)
        context = state.create_context(integrator, platform)
        samplerstate = states.SamplerState(positions=pos,
                                           box_vectors=box_vectors)
        samplerstate.apply_to_context(context)
        rp = state.reduced_potential(context)
        rp_list.append(rp)
        energy_comps = compute_potential_components(context)
        for name, force in energy_comps:
            print("\t\t\t{}: {}".format(name, force))
        _logger.debug(
            f'added forces:{sum([energy for name, energy in energy_comps])}')
        _logger.debug(f'rp: {rp}')
        if trajectory_directory is not None:
            _logger.info(
                f'Saving {state_name} state xml to {trajectory_directory}/{state_name}-state.gz'
            )
            state = context.getState(getPositions=True,
                                     getVelocities=True,
                                     getForces=True,
                                     getEnergy=True,
                                     getParameters=True)
            data.serialize(state,
                           f'{trajectory_directory}-{state_name}-state.gz')
        del context, integrator

    nonalch_zero_rp, alch_zero_rp, alch_one_rp, nonalch_one_rp = rp_list[
        0], rp_list[1], rp_list[2], rp_list[3]

    ratio = abs((nonalch_zero_rp - alch_zero_rp + added_energy) /
                (nonalch_zero_rp + alch_zero_rp + added_energy))
    assert ratio < ENERGY_THRESHOLD, f"The ratio in energy difference for the ZERO state is {ratio}.\n This is greater than the threshold of {ENERGY_THRESHOLD}.\n real-zero: {nonalch_zero_rp} \n alc-zero: {alch_zero_rp} \nadded-valence: {added_energy}"
    ratio = abs((nonalch_one_rp - alch_one_rp + subtracted_energy) /
                (nonalch_one_rp + alch_one_rp + subtracted_energy))
    assert ratio < ENERGY_THRESHOLD, f"The ratio in energy difference for the ONE state is {ratio}.\n This is greater than the threshold of {ENERGY_THRESHOLD}.\n real-one: {nonalch_one_rp} \n alc-one: {alch_one_rp} \nsubtracted-valence: {subtracted_energy}"

    return abs(nonalch_zero_rp - alch_zero_rp +
               added_energy), abs(nonalch_one_rp - alch_one_rp +
                                  subtracted_energy)