def getSetupOptions(filename):
    """
    Read the input yaml file, fill in default options, validate them and
    create the output directory.

    Parameters
    ----------
    filename : str
        .yaml file containing simulation parameters

    Returns
    -------
    setup_options : dict
        options provided in the yaml file, with defaults filled in for every
        option that was omitted. The phases to simulate are available as
        ``setup_options['phases']`` (default ``['complex', 'solvent']``).

    Raises
    ------
    KeyError
        if a key that is read without a default (e.g. 'fe_type' or
        'trajectory_directory') is missing from the yaml file
    Exception
        for fe_type 'neq': if 'run_type' is invalid, if an 'anneal' run lacks
        'out_trajectory_prefix' or its equilibrium files, or if 'n_particles'
        is missing
    AssertionError
        for fe_type 'neq': if 'lambdas' / 'trailblaze' / 'resample' are
        malformed; for any fe_type: if the output directory already exists
    """
    # NOTE(review): FullLoader accepts more than plain YAML types; only load
    # trusted files (consider yaml.safe_load if plain types are sufficient).
    with open(filename, 'r') as yaml_file:
        setup_options = yaml.load(yaml_file, Loader=yaml.FullLoader)

    _logger.info("\tDetecting phases...")
    if 'phases' not in setup_options:
        setup_options['phases'] = ['complex', 'solvent']
        _logger.warning(
            '\t\tNo phases provided - running complex and solvent as default.')
    else:
        _logger.info(f"\t\tphases detected: {setup_options['phases']}")

    # defaults that are applied without logging
    setup_options.setdefault('protocol-type', 'default')
    setup_options.setdefault('temperature', 300.)
    setup_options.setdefault('pressure', 1.)
    setup_options.setdefault('solvent_padding', 9.)
    setup_options.setdefault('ionic_strength', 0.15)
    setup_options.setdefault('small_molecule_forcefield', None)
    setup_options.setdefault('small_molecule_parameters_cache', None)

    if 'remove_constraints' not in setup_options:
        setup_options['remove_constraints'] = False
        _logger.info('No constraints will be removed')
    # remove_constraints can be 'all', 'not water' or False
    # NOTE(review): the warning text advertises 'none' as valid, but the
    # string 'none' is not in the accepted list below - confirm the intent.
    elif setup_options['remove_constraints'] not in [
            'all', 'not water', False
    ]:
        _logger.warning(
            f"remove_constraints value of {setup_options['remove_constraints']} not understood. "
            "'all', 'none' or 'not water' are valid options. NOT REMOVING ANY CONSTRAINTS"
        )
        setup_options['remove_constraints'] = False

    _set_logged_default(setup_options, 'spectators', None, 'No spectators')

    if 'complex_box_dimensions' not in setup_options:
        setup_options['complex_box_dimensions'] = None
    else:
        setup_options['complex_box_dimensions'] = tuple(
            float(x) for x in setup_options['complex_box_dimensions'])

    setup_options.setdefault('solvent_box_dimensions', None)
    setup_options.setdefault('nonbonded_method', 'PME')
    setup_options.setdefault('render_atom_map', True)
    setup_options.setdefault('n_steps_per_move_application', 1)

    _set_logged_default(setup_options, 'run_type', None,
                        "\t\t\trun_type is not specified; default to None")

    _logger.info("\tDetecting fe_type...")
    # any other fe_type value gets no sampler-specific defaults
    fe_type = setup_options['fe_type']
    if fe_type == 'sams':
        _logger.info("\t\tfe_type: sams")
        _apply_sams_defaults(setup_options)
    elif fe_type == 'repex':
        _logger.info("\t\tfe_type: repex")
        _apply_repex_defaults(setup_options)
    elif fe_type == 'neq':
        # there are some neq attributes that are not used with the equilibrium samplers...
        _logger.info("\t\tfe_type: neq")
        _apply_neq_defaults(setup_options)

    trajectory_directory = setup_options['trajectory_directory']

    # 'neglect_angles' and 'anneal_1,4s' are defaulted only here (not silently
    # earlier on) so that the "not specified" messages are actually emitted.
    if 'neglect_angles' not in setup_options:
        setup_options['neglect_angles'] = False
        _logger.info("\t'neglect_angles' not specified: default to 'False'.")
    else:
        _logger.info(
            f"\t'neglect_angles' detected: {setup_options['neglect_angles']}.")

    # convert the lists of expression names into the mapping expressions
    for source_key, target_key in (('atom_expression', 'atom_expr'),
                                   ('bond_expression', 'bond_expr')):
        if source_key in setup_options:
            from perses.utils.openeye import generate_expression
            setup_options[target_key] = generate_expression(
                setup_options[source_key])
        else:
            setup_options[target_key] = None

    setup_options.setdefault('map_strength', None)

    _set_logged_default(
        setup_options, 'anneal_1,4s', False,
        "\t'anneal_1,4s' not specified: default to 'False' (i.e. since 1,4 interactions are not being annealed, they are being used to make new/old atom proposals in the geometry engine.)"
    )
    _set_logged_default(setup_options, 'softcore_v2', False,
                        "\t'softcore_v2' not specified: default to 'False'")

    _logger.info(f"\tCreating '{trajectory_directory}'...")
    assert (
        not os.path.exists(trajectory_directory)
    ), f'Output trajectory directory "{trajectory_directory}" already exists. Refusing to overwrite'
    os.makedirs(trajectory_directory)

    return setup_options


def _set_logged_default(setup_options, key, default, message):
    """Store `default` under `key` and log `message` if `key` is missing."""
    if key not in setup_options:
        _logger.info(message)
        setup_options[key] = default


def _apply_sams_defaults(setup_options):
    """Fill in defaults for options that are specific to fe_type 'sams'."""
    _set_logged_default(
        setup_options, 'flatness-criteria', 'minimum-visits',
        "\t\t\tflatness-criteria not specified: default to minimum-visits.")
    _set_logged_default(setup_options, 'offline-freq', 10,
                        "\t\t\toffline-freq not specified: default to 10.")
    _set_logged_default(setup_options, 'gamma0', 1.,
                        "\t\t\tgamma0 not specified: default to 1.0.")
    _set_logged_default(setup_options, 'beta_factor', 0.8,
                        "\t\t\tbeta_factor not specified: default to 0.8.")
    setup_options.setdefault('n_replicas', 1)


def _apply_repex_defaults(setup_options):
    """Fill in defaults for options that are specific to fe_type 'repex'."""
    _set_logged_default(setup_options, 'offline-freq', 10,
                        "\t\t\toffline-freq not specified: default to 10.")


def _apply_neq_defaults(setup_options):
    """Fill in defaults and validate options that are specific to fe_type 'neq'.

    Expects 'run_type' and 'phases' to be present in `setup_options` already.
    """
    _set_logged_default(
        setup_options, 'n_equilibrium_steps_per_iteration', 1000,
        "\t\t\tn_equilibrium_steps_per_iteration not specified: default to 1000."
    )
    _set_logged_default(
        setup_options, 'measure_shadow_work', False,
        "\t\t\tmeasure_shadow_work not specified: default to False")
    _set_logged_default(
        setup_options, 'write_ncmc_configuration', False,
        "\t\t\twrite_ncmc_configuration not specified: default to False")
    _set_logged_default(
        setup_options, 'neq_integrator', 'langevin',
        "\t\t\tneq_integrator not specified; default to 'langevin'")

    # for dask implementation
    _set_logged_default(setup_options, 'processes', 0,
                        "\t\t\tprocesses is not specified; default to 0")
    _set_logged_default(setup_options, 'adapt', True,
                        "\t\t\tadapt is not specified; default to True")
    _set_logged_default(
        setup_options, 'max_file_size', 10 * 1024e3,
        "\t\t\tmax_file_size is not specified; default to 10MB")
    _set_logged_default(
        setup_options, 'lambda_protocol', 'default',
        "\t\t\tlambda_protocol is not specified; default to 'default'")
    _set_logged_default(setup_options, 'LSF', False,
                        "\t\t\tLSF is not specified; default to False")

    # 'run_type' always exists at this point, so only its value is validated
    run_type = setup_options['run_type']
    if run_type == 'anneal':
        if 'out_trajectory_prefix' not in setup_options:
            raise Exception(
                "'out_trajectory_prefix' must be defined if 'anneal' is called.  Aborting!"
            )
        _logger.info(
            f"'run_type' was called as {setup_options['run_type']} attempting to detect file"
        )
        # NOTE(review): the guard above checks 'out_trajectory_prefix' while
        # the path below is built from 'trajectory_prefix' - confirm which
        # key is intended.
        for phase in setup_options['phases']:
            path = os.path.join(
                setup_options['trajectory_directory'],
                f"{setup_options['trajectory_prefix']}_{phase}_fep.eq.pkl")
            if not os.path.exists(path):
                raise Exception(f"{path} could not be found.  Aborting!")
            _logger.info(
                f"\t\t\tfound {path}; loading and proceeding to anneal")
    elif run_type == 'None':
        # the string 'None' is normalised to the None object
        setup_options['run_type'] = None
    elif str(run_type) not in ['None', 'anneal', 'equilibrate']:
        raise Exception(
            f"'run_type' must be None, 'anneal', or 'equilibrate'; input was specified as {setup_options['run_type']} with type {type(setup_options['run_type'])}"
        )

    # to instantiate the particles:
    if 'trailblaze' not in setup_options:
        assert 'lambdas' in setup_options, "'lambdas' is not in setup_options, and 'trailblaze' is False. One must be specified.  Aborting!"
        # exact type check kept on purpose: isinstance() would accept bool
        assert type(setup_options['lambdas']
                    ) == int, "lambdas is not an int.  Aborting!"
        setup_options['trailblaze'] = None
    else:
        assert type(
            setup_options['trailblaze']
        ) == dict, "trailblaze is specified, but is not a dict"

    if 'resample' in setup_options:
        assert type(
            setup_options['resample']) == dict, "'resample' is not a dict"
        assert {'criterion', 'method', 'threshold'}.issubset(
            setup_options['resample'].keys()
        ), "'resample' does not contain necessary keys"
    else:
        _logger.info("\t\tresample is not specified; defaulting to None")
        setup_options['resample'] = None

    if 'n_particles' not in setup_options:
        raise Exception(
            "for particle annealing, 'n_particles' must be specified")
    if 'direction' not in setup_options:
        _logger.info(
            "\t\t\tdirection is not specified; default to (running both forward and reverse)"
        )
        setup_options['direction'] = ['forward', 'reverse']
    else:
        _logger.info(
            f"\t\t\tthe directions are as follows: {setup_options['direction']}"
        )

    _set_logged_default(
        setup_options, 'ncmc_save_interval', None,
        "\t\t\tncmc_save_interval not specified: default to None.")
    if 'ncmc_collision_rate_ps' not in setup_options:
        _logger.info("\t\t\tcollision_rate not specified: default to np.inf.")
        setup_options['ncmc_collision_rate_ps'] = np.inf / unit.picoseconds
    else:
        # attach units to the user-supplied value
        setup_options['ncmc_collision_rate_ps'] /= unit.picoseconds
    _set_logged_default(
        setup_options, 'ncmc_rethermalize', False,
        "\t\t\tncmc_rethermalize not specified; default to False.")

    # now lastly, for the algorithm_4 options:
    _set_logged_default(setup_options, 'observable', 'ESS',
                        "\t\t\tobservable is not specified; default to ESS")
    _set_logged_default(
        setup_options, 'trailblaze_observable_threshold', None,
        "\t\t\ttrailblaze_observable_threshold is not specified; default to None"
    )
    _set_logged_default(
        setup_options, 'resample_observable_threshold', None,
        "\t\t\tresample_observable_threshold is not specified; default to None"
    )
    _set_logged_default(
        setup_options, 'ncmc_num_integration_steps', 1,
        "\t\t\tncmc_num_integration_steps is not specified; default to 1")
    _set_logged_default(
        setup_options, 'resampling_method', 'multinomial',
        "\t\t\tresampling_method is not specified; default to 'multinomial'")
    _set_logged_default(
        setup_options, 'online_protocol', None,
        "\t\t\tonline_protocol is not specified; default to None")

    # setting the writeout to 1 for now (overrides any user-provided value)
    setup_options['n_steps_per_move_application'] = 1
Beispiel #2
0
def run_neq_fah_setup(ligand_file,
                      old_ligand_index,
                      new_ligand_index,
                      forcefield_files,
                      trajectory_directory,
                      complex_box_dimensions=(9.8, 9.8, 9.8),
                      solvent_box_dimensions=(3.5, 3.5, 3.5),
                      timestep=4.0,
                      eq_splitting='V R O R V',
                      neq_splitting='V R H O R V',
                      measure_shadow_work=False,
                      pressure=1.0,
                      temperature=300. * unit.kelvin,
                      solvent_padding=9 * unit.angstroms,
                      phases=['complex', 'solvent', 'vacuum'],
                      phase_project_ids=None,
                      protein_pdb=None,
                      receptor_mol2=None,
                      small_molecule_forcefield='openff-1.2.0',
                      small_molecule_parameters_cache=None,
                      atom_expression=['IntType'],
                      bond_expression=['DefaultBonds'],
                      spectators=None,
                      neglect_angles=False,
                      anneal_14s=False,
                      nonbonded_method='PME',
                      map_strength=None,
                      softcore_v2=False,
                      save_setup_pickle_as=None,
                      render_atom_map=False,
                      alchemical_functions=DEFAULT_ALCHEMICAL_FUNCTIONS,
                      num_equilibration_iterations=1000,
                      num_equilibration_steps_per_iteration=250,
                      nsteps_eq=250000,
                      nsteps_neq=250000,
                      fe_type='fah',
                      collision_rate=1. / unit.picoseconds,
                      collision_rate_setup=90. / unit.picoseconds,
                      constraint_tolerance=1e-6,
                      n_steps_per_move_application=250,
                      globalVarFreq=250,
                      setup='small_molecule',
                      protein_kwargs=None,
                      ionic_strength=0.15 * unit.molar,
                      remove_constraints='not water',
                      **kwargs):
    """
    main execution function that will:
        - create topology proposals and hybrid topology factories for the requested setup
        - create, for every resulting phase, the directory
          f"{os.getcwd()}/{phase_project_ids[phase]}/RUNS/{trajectory_directory}"
        - save the hybrid factory (`htf.npz`) and serialize its hybrid system (`system.xml.bz2`)
        - create/serialize a nonequilibrium integrator (`integrator.xml`) via `make_neq_integrator`
        - relax the generated structure via `relax_structure` and serialize the resulting state (`state.xml.bz2`)
        - save the hybrid topology and, if a relaxed state is available, a solvent-stripped `hybrid_{phase}.pdb`
        - create the core file via `make_core_file`, a `references` record and an atom map image (`atom_map.png`)

    A failure of the relaxation step is logged and recorded in the references
    record ('passed_strucutre_relax': False); the PDB snapshot is then skipped
    for that phase and the remaining files are still written.


    >>> run_neq_fah_setup('ligand.sdf', 0, 1,['amber/ff14SB.xml','amber/tip3p_standard.xml','amber/tip3p_HFE_multivalent.xml'],'RUN0',protein_pdb='protein.pdb', phases=['complex','solvent','vacuum'],phase_project_ids={'complex':14320,'solvent':14321,'vacuum':'vacuum'})

    arguments
        ligand_file : str
            .sdf (or any openeye-readable) file containing ligand labeled indices and structures
        old_ligand_index : int
            index of the old ligand
        new_ligand_index : int
            index of the new ligand
        forcefield_files : list of str
            list of forcefields to use for complex/solvent parameterization
        trajectory_directory : str
            RUNXXX for FAH deployment
        complex_box_dimensions : Vec3, default=(9.8, 9.8, 9.8)
            define box dimensions of complex phase (in nm)
        solvent_box_dimensions : Vec3, default=(3.5, 3.5, 3.5)
            define box dimensions of solvent phase (in nm)
        timestep : float or simtk.unit.Quantity, default=4.
            step size of nonequilibrium integration; a bare int/float is interpreted as femtoseconds
        eq_splitting : str, default = 'V R O R V'
            splitting string of relaxation dynamics
        neq_splitting : str, default = 'V R H O R V'
            splitting string of nonequilibrium dynamics
        measure_shadow_work : bool, default=False
            True/False to measure shadow work
        pressure: float or simtk.unit.Quantity, default=1.
            pressure for simulation; a bare int/float is interpreted as atmospheres
        temperature: simtk.unit.Quantity, default=300.*unit.kelvin,
            temperature for simulation; a bare int/float is interpreted as kelvin
        solvent_padding : simtk.unit.Quantity, default=9*unit.angstroms
            forwarded through the setup options
        phases: list, default = ['complex','solvent','vacuum']
            phases to run; each one must be a key of `phase_project_ids`.
            With setup='protein' the phases produced are 'complex' and 'apo'
        phase_project_ids : dict, default=None
            maps phase name to the project id used as top-level output directory.
            Effectively required: the default of None fails the per-phase check
        protein_pdb : str, default=None
            name of protein file
        receptor_mol2 : str, default=None
            name of receptor file if protein_pdb not provided
        small_molecule_forcefield : str, default='openff-1.2.0'
            small molecule forcefield filename
        small_molecule_parameters_cache : str, default=None
            cache file containing small molecule forcefield files
        atom_expression : list default=['IntType']
            list of string for atom mapping criteria. see oechem.OEExprOpts for options
        bond_expression : list default=['DefaultBonds']
            list of string for bond mapping criteria. see oechem.OEExprOpts for options
        map_strength : 'str', default=None
            atom and bond expressions will be ignored, and either a 'weak', 'default' or 'strong' map_strength will be used.
        spectators : str, default=None
            path to any non-alchemical atoms in simulation
        neglect_angles : bool, default=False
            whether to use angle terms in building of unique-new groups. False is strongly recommended
        anneal_14s : bool, default False
            Whether to anneal 1,4 interactions over the protocol;
            note that the 'anneal_1,4s' setup option is always set to False here
        nonbonded_method : str, default='PME'
            nonbonded method to use
        softcore_v2=bool, default=False
            whether to use v2 softcore
        save_setup_pickle_as : str, default=None
            forwarded through the setup options
        render_atom_map : bool, default=False
            forwarded through the setup options; the atom map image is rendered regardless of this value
        alchemical_functions : dict, default=DEFAULT_ALCHEMICAL_FUNCTIONS
            alchemical functions for transformation
        num_equilibration_iterations: int, default=1000
            number of equilibration iterations to do during set up
        num_equilibration_steps_per_iteration: int, default=250,
            number of steps per iteration for SETUP (original note: 250 steps of 2fs, 1000 times, i.e. 500ps of equilibration)
        nsteps_eq : int, default=250000
            number of normal MD steps to take for FAH integrator for PRODUCTION
        nsteps_neq : int, default=250000
            number of nonequilibrium steps to take for FAH integrator for PRODUCTION
        fe_type : str, default='fah'
            tells setup_relative_calculation() to use the fah pipeline
        collision_rate : simtk.unit.Quantity, default=1./unit.picosecond
            collision_rate for PRODUCTION
        collision_rate_setup : simtk.unit.Quantity, default=90./unit.picosecond
            collision rate passed to the structure relaxation during SETUP
        constraint_tolerance : float, default=1e-6
            tolerance to use for constraints
        n_steps_per_move_application : int default=250
            number of equilibrium steps to take per move
        globalVarFreq : int, default=250
            forwarded through the setup options; the core file itself is written with globalVarFreq = 10 * 250
        setup : str, default='small_molecule'
            which pipeline to use, 'small_molecule' or 'protein'
        protein_kwargs : dict, default=None
            extra options merged into the setup options; when given, 'apo_box_dimensions'
            defaults to the complex box dimensions
        ionic_strength : simtk.unit.Quantity, default=0.15*unit.molar
            forwarded through the setup options
        remove_constraints : str, default='not water'
            forwarded through the setup options
        **kwargs
            merged into the setup options and also passed on to `relax_structure`
    """
    from perses.utils import data
    if isinstance(temperature, (int, float)):
        temperature = temperature * unit.kelvin

    if isinstance(timestep, (int, float)):
        timestep = timestep * unit.femtosecond

    if isinstance(pressure, (int, float)):
        pressure = pressure * unit.atmosphere

    #turn all of the args into a dict for passing to run_setup
    # HBM - this doesn't feel particularly safe
    # Also, this means that the function can't run without being called by run(), as we are requiring things that aren't arguments to this function, like 'solvent_projid'...etc
    # A copy is taken so that later changes never touch the frame's own
    # locals mapping. No new local names may be introduced above this line,
    # otherwise they would leak into the options.
    setup_options = dict(locals())
    #update the setup options w.r.t. kwargs
    setup_options.update(kwargs)
    if protein_kwargs is not None:  #update the setup options w.r.t. the protein kwargs
        setup_options.update(protein_kwargs)
        if 'apo_box_dimensions' not in setup_options:
            setup_options['apo_box_dimensions'] = setup_options[
                'complex_box_dimensions']

    #setups_allowed
    setups_allowed = ['small_molecule', 'protein']
    assert setup in setups_allowed, f"setup {setup} not in setups_allowed: {setups_allowed}"

    # check there is a project_id for each phase
    for phase in phases:
        assert (
            phase in phase_project_ids
        ), f"Phase {phase} requested, but not in phase_project_ids {phase_project_ids.keys()}"

    #some modification for fah-specific functionality:
    setup_options['trajectory_prefix'] = None
    setup_options['anneal_1,4s'] = False
    from perses.utils.openeye import generate_expression
    setup_options['atom_expr'] = generate_expression(
        setup_options['atom_expression'])
    setup_options['bond_expr'] = generate_expression(
        setup_options['bond_expression'])

    #run the run_setup to generate topology proposals and htfs
    _logger.info(f"spectators: {setup_options['spectators']}")
    if setup == 'small_molecule':
        from perses.app.setup_relative_calculation import run_setup
        setup_dict = run_setup(setup_options,
                               serialize_systems=False,
                               build_samplers=False)
        topology_proposals = setup_dict['topology_proposals']
        htfs = setup_dict['hybrid_topology_factories']
    elif setup == 'protein':
        from perses.app.relative_point_mutation_setup import PointMutationExecutor
        setup_engine = PointMutationExecutor(**setup_options)
        topology_proposals = {
            'complex': setup_engine.get_complex_htf()._topology_proposal,
            'apo': setup_engine.get_apo_htf()._topology_proposal
        }
        htfs = {
            'complex': setup_engine.get_complex_htf(),
            'apo': setup_engine.get_apo_htf()
        }

    #create solvent and complex directories
    for phase, htf in htfs.items():
        _logger.info(f'Setting up phase {phase}')
        phase_dir = f"{phase_project_ids[phase]}/RUNS"
        phase_path = os.path.join(os.getcwd(), phase_dir, trajectory_directory)
        os.makedirs(phase_path, exist_ok=True)

        # TODO - replace this with actually saving the important part of the HTF
        np.savez_compressed(f'{phase_path}/htf', htf)

        #serialize the hybrid_system
        data.serialize(htf.hybrid_system, f"{phase_path}/system.xml.bz2")

        #make and serialize an integrator
        integrator = make_neq_integrator(**setup_options)
        data.serialize(integrator, f"{phase_path}/integrator.xml")

        #create and serialize a state
        # reset per phase so that a failed relaxation can never reuse the
        # state produced for a previous phase
        state = None
        try:
            state = relax_structure(
                temperature=temperature,
                system=htf.hybrid_system,
                positions=htf.hybrid_positions,
                nequil=num_equilibration_iterations,
                n_steps_per_iteration=num_equilibration_steps_per_iteration,
                collision_rate=collision_rate_setup,
                **kwargs)

            data.serialize(state, f"{phase_path}/state.xml.bz2")
        except Exception as e:
            _logger.warning(e)
            passed = False
        else:
            passed = True

        top = htf.hybrid_topology
        np.save(f'{phase_path}/hybrid_topology', top)

        if state is None:
            # no relaxed coordinates exist, so there is nothing to snapshot
            _logger.warning(
                f"Structure relaxation failed for phase {phase}; not writing hybrid_{phase}.pdb"
            )
        else:
            import mdtraj as md
            pos = np.asarray(state.getPositions(asNumpy=True))
            traj = md.Trajectory(pos, top)
            traj.remove_solvent(exclude=['CL', 'NA'], inplace=True)
            traj.save(f'{phase_path}/hybrid_{phase}.pdb')

        #lastly, make a core.xml
        ###
        nsteps_per_cycle = 2 * nsteps_eq + 2 * nsteps_neq
        ncycles = 1
        nsteps_per_ps = 250
        nsteps = ncycles * nsteps_per_cycle
        make_core_file(numSteps=nsteps,
                       xtcFreq=1000 * nsteps_per_ps,
                       globalVarFreq=10 * nsteps_per_ps,
                       directory=phase_path)

        #create a logger for reference
        # TODO - add more details to this
        # (the misspelled key below is kept as-is: it is part of the saved record)
        references = {
            'start_ligand': old_ligand_index,
            'end_ligand': new_ligand_index,
            'protein_pdb': protein_pdb,
            'passed_strucutre_relax': passed
        }

        np.save(f'{phase_path}/references', references)

        tp = topology_proposals
        from perses.utils.smallmolecules import render_atom_mapping
        atom_map_filename = f'{phase_path}/atom_map.png'
        if setup == 'protein':
            from perses.utils.smallmolecules import render_protein_residue_atom_mapping
            render_protein_residue_atom_mapping(tp['apo'], atom_map_filename)
        else:
            old_ligand_oemol, new_ligand_oemol = tp['ligand_oemol_old'], tp[
                'ligand_oemol_new']
            _map = tp['non_offset_new_to_old_atom_map']
            render_atom_mapping(atom_map_filename, old_ligand_oemol,
                                new_ligand_oemol, _map)
Beispiel #3
0
def run_neq_fah_setup(ligand_file,
                      old_ligand_index,
                      new_ligand_index,
                      forcefield_files,
                      trajectory_directory,
                      complex_box_dimensions=(9.8, 9.8, 9.8),
                      solvent_box_dimensions=(3.5, 3.5, 3.5),
                      timestep=4.0 * unit.femtosecond,
                      eq_splitting='V R O R V',
                      neq_splitting='V R H O R V',
                      measure_shadow_work=False,
                      pressure=1.0,
                      temperature=300,
                      solvent_padding=9 * unit.angstroms,
                      phases=['complex', 'solvent', 'vacuum'],
                      protein_pdb=None,
                      receptor_mol2=None,
                      small_molecule_forcefield='openff-1.0.0',
                      small_molecule_parameters_cache=None,
                      atom_expression=['IntType'],
                      bond_expression=['DefaultBonds'],
                      spectators=None,
                      neglect_angles=False,
                      anneal_14s=False,
                      nonbonded_method='PME',
                      map_strength=None,
                      softcore_v2=False,
                      save_setup_pickle_as=None,
                      render_atom_map=False,
                      alchemical_functions=DEFAULT_ALCHEMICAL_FUNCTIONS,
                      num_equilibration_iterations=1000,
                      num_equilibration_steps_per_iteration=250,
                      nsteps_eq=250000,
                      nsteps_neq=250000,
                      fe_type='fah',
                      collision_rate=1. / unit.picoseconds,
                      collision_rate_setup=90. / unit.picoseconds,
                      constraint_tolerance=1e-6,
                      n_steps_per_move_application=250,
                      globalVarFreq=250,
                      **kwargs):
    """
    main execution function that will:
        - create a directory for each phase according to the `trajectory_directory` argument
        - create topology proposals for all phases
        - create/serialize hybrid factories for all phases
        - create/serialize an integrator (via `make_neq_integrator`) for all phases
        - relax generated structures (via `relax_structure`) for all phases
        - create/serialize a state associated with the relaxed structures
        - create/serialize a `core.xml` object for all phases
        - save a `references.npy` record and render the ligand atom map for all phases

    For each phase the output directory is
    `<cwd>/<phase_dir>/<trajectory_directory>`, where `<phase_dir>` is
    `{solvent_projid}/RUNS`, `{complex_projid}/RUNS` or `VACUUM/RUNS`.

    arguments
        ligand_file : str
            .sdf (or any openeye-readable) file containing ligand labeled indices and structures
        old_ligand_index : int
            index of the old ligand
        new_ligand_index : int
            index of the new ligand
        forcefield_files : list of str
            list of forcefields to use for complex/solvent parameterization
        trajectory_directory : str
            RUNXXX for FAH deployment
        complex_box_dimensions : Vec3, default=(9.8, 9.8, 9.8)
            define box dimensions of complex phase
        solvent_box_dimensions : Vec3, default=(3.5, 3.5, 3.5)
            define box dimensions of solvent phase
        timestep : simtk.unit.Quantity, default=4.*unit.femtosecond
            step size of nonequilibrium integration
        eq_splitting : str, default = 'V R O R V'
            splitting string of relaxation dynamics
        neq_splitting : str, default = 'V R H O R V'
            splitting string of nonequilibrium dynamics
        measure_shadow_work : bool, default=False
            True/False to measure shadow work
        pressure: float, default=1.
            pressure in atms for simulation
        temperature: float, default=300.,
            temperature in K for simulation
        solvent_padding : simtk.unit.Quantity, default=9*unit.angstroms
            forwarded to `run_setup` with the other options
        phases: list, default = ['complex','solvent','vacuum']
            phases to run, where allowed phases are 'complex','solvent','vacuum'
        protein_pdb : str, default=None
            name of protein file
        receptor_mol2 : str, default=None
            name of receptor file if protein_pdb not provided
        small_molecule_forcefield : str, default='openff-1.0.0'
            small molecule forcefield filename
        small_molecule_parameters_cache : str, default=None
            cache file containing small molecule forcefield files
        atom_expression : list default=['IntType']
            list of string for atom mapping criteria. see oechem.OEExprOpts for options
        bond_expression : list default=['DefaultBonds']
            list of string for bond mapping criteria. see oechem.OEExprOpts for options
        map_strength : 'str', default=None
            atom and bond expressions will be ignored, and either a 'weak', 'default' or 'strong' map_strength will be used.
        spectators : str, default=None
            path to any non-alchemical atoms in simulation
        neglect_angles : bool, default=False
            whether to use angle terms in building of unique-new groups. False is strongly recommended
        anneal_14s : bool, default False
            Whether to anneal 1,4 interactions over the protocol;
            NOTE(review): the 'anneal_1,4s' option handed to `run_setup` is
            always set to False below, regardless of this value - confirm intent
        nonbonded_method : str, default='PME'
            nonbonded method to use
        softcore_v2=bool, default=False
            whether to use v2 softcore
        save_setup_pickle_as : str, default=None
            forwarded to `run_setup` with the other options
        render_atom_map : bool, default=False
            forwarded to `run_setup` with the other options; this function
            itself always renders `atom_map.png` for every phase
        alchemical_functions : dict, default=DEFAULT_ALCHEMICAL_FUNCTIONS
            alchemical functions for transformation
        num_equilibration_iterations: int, default=1000
            number of equilibration steps to do during set up
        num_equilibration_steps_per_iteration: int, default=250,
            number of steps per iteration. default is 250 steps of 2fs, 1000 times which is 500ps of equilibration for SETUP
        nsteps_eq : int, default=250000
            number of normal MD steps to take for FAH integrator for PRODUCTION
        nsteps_neq : int, default=250000
            number of nonequilibrium steps to take for FAH integrator for PRODUCTION
        fe_type : str, default='fah'
            tells setup_relative_calculation() to use the fah pipeline
        collision_rate : simtk.unit.Quantity, default=1./unit.picosecond
            collision_rate for PRODUCTION
        collision_rate_setup : simtk.unit.Quantity, default=90./unit.picosecond
            collision rate passed to `relax_structure` during SETUP
        constraint_tolerance : float, default=1e-6
            tolerance to use for constraints
        n_steps_per_move_application : int default=250
            number of equilibrium steps to take per move
        globalVarFreq : int, default=250
            forwarded with the other options; the `globalVarFreq` written to
            `core.xml` is fixed at 10 * 250 steps and does not use this value
        **kwargs
            extra options merged into the options dict (overriding same-named
            arguments). 'solvent_projid' / 'complex_projid' are read from the
            options dict when the solvent / complex phase is set up.

    raises
        ValueError
            if `run_setup` returns a phase other than 'solvent', 'complex' or 'vacuum'
    """
    from perses.app.setup_relative_calculation import run_setup
    from perses.utils import data
    #turn all of the args into a dict for passing to run_setup
    # Take a copy: the mapping returned by locals() may be the interpreter's
    # live frame dictionary, which must not be used as a mutable options store.
    setup_options = dict(locals())
    setup_options.update(kwargs)

    #some modification for fah-specific functionality:
    setup_options['trajectory_prefix'] = None
    setup_options['anneal_1,4s'] = False
    from perses.utils.openeye import generate_expression
    setup_options['atom_expr'] = generate_expression(
        setup_options['atom_expression'])
    setup_options['bond_expr'] = generate_expression(
        setup_options['bond_expression'])

    #run the run_setup to generate topology proposals and htfs
    _logger.info(f"spectators: {setup_options['spectators']}")
    setup_dict = run_setup(setup_options,
                           serialize_systems=False,
                           build_samplers=False)
    topology_proposals = setup_dict['topology_proposals']
    htfs = setup_dict['hybrid_topology_factories']

    # Imported once here rather than on every loop iteration.
    import mdtraj as md
    import dicttoxml
    from xml.dom.minidom import parseString
    from perses.utils.smallmolecules import render_atom_mapping

    #create solvent and complex directories
    for phase in htfs.keys():
        _logger.info(f'PHASE RUNNING: {phase}')
        _logger.info(f'Setting up phase {phase}')
        if phase == 'solvent':
            phase_dir = f"{setup_options['solvent_projid']}/RUNS"
        elif phase == 'complex':
            phase_dir = f"{setup_options['complex_projid']}/RUNS"
        elif phase == 'vacuum':
            phase_dir = 'VACUUM/RUNS'
        else:
            # Without this, an unknown phase would silently reuse (and
            # overwrite) the previous phase's directory.
            raise ValueError(
                f"unrecognised phase '{phase}'; expected 'solvent', 'complex' or 'vacuum'"
            )
        output_dir = os.path.join(os.getcwd(), phase_dir, trajectory_directory)
        # makedirs also creates the intermediate '<projid>/RUNS' directories.
        os.makedirs(output_dir, exist_ok=True)

        htf = htfs[phase]
        np.savez_compressed(f'{output_dir}/htf', htf)

        #serialize the hybrid_system
        data.serialize(htf.hybrid_system, f"{output_dir}/system.xml.bz2")

        #make and serialize an integrator
        integrator = make_neq_integrator(**setup_options)
        data.serialize(integrator, f"{output_dir}/integrator.xml")

        #create and serialize a state
        # Reset per phase so a failed relaxation can never pick up the state
        # left over from the previous phase.
        state = None
        try:
            state = relax_structure(
                temperature=temperature,
                system=htf.hybrid_system,
                positions=htf.hybrid_positions,
                nequil=num_equilibration_iterations,
                n_steps_per_iteration=num_equilibration_steps_per_iteration,
                collision_rate=collision_rate_setup)

            data.serialize(state, f"{output_dir}/state.xml.bz2")
        except Exception as e:
            # Best effort: the failure is recorded in `references` below.
            _logger.warning(e)
            passed = False
        else:
            passed = True

        top = htf.hybrid_topology
        np.save(f'{output_dir}/hybrid_topology', top)
        if state is not None:
            pos = np.asarray(state.getPositions(asNumpy=True))
            traj = md.Trajectory(pos, top)
            traj.remove_solvent(exclude=['CL', 'NA'], inplace=True)
            traj.save(f'{output_dir}/hybrid_{phase}.pdb')
        else:
            _logger.warning(
                f'No relaxed state available for phase {phase}; skipping hybrid_{phase}.pdb'
            )

        #lastly, make a core.xml
        nsteps_per_cycle = 2 * nsteps_eq + 2 * nsteps_neq
        ncycles = 1
        nsteps_per_ps = 250
        core_parameters = {
            'numSteps': ncycles * nsteps_per_cycle,
            'xtcFreq': 1000 * nsteps_per_ps,  # once per ns
            'xtcAtoms': 'solute',
            'precision': 'mixed',
            'globalVarFilename': 'globals.csv',
            'globalVarFreq': 10 * nsteps_per_ps,
        }
        # Serialize core.xml
        with open(f'{output_dir}/core.xml', 'wt') as outfile:
            xml = dicttoxml.dicttoxml(core_parameters,
                                      custom_root='config',
                                      attr_type=False)
            outfile.write(parseString(xml).toprettyxml())

        #create a logger for reference
        references = {
            'start_ligand': old_ligand_index,
            'end_ligand': new_ligand_index,
            'protein_pdb': protein_pdb,
            'passed_strucutre_relax': passed
        }

        np.save(f'{output_dir}/references', references)

        tp = topology_proposals
        render_atom_mapping(f'{output_dir}/atom_map.png',
                            tp['ligand_oemol_old'], tp['ligand_oemol_new'],
                            tp['non_offset_new_to_old_atom_map'])
# Example #4
# 0
def test_generate_expression():
    """Check that `generate_expression` returns the expected integer for two option names."""
    from perses.utils.openeye import generate_expression
    expected = 134217984
    observed = generate_expression(['Hybridization', 'IntType'])
    assert observed == expected, "generate_expression didn't return expected value"
# Example #5
# 0
def compare_energies(mol_name="naphthalene",
                     ref_mol_name="benzene",
                     atom_expression=['Hybridization'],
                     bond_expression=['Hybridization']):
    """
    Make an atom map where the molecule at either lambda endpoint is identical, and check that the energies are also the same.

    The reference molecule supplies the initial system, topology and
    positions; a proposal towards the other molecule is generated, a hybrid
    factory is built from it, and `validate_endstate_energies` is run on the
    result using the 'Reference' platform.

    Parameters
    ----------
    mol_name : str, default "naphthalene"
        IUPAC name of the proposed molecule (converted with `iupac_to_oemol`)
    ref_mol_name : str, default "benzene"
        IUPAC name of the reference molecule (converted with `iupac_to_oemol`)
    atom_expression : list of str, default ['Hybridization']
        atom mapping criteria, converted with `generate_expression`
    bond_expression : list of str, default ['Hybridization']
        bond mapping criteria, converted with `generate_expression`

    Returns
    -------
    factory : HybridTopologyFactory
        the hybrid factory built from the proposal
    """
    from openmmtools.constants import kB
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from perses.annihilation.relative import HybridTopologyFactory
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import simtk.openmm as openmm
    from perses.utils.openeye import iupac_to_oemol, extractPositionsFromOEMol, generate_conformers
    from perses.utils.openeye import generate_expression
    from openmmforcefields.generators import SystemGenerator
    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    from perses.tests.utils import validate_endstate_energies
    from openforcefield.topology import Molecule

    temperature = 300 * unit.kelvin
    # Compute kT and inverse temperature.
    kT = kB * temperature
    beta = 1.0 / kT
    ENERGY_THRESHOLD = 1e-6

    atom_expr = generate_expression(atom_expression)
    bond_expr = generate_expression(bond_expression)

    mol = iupac_to_oemol(mol_name)
    mol = generate_conformers(mol, max_confs=1)

    refmol = iupac_to_oemol(ref_mol_name)
    refmol = generate_conformers(refmol, max_confs=1)

    molecules = [Molecule.from_openeye(oemol) for oemol in [refmol, mol]]
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }

    system_generator = SystemGenerator(forcefields=forcefield_files,
                                       barostat=barostat,
                                       forcefield_kwargs=forcefield_kwargs,
                                       small_molecule_forcefield='gaff-2.11',
                                       molecules=molecules,
                                       cache=None)

    topology = generateTopologyFromOEMol(refmol)
    system = system_generator.create_system(topology)
    positions = extractPositionsFromOEMol(refmol)

    proposal_engine = SmallMoleculeSetProposalEngine([refmol, mol],
                                                     system_generator)
    proposal = proposal_engine.propose(system,
                                       topology,
                                       atom_expr=atom_expr,
                                       bond_expr=bond_expr)
    geometry_engine = FFAllAngleGeometryEngine()
    new_positions, _ = geometry_engine.propose(
        proposal, positions, beta=beta, validate_energy_bookkeeping=False)
    _ = geometry_engine.logp_reverse(proposal, new_positions, positions, beta)
    #make a topology proposal with the appropriate data:

    factory = HybridTopologyFactory(proposal, positions, new_positions)

    # Valence energy contributed by the unique new atoms; zero when there are none.
    if not proposal.unique_new_atoms:
        assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
        assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.forward_atoms_with_positions_reduced_potential})"
        added_valence_energy = 0.0
    else:
        added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

    # Valence energy removed with the unique old atoms; zero when there are none.
    if not proposal.unique_old_atoms:
        assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
        assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.reverse_atoms_with_positions_reduced_potential})"
        subtracted_valence_energy = 0.0
    else:
        subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

    # The returned endstate errors are not inspected here; presumably the
    # helper itself enforces ENERGY_THRESHOLD - TODO confirm.
    validate_endstate_energies(
        factory._topology_proposal,
        factory,
        added_valence_energy,
        subtracted_valence_energy,
        beta=beta,
        ENERGY_THRESHOLD=ENERGY_THRESHOLD,
        platform=openmm.Platform.getPlatformByName('Reference'))
    return factory
# Example #6
# 0
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene",
                                           proposed_mol_name="benzene",
                                           current_mol_smiles=None,
                                           proposed_mol_smiles=None,
                                           vacuum=False,
                                           render_atom_mapping=False,
                                           atom_expression=['Hybridization'],
                                           bond_expression=['Hybridization']):
    """
    This function will generate a topology proposal, old positions, and new positions with a geometry proposal (either vacuum or solvated) given a set of input iupacs or smiles.
    The function will (by default) read the iupac names first.  If they are set to None, then it will attempt to read a set of current and new smiles.
    An atom mapping image (`<old_smiles>to<new_smiles>.png`) will be generated if specified.

    Parameters
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule
    current_mol_smiles : str (default None)
        current mol smiles
    proposed_mol_smiles : str (default None)
        proposed mol smiles
    vacuum: bool (default False)
        whether to render a vacuum or solvated topology_proposal
    render_atom_mapping : bool (default False)
        whether to render the atom map of the current_mol_name and proposed_mol_name
    atom_expression : list(str), optional
        list of atom mapping criteria
    bond_expression : list(str), optional
        list of bond mapping criteria

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system

    Raises
    ------
    Exception
        if neither a pair of names nor a pair of smiles is given, or if the
        given pair cannot be converted (the underlying error is chained)
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators

    from openeye import oechem
    from openmoltools.openeye import iupac_to_oemol, smiles_to_oemol
    import perses.utils.openeye as openeye
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    import simtk.unit as unit
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    from perses.utils.openeye import generate_expression
    from openmmforcefields.generators import SystemGenerator
    from openforcefield.topology import Molecule

    atom_expr = generate_expression(atom_expression)
    bond_expr = generate_expression(bond_expression)

    # Pick the converter and inputs once; the conversion itself is shared.
    if current_mol_name is not None and proposed_mol_name is not None:
        to_oemol = iupac_to_oemol
        old_input, new_input = current_mol_name, proposed_mol_name
        failure_message = f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!"
    elif current_mol_smiles is not None and proposed_mol_smiles is not None:
        to_oemol = smiles_to_oemol
        old_input, new_input = current_mol_smiles, proposed_mol_smiles
        failure_message = "the variables are not compatible"
    else:
        raise Exception(
            "either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings."
        )

    smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
    try:
        old_oemol, new_oemol = to_oemol(old_input), to_oemol(new_input)
        old_smiles = oechem.OECreateSmiString(old_oemol, smiles_flags)
        new_smiles = oechem.OECreateSmiString(new_oemol, smiles_flags)
    except Exception as err:
        # Same exception type as before, but keep the root cause attached and
        # let KeyboardInterrupt/SystemExit propagate untouched.
        raise Exception(failure_message) from err

    old_oemol, old_system, old_positions, old_topology = openeye.createSystemFromSMILES(
        old_smiles, title="MOL")

    #correct the old positions
    old_positions = openeye.extractPositionsFromOEMol(old_oemol)
    old_positions = old_positions.in_units_of(unit.nanometers)

    new_oemol, new_system, new_positions, new_topology = openeye.createSystemFromSMILES(
        new_smiles, title="NEW")

    # The returned ffxml is not used below; the call is retained in case it
    # has side effects on the molecules - TODO confirm and remove if not.
    ffxml = forcefield_generators.generateForceFieldFromMolecules(
        [old_oemol, new_oemol])

    old_oemol.SetTitle('MOL')
    new_oemol.SetTitle('MOL')

    old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol)
    new_topology = forcefield_generators.generateTopologyFromOEMol(new_oemol)

    if not vacuum:
        nonbonded_method = app.PME
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                             300.0 * unit.kelvin, 50)
    else:
        nonbonded_method = app.NoCutoff
        barostat = None

    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }
    periodic_forcefield_kwargs = {'nonbondedMethod': nonbonded_method}
    small_molecule_forcefield = 'gaff-2.11'

    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        periodic_forcefield_kwargs=periodic_forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=[
            Molecule.from_openeye(mol) for mol in [old_oemol, new_oemol]
        ],
        cache=None)

    proposal_engine = SmallMoleculeSetProposalEngine([old_oemol, new_oemol],
                                                     system_generator,
                                                     residue_name='MOL',
                                                     atom_expr=atom_expr,
                                                     bond_expr=bond_expr,
                                                     allow_ring_breaking=True)
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=1000,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False)

    if not vacuum:
        #now to solvate
        modeller = app.Modeller(old_topology, old_positions)
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol in ['H']
            and atom.residue.name not in ['MOL', 'OLD', 'NEW']
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=system_generator.forcefield)
        modeller.addSolvent(system_generator.forcefield,
                            model='tip3p',
                            padding=9.0 * unit.angstroms)
        solvated_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()
        # Repack the positions as a numpy-backed Quantity in nanometers.
        solvated_positions = unit.quantity.Quantity(
            value=np.array([
                list(atom_pos) for atom_pos in
                solvated_positions.value_in_unit_system(unit.md_unit_system)
            ]),
            unit=unit.nanometers)
        solvated_system = system_generator.create_system(solvated_topology)

        #now to create proposal
        top_proposal = proposal_engine.propose(
            current_system=solvated_system,
            current_topology=solvated_topology,
            current_mol_id=0,
            proposed_mol_id=1)
        new_positions, _ = geometry_engine.propose(top_proposal,
                                                   solvated_positions, beta)

        if render_atom_mapping:
            # Imported under an alias so the boolean parameter of the same
            # name is not rebound to the function.
            from perses.utils.smallmolecules import render_atom_mapping as _render_atom_map
            print(
                f"new_to_old: {proposal_engine.non_offset_new_to_old_atom_map}"
            )
            _render_atom_map(f"{old_smiles}to{new_smiles}.png", old_oemol,
                             new_oemol,
                             proposal_engine.non_offset_new_to_old_atom_map)

        return top_proposal, solvated_positions, new_positions

    else:
        vacuum_system = system_generator.create_system(old_topology)
        top_proposal = proposal_engine.propose(current_system=vacuum_system,
                                               current_topology=old_topology,
                                               current_mol_id=0,
                                               proposed_mol_id=1)
        new_positions, _ = geometry_engine.propose(top_proposal, old_positions,
                                                   beta)
        if render_atom_mapping:
            # Imported under an alias so the boolean parameter of the same
            # name is not rebound to the function.
            from perses.utils.smallmolecules import render_atom_mapping as _render_atom_map
            print(f"new_to_old: {top_proposal._new_to_old_atom_map}")
            _render_atom_map(f"{old_smiles}to{new_smiles}.png", old_oemol,
                             new_oemol, top_proposal._new_to_old_atom_map)
        return top_proposal, old_positions, new_positions