Ejemplo n.º 1
0
def render_protein_residue_atom_mapping(topology_proposal,
                                        filename,
                                        width=1200,
                                        height=600):
    """
    Wrap the `render_atom_mapping` method around protein point mutation topologies.

    The proposal's new-to-old atom map is translated through the
    `residue_to_oemol_map` of the new and old topologies, and the resulting
    map is rendered with the `residue_oemol` of the old and new topologies.
    Atom pairs whose new index is absent from the new topology's map, or whose
    old index is absent from the old topology's map, are left out.

    TODO : make modification to `render_atom_mapping` so that the backbone atoms are not written in the output.

    arguments
        topology_proposal : perses.rjmc.topology_proposal.TopologyProposal object
            topology proposal of protein mutation
        filename : str
            filename to write the map
        width : int
            width of image
            NOTE(review): accepted but not currently forwarded to `render_atom_mapping`.
        height : int
            height of image
            NOTE(review): accepted but not currently forwarded to `render_atom_mapping`.
    """
    from perses.utils.smallmolecules import render_atom_mapping

    new_index_to_oemol = topology_proposal._new_topology.residue_to_oemol_map
    old_index_to_oemol = topology_proposal._old_topology.residue_to_oemol_map

    oe_res_maps = {}
    for omm_new_idx, omm_old_idx in topology_proposal._new_to_old_atom_map.items():
        if omm_new_idx not in new_index_to_oemol:
            continue
        try:
            oe_res_maps[new_index_to_oemol[omm_new_idx]] = old_index_to_oemol[omm_old_idx]
        except KeyError:
            # The old atom has no counterpart in the old residue's map; skip
            # this pair. Only a missing key is expected here, so anything else
            # (e.g. KeyboardInterrupt) is allowed to propagate.
            continue

    render_atom_mapping(filename,
                        topology_proposal._old_topology.residue_oemol,
                        topology_proposal._new_topology.residue_oemol,
                        oe_res_maps)
Ejemplo n.º 2
0
def test_mapping_strength_levels(pairs_of_smiles=[('Cc1ccccc1','c1ccc(cc1)N'),('CC(c1ccccc1)','O=C(c1ccccc1)'),('Oc1ccccc1','Sc1ccccc1')],test=True):
    """
    Propose every ligand pair at each map strength and compare the unique-atom counts.

    For each map strength ('weak', 'default', 'strong') and each
    (old, new) SMILES pair, a proposal is generated and the number of unique
    old / unique new atoms is printed. When `test` is true, the atom mapping
    is rendered to '<pair index>-<strength>.png' and the counts are asserted
    against the reference table below.

    Parameters
    ----------
    pairs_of_smiles : list of (str, str)
        (old ligand SMILES, new ligand SMILES) pairs
    test : bool
        whether to render the mapping and assert the expected counts
    """
    # (n unique old atoms, n unique new atoms), keyed by pair index, then map strength
    correct_results = {
        0: {'default': (3, 2), 'weak': (3, 2), 'strong': (4, 3)},
        1: {'default': (7, 3), 'weak': (6, 2), 'strong': (7, 3)},
        2: {'default': (1, 1), 'weak': (1, 1), 'strong': (2, 2)},
    }

    for strength in ('weak', 'default', 'strong'):
        for pair_index, (old_smiles, new_smiles) in enumerate(pairs_of_smiles):
            print(f"conducting {strength} mapping with ligands {old_smiles}, {new_smiles}")
            old_oemol = smiles_to_oemol(old_smiles)
            new_oemol = smiles_to_oemol(new_smiles)
            off_molecules = [
                Molecule.from_openeye(oemol) for oemol in [old_oemol, new_oemol]
            ]
            generator = SystemGenerator(
                forcefields=forcefield_files,
                barostat=barostat,
                forcefield_kwargs=forcefield_kwargs,
                nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                small_molecule_forcefield='gaff-1.81',
                molecules=off_molecules,
                cache=None)
            engine = SmallMoleculeSetProposalEngine([old_oemol, new_oemol],
                                                    generator)
            old_system, _, old_topology = OEMol_to_omm_ff(old_oemol, generator)
            print(f"running now with map strength {strength}")
            proposal = engine.propose(old_system,
                                      old_topology,
                                      map_strength=strength)
            observed = (len(proposal.unique_old_atoms),
                        len(proposal.unique_new_atoms))
            print(old_smiles, new_smiles, 'length OLD and NEW atoms',
                  observed[0], observed[1])
            if test:
                render_atom_mapping(f'{pair_index}-{strength}.png', old_oemol,
                                    new_oemol, proposal._new_to_old_atom_map)
                assert observed == correct_results[pair_index][strength], f"the mapping failed, correct results are {correct_results[pair_index][strength]}"
                print(f"the mapping worked!!!")
            print()
Ejemplo n.º 3
0
def test_ring_breaking_detection():
    """
    Check that ring-breaking transformations are detected.

    Naphthalene -> benzene must yield a non-empty atom map when ring breaking
    is allowed and an empty one when it is not. On either failure the
    offending map is rendered to 'mapping-error.png' and an Exception is raised.
    """
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from openmoltools.openeye import iupac_to_oemol, generate_conformers

    naphthalene, benzene = [
        iupac_to_oemol(name) for name in ("naphthalene", "benzene")
    ]
    naphthalene, benzene = [
        generate_conformers(oemol, max_confs=1)
        for oemol in (naphthalene, benzene)
    ]

    def report_failure(headline, atom_map):
        # Render the suspicious map for inspection, then fail loudly.
        filename = 'mapping-error.png'
        render_atom_mapping(filename, naphthalene, benzene, atom_map)
        raise Exception(
            headline +
            'Wrote atom mapping to %s for inspection; please check this.' %
            filename + str(atom_map))

    # With ring breaking allowed, a mapping must exist.
    permissive_map = SmallMoleculeSetProposalEngine._get_mol_atom_map(
        naphthalene, benzene, allow_ring_breaking=True)
    if len(permissive_map) <= 0:
        report_failure(
            'Napthalene -> benzene transformation with allow_ring_breaking=True is not returning a valid mapping\n',
            permissive_map)

    # With ring breaking forbidden, no atoms may be mapped.
    strict_map = SmallMoleculeSetProposalEngine._get_mol_atom_map(
        naphthalene, benzene, allow_ring_breaking=False)
    if len(strict_map) != 0:
        report_failure(
            'Napthalene -> benzene transformation with allow_ring_breaking=False is erroneously allowing ring breaking\n',
            strict_map)
Ejemplo n.º 4
0
def test_no_h_map():
    """
    Check that SmallMoleculeAtomMapper can build maps with hydrogen mapping prohibited.

    All kinase-inhibitor test molecules are mapped against each other, the
    mapper is dumped to 'mapperkinase_permissive.json', every stored atom map
    of every molecule pair is rendered to a PNG, and finally the proposal
    matrix is generated and checked.
    """
    from perses.tests.testsystems import KinaseInhibitorsTestSystem
    from perses.rjmc.topology_proposal import SmallMoleculeAtomMapper
    import itertools
    from perses.tests import utils
    from openeye import oechem

    test_system = KinaseInhibitorsTestSystem()
    mapper = SmallMoleculeAtomMapper(test_system.molecules,
                                     prohibit_hydrogen_mapping=True)
    mapper.map_all_molecules()

    with open('mapperkinase_permissive.json', 'w') as json_file:
        json_file.write(mapper.to_json())

    smiles_list = mapper.smiles_list
    for smiles_a, smiles_b in itertools.combinations(smiles_list, 2):
        position_a = smiles_list.index(smiles_a)
        position_b = smiles_list.index(smiles_b)
        oemol_a = mapper.get_oemol_from_smiles(smiles_a)
        oemol_b = mapper.get_oemol_from_smiles(smiles_b)

        # One image per stored map; note molecule b is passed first.
        pair_maps = mapper.get_atom_maps(smiles_a, smiles_b)
        for map_number, pair_map in enumerate(pair_maps):
            image_name = "{}_{}_map{}_permissive.png".format(
                position_a, position_b, map_number)
            render_atom_mapping(image_name, oemol_b, oemol_a, pair_map)

    mapper.generate_and_check_proposal_matrix()
Ejemplo n.º 5
0
def test_mapping_strength_levels(pairs_of_smiles=[('Cc1ccccc1', 'c1ccc(cc1)N'),
                                                  ('CC(c1ccccc1)',
                                                   'O=C(c1ccccc1)'),
                                                  ('Oc1ccccc1', 'Sc1ccccc1')],
                                 test=True):
    """
    Propose every ligand pair at each map strength and compare the unique-atom counts.

    A proposal engine is built per (map strength, ligand pair) with the map
    strength given at construction time. The number of unique old / new atoms
    is printed, asserted against the reference table when `test` is true, and
    the mapping is always rendered to '<pair index>-<strength>.png'.

    Parameters
    ----------
    pairs_of_smiles : list of (str, str)
        (old ligand SMILES, new ligand SMILES) pairs
    test : bool
        whether to assert the expected counts
    """
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from perses.rjmc import topology_proposal

    gaff_xml_filename = get_data_filename('data/gaff.xml')

    # (n unique old atoms, n unique new atoms), keyed by pair index, then map strength
    expected_counts = {
        0: {'default': (1, 0), 'weak': (1, 0), 'strong': (4, 3)},
        1: {'default': (7, 3), 'weak': (5, 1), 'strong': (7, 3)},
        2: {'default': (0, 0), 'weak': (0, 0), 'strong': (2, 2)},
    }

    for strength in ('weak', 'default', 'strong'):
        for pair_index, (old_smiles, new_smiles) in enumerate(pairs_of_smiles):
            old_oemol = generate_initial_molecule(old_smiles)
            new_oemol = generate_initial_molecule(new_smiles)
            generator = topology_proposal.SystemGenerator([gaff_xml_filename])
            engine = topology_proposal.SmallMoleculeSetProposalEngine(
                [old_smiles, new_smiles], generator, map_strength=strength)
            old_system, _, old_topology = OEMol_to_omm_ff(old_oemol)
            proposal = engine.propose(old_system, old_topology)

            n_unique_old = len(proposal.unique_old_atoms)
            n_unique_new = len(proposal.unique_new_atoms)
            print(old_smiles, new_smiles, 'length OLD and NEW atoms',
                  n_unique_old, n_unique_new)
            if test:
                assert (n_unique_old,
                        n_unique_new) == expected_counts[pair_index][strength]
            render_atom_mapping(f'{pair_index}-{strength}.png', old_oemol,
                                new_oemol, proposal._new_to_old_atom_map)
Ejemplo n.º 6
0
def test_molecular_atom_mapping():
    """
    Test the creation of atom maps between pairs of molecules from the JACS benchmark set.

    For each dataset, the first ligand in the SDF file is mapped onto every
    other ligand with `AtomMapper._get_mol_atom_map`. Each mapping is rendered
    to 'mapping-error-<i>.png' and a summary (both isomeric SMILES, the image
    filename and the raw map) is printed for manual inspection. No assertion
    is made on the maps themselves.
    """
    from openeye import oechem
    from perses.rjmc.topology_proposal import AtomMapper

    # Test mappings for JACS dataset ligands
    for dataset_name in [
            'CDK2'
    ]:  #, 'p38', 'Tyk2', 'Thrombin', 'PTP1B', 'MCL1', 'Jnk1', 'Bace']:
        # Read molecules, making sure the input stream is released afterwards
        dataset_path = 'data/schrodinger-jacs-datasets/%s_ligands.sdf' % dataset_name
        sdf_filename = resource_filename('perses', dataset_path)
        ifs = oechem.oemolistream(sdf_filename)
        try:
            # Copy each molecule: the stream yields molecules that must be
            # wrapped in a fresh OEGraphMol to be kept, as the original did.
            molecules = [
                oechem.OEGraphMol(mol) for mol in ifs.GetOEGraphMols()
            ]
        finally:
            ifs.close()

        # Build atom map for some transformations.
        # All pairwise combinations would be too slow, so only the first
        # molecule is mapped onto each of the others.
        molecule1 = molecules[0]
        for i, molecule2 in enumerate(molecules[1:]):
            new_to_old_atom_map = AtomMapper._get_mol_atom_map(
                molecule1, molecule2)
            filename = 'mapping-error-%d.png' % i
            render_atom_mapping(filename, molecule1, molecule2,
                                new_to_old_atom_map)
            msg = 'molecule 1 : %s\n' % oechem.OECreateIsoSmiString(molecule1)
            msg += 'molecule 2 : %s\n' % oechem.OECreateIsoSmiString(molecule2)
            msg += 'Wrote atom mapping to %s for inspection; please check this.' % filename
            msg += str(new_to_old_atom_map)
            print(msg)
Ejemplo n.º 7
0
def run_setup(setup_options, serialize_systems=True, build_samplers=True):
    """
    Run the setup pipeline and return the relevant setup objects based on a yaml input file.

    Parameters
    ----------
    setup_options : dict
        result of loading yaml input file
    serialize_systems : bool, default True
        on the non-'neq' path, whether the hybrid, old and new systems of each
        phase are written under ``<trajectory_directory>/xml/``
    build_samplers : bool, default True
        whether sampler objects are constructed for each phase

    Returns
    -------
    setup_dict: dict
        - fe_type 'neq':
          {'topology_proposals': top_prop, 'ne_fep': ne_fep}
        - fe_type 'fah' with build_samplers=True (returned from inside the first phase iteration):
          {'topology_proposals': top_prop, 'hybrid_topology_factories': htf}
        - otherwise:
          {'topology_proposals': top_prop, 'hybrid_topology_factories': htf, 'hybrid_samplers': hss}

    Raises
    ------
    AssertionError
        if an unknown phase is requested, 'use_given_geometries' is not a bool,
        or the receptor entry that is present is None
    KeyError
        if a required option is missing (e.g. neither 'protein_pdb' nor 'receptor_mol2'
        for a complex phase, or 'eq_splitting' when 'neq_splitting' is given)
    """
    # Every requested phase must be one of the supported ones.
    phases = setup_options['phases']
    known_phases = ['complex', 'solvent', 'vacuum']
    for phase in phases:
        assert (
            phase in known_phases
        ), f"Unknown phase, {phase} provided. run_setup() can be used with {known_phases}"

    # Optional flag, defaults to False; when True the endstate energy
    # validation further down is skipped.
    if 'use_given_geometries' not in list(setup_options.keys()):
        use_given_geometries = False
    else:
        assert type(setup_options['use_given_geometries']) == type(True)
        use_given_geometries = setup_options['use_given_geometries']

    # A complex phase needs a receptor: 'protein_pdb' is tried first and
    # 'receptor_mol2' is the fallback; exactly one of the two ends up non-None.
    if 'complex' in phases:
        _logger.info(f"\tPulling receptor (as pdb or mol2)...")
        # We'll need the protein PDB file (without missing atoms)
        try:
            protein_pdb_filename = setup_options['protein_pdb']
            assert protein_pdb_filename is not None
            receptor_mol2 = None
        except KeyError:
            try:
                receptor_mol2 = setup_options['receptor_mol2']
                assert receptor_mol2 is not None
                protein_pdb_filename = None
            except KeyError as e:
                print(
                    "Either protein_pdb or receptor_mol2 must be specified if running a complex simulation"
                )
                raise e
    else:
        protein_pdb_filename = None
        receptor_mol2 = None

    # And a ligand file containing the pair of ligands between which we will transform
    ligand_file = setup_options['ligand_file']
    _logger.info(f"\tdetected ligand file: {ligand_file}")

    # get the indices of ligands out of the file:
    old_ligand_index = setup_options['old_ligand_index']
    new_ligand_index = setup_options['new_ligand_index']
    _logger.info(
        f"\told ligand index: {old_ligand_index}; new ligand index: {new_ligand_index}"
    )

    _logger.info(f"\tsetting up forcefield files...")
    forcefield_files = setup_options['forcefield_files']

    # A float timestep is taken to be in femtoseconds; any other value is used
    # as given (presumably already carrying units -- TODO confirm).
    if "timestep" in setup_options:
        if isinstance(setup_options['timestep'], float):
            timestep = setup_options['timestep'] * unit.femtoseconds
        else:
            timestep = setup_options['timestep']
        _logger.info(f"\ttimestep: {timestep}.")
    else:
        timestep = 1.0 * unit.femtoseconds
        _logger.info(f"\tno timestep detected: setting default as 1.0fs.")

    # Splitting strings: giving 'neq_splitting' makes 'eq_splitting' mandatory;
    # giving neither falls back to "V R O R V" for both.
    if "neq_splitting" in setup_options:
        neq_splitting = setup_options['neq_splitting']
        _logger.info(f"\tneq_splitting: {neq_splitting}")

        try:
            eq_splitting = setup_options['eq_splitting']
            _logger.info(f"\teq_splitting: {eq_splitting}")
        except KeyError as e:
            print(
                "If you specify a nonequilibrium splitting string, you must also specify an equilibrium one."
            )
            raise e

    else:
        eq_splitting = "V R O R V"
        neq_splitting = "V R O R V"
        _logger.info(
            f"\tno splitting strings specified: defaulting to neq: {neq_splitting}, eq: {eq_splitting}."
        )

    # NOTE(review): measure_shadow_work is read and logged here but is not
    # used anywhere else in this function.
    if "measure_shadow_work" in setup_options:
        measure_shadow_work = setup_options['measure_shadow_work']
        _logger.info(f"\tmeasuring shadow work: {measure_shadow_work}.")
    else:
        measure_shadow_work = False
        _logger.info(
            f"\tno measure_shadow_work specified: defaulting to False.")
    # The next four keys are required (no defaults). Only float values get
    # units attached (atmosphere, kelvin, angstrom, molar); any other type,
    # including int, is passed through unchanged.
    if isinstance(setup_options['pressure'], float):
        pressure = setup_options['pressure'] * unit.atmosphere
    else:
        pressure = setup_options['pressure']
    if isinstance(setup_options['temperature'], float):
        temperature = setup_options['temperature'] * unit.kelvin
    else:
        temperature = setup_options['temperature']
    if isinstance(setup_options['solvent_padding'], float):
        solvent_padding_angstroms = setup_options[
            'solvent_padding'] * unit.angstrom
    else:
        solvent_padding_angstroms = setup_options['solvent_padding']
    if isinstance(setup_options['ionic_strength'], float):
        ionic_strength = setup_options['ionic_strength'] * unit.molar
    else:
        ionic_strength = setup_options['ionic_strength']
    _logger.info(f"\tsetting pressure: {pressure}.")
    _logger.info(f"\tsetting temperature: {temperature}.")
    _logger.info(f"\tsetting solvent padding: {solvent_padding_angstroms}A.")
    _logger.info(f"\tsetting ionic strength: {ionic_strength}M.")

    setup_pickle_file = setup_options[
        'save_setup_pickle_as'] if 'save_setup_pickle_as' in list(
            setup_options) else None
    _logger.info(f"\tsetup pickle file: {setup_pickle_file}")
    trajectory_directory = setup_options['trajectory_directory']
    _logger.info(f"\ttrajectory directory: {trajectory_directory}")
    # Optional atom map file: each line is split on whitespace and its first
    # two fields are parsed as an int -> int entry.
    # NOTE(review): any failure (missing 'atom_map' key, unreadable file,
    # malformed line) silently falls back to None and is logged as
    # "no atom map specified".
    try:
        atom_map_file = setup_options['atom_map']
        with open(atom_map_file, 'r') as f:
            atom_map = {
                int(x.split()[0]): int(x.split()[1])
                for x in f.readlines()
            }
        _logger.info(f"\tsucceeded parsing atom map.")
    except Exception:
        atom_map = None
        _logger.info(f"\tno atom map specified: default to None.")

    # Either build the topology proposals from scratch with RelativeFEPSetup,
    # or load a previously saved proposal dict from the given file.
    if 'topology_proposal' not in list(setup_options.keys(
    )) or setup_options['topology_proposal'] is None:
        _logger.info(
            f"\tno topology_proposal specified; proceeding to RelativeFEPSetup...\n\n\n"
        )
        # NOTE(review): set_solvent_box_dims_to_complex is computed here but is
        # not passed to RelativeFEPSetup or used again in this function.
        if 'set_solvent_box_dims_to_complex' in list(setup_options.keys(
        )) and setup_options['set_solvent_box_dims_to_complex']:
            set_solvent_box_dims_to_complex = True
        else:
            set_solvent_box_dims_to_complex = False

        # On this path 'complex_box_dimensions' and 'solvent_box_dimensions'
        # (and the other setup_options[...] keys below) are required.
        _logger.info(
            f'Box dimensions: {setup_options["complex_box_dimensions"]} and {setup_options["solvent_box_dimensions"]}'
        )
        fe_setup = RelativeFEPSetup(
            ligand_file,
            old_ligand_index,
            new_ligand_index,
            forcefield_files,
            phases=phases,
            protein_pdb_filename=protein_pdb_filename,
            receptor_mol2_filename=receptor_mol2,
            pressure=pressure,
            temperature=temperature,
            solvent_padding=solvent_padding_angstroms,
            spectator_filenames=setup_options['spectators'],
            map_strength=setup_options['map_strength'],
            atom_expr=setup_options['atom_expr'],
            bond_expr=setup_options['bond_expr'],
            atom_map=atom_map,
            neglect_angles=setup_options['neglect_angles'],
            anneal_14s=setup_options['anneal_1,4s'],
            small_molecule_forcefield=setup_options[
                'small_molecule_forcefield'],
            small_molecule_parameters_cache=setup_options[
                'small_molecule_parameters_cache'],
            trajectory_directory=trajectory_directory,
            trajectory_prefix=setup_options['trajectory_prefix'],
            nonbonded_method=setup_options['nonbonded_method'],
            complex_box_dimensions=setup_options['complex_box_dimensions'],
            solvent_box_dimensions=setup_options['solvent_box_dimensions'],
            ionic_strength=ionic_strength,
            remove_constraints=setup_options['remove_constraints'],
            use_given_geometries=use_given_geometries)

        # Pickling is best-effort: a failure is printed, not raised.
        # NOTE: the "writing pickle output" message is logged even when no
        # pickle file was requested.
        _logger.info(f"\twriting pickle output...")
        if setup_pickle_file is not None:
            with open(
                    os.path.join(os.getcwd(), trajectory_directory,
                                 setup_pickle_file), 'wb') as f:
                try:
                    pickle.dump(fe_setup, f)
                    _logger.info(f"\tsuccessfully dumped pickle.")
                except Exception as e:
                    print(e)
                    print("\tUnable to save setup object as a pickle")

            _logger.info(
                f"\tsetup is complete.  Writing proposals and positions for each phase to top_prop dict..."
            )
        else:
            _logger.info(
                f"\tsetup is complete.  Omitted writing proposals and positions for each phase to top_prop dict..."
            )

        # Collect the per-phase results from fe_setup into a flat dict keyed
        # by '<phase>_<quantity>'. Several quantities are read from
        # underscore-prefixed attributes of fe_setup.
        top_prop = dict()
        for phase in phases:
            top_prop[f'{phase}_topology_proposal'] = getattr(
                fe_setup, f'{phase}_topology_proposal')
            top_prop[f'{phase}_geometry_engine'] = getattr(
                fe_setup, f'_{phase}_geometry_engine')
            top_prop[f'{phase}_old_positions'] = getattr(
                fe_setup, f'{phase}_old_positions')
            top_prop[f'{phase}_new_positions'] = getattr(
                fe_setup, f'{phase}_new_positions')
            top_prop[f'{phase}_added_valence_energy'] = getattr(
                fe_setup, f'_{phase}_added_valence_energy')
            top_prop[f'{phase}_subtracted_valence_energy'] = getattr(
                fe_setup, f'_{phase}_subtracted_valence_energy')
            top_prop[f'{phase}_logp_proposal'] = getattr(
                fe_setup, f'_{phase}_logp_proposal')
            top_prop[f'{phase}_logp_reverse'] = getattr(
                fe_setup, f'_{phase}_logp_reverse')
            top_prop[f'{phase}_forward_neglected_angles'] = getattr(
                fe_setup, f'_{phase}_forward_neglected_angles')
            top_prop[f'{phase}_reverse_neglected_angles'] = getattr(
                fe_setup, f'_{phase}_reverse_neglected_angles')

        top_prop['ligand_oemol_old'] = fe_setup._ligand_oemol_old
        top_prop['ligand_oemol_new'] = fe_setup._ligand_oemol_new
        top_prop[
            'non_offset_new_to_old_atom_map'] = fe_setup.non_offset_new_to_old_atom_map
        # NOTE: the message below is logged unconditionally; the image is only
        # written when 'render_atom_map' is present and truthy.
        _logger.info(f"\twriting atom_mapping.png")
        atom_map_outfile = os.path.join(os.getcwd(), trajectory_directory,
                                        'atom_mapping.png')

        if 'render_atom_map' in list(
                setup_options.keys()) and setup_options['render_atom_map']:
            render_atom_mapping(atom_map_outfile, fe_setup._ligand_oemol_old,
                                fe_setup._ligand_oemol_new,
                                fe_setup.non_offset_new_to_old_atom_map)

    else:
        _logger.info(f"\tloading topology proposal from yaml setup options...")
        # NOTE(review): `.item()` suggests the file holds a 0-d object array
        # wrapping a dict; recent NumPy versions refuse to load object arrays
        # unless allow_pickle=True is passed -- confirm this still works.
        top_prop = np.load(setup_options['topology_proposal']).item()

    n_steps_per_move_application = setup_options[
        'n_steps_per_move_application']
    _logger.info(
        f"\t steps per move application: {n_steps_per_move_application}")
    trajectory_directory = setup_options['trajectory_directory']

    trajectory_prefix = setup_options['trajectory_prefix']
    _logger.info(f"\ttrajectory prefix: {trajectory_prefix}")

    if 'atom_selection' in setup_options:
        atom_selection = setup_options['atom_selection']
        _logger.info(f"\tatom selection detected: {atom_selection}")
    else:
        _logger.info(f"\tno atom selection detected: default to all.")
        atom_selection = 'all'

    # --- Nonequilibrium switching path ---
    if setup_options['fe_type'] == 'neq':
        _logger.info(f"\tInstantiating nonequilibrium switching FEP")
        # NOTE(review): n_equilibrium_steps_per_iteration and
        # write_ncmc_configuration are read (so the keys are required) but are
        # not used below.
        n_equilibrium_steps_per_iteration = setup_options[
            'n_equilibrium_steps_per_iteration']
        ncmc_save_interval = setup_options['ncmc_save_interval']
        write_ncmc_configuration = setup_options['write_ncmc_configuration']
        if setup_options['LSF']:
            _internal_parallelism = {
                'library': ('dask', 'LSF'),
                'num_processes': setup_options['processes']
            }
        else:
            _internal_parallelism = None

        ne_fep = dict()
        for phase in phases:
            _logger.info(f"\t\tphase: {phase}")
            # The hybrid factory is built for every phase, but it is only kept
            # (inside the sampler) when build_samplers is True.
            hybrid_factory = HybridTopologyFactory(
                top_prop['%s_topology_proposal' % phase],
                top_prop['%s_old_positions' % phase],
                top_prop['%s_new_positions' % phase],
                neglected_new_angle_terms=top_prop[
                    f"{phase}_forward_neglected_angles"],
                neglected_old_angle_terms=top_prop[
                    f"{phase}_reverse_neglected_angles"],
                softcore_LJ_v2=setup_options['softcore_v2'],
                interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

            if build_samplers:
                ne_fep[phase] = SequentialMonteCarlo(
                    factory=hybrid_factory,
                    lambda_protocol=setup_options['lambda_protocol'],
                    temperature=temperature,
                    trajectory_directory=trajectory_directory,
                    trajectory_prefix=f"{trajectory_prefix}_{phase}",
                    atom_selection=atom_selection,
                    timestep=timestep,
                    eq_splitting_string=eq_splitting,
                    neq_splitting_string=neq_splitting,
                    collision_rate=setup_options['ncmc_collision_rate_ps'],
                    ncmc_save_interval=ncmc_save_interval,
                    internal_parallelism=_internal_parallelism)

        print("Nonequilibrium switching driver class constructed")

        # serialize_systems has no effect on this path.
        return {'topology_proposals': top_prop, 'ne_fep': ne_fep}

    # --- All other fe_type values ('fah', 'sams', 'repex', ...) ---
    else:
        _logger.info(f"\tno nonequilibrium detected.")
        htf = dict()
        hss = dict()
        _logger.info(f"\tcataloging HybridTopologyFactories...")

        # First pass: build a hybrid topology factory for every phase.
        for phase in phases:
            _logger.info(f"\t\tphase: {phase}:")
            #TODO write a SAMSFEP class that mirrors NonequilibriumSwitchingFEP
            _logger.info(
                f"\t\twriting HybridTopologyFactory for phase {phase}...")
            htf[phase] = HybridTopologyFactory(
                top_prop['%s_topology_proposal' % phase],
                top_prop['%s_old_positions' % phase],
                top_prop['%s_new_positions' % phase],
                neglected_new_angle_terms=top_prop[
                    f"{phase}_forward_neglected_angles"],
                neglected_old_angle_terms=top_prop[
                    f"{phase}_reverse_neglected_angles"],
                softcore_LJ_v2=setup_options['softcore_v2'],
                interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

        # Second pass: validate endstates, build samplers and serialize systems.
        for phase in phases:
            # Define necessary vars to check energy bookkeeping
            _top_prop = top_prop['%s_topology_proposal' % phase]
            _htf = htf[phase]
            _forward_added_valence_energy = top_prop['%s_added_valence_energy'
                                                     % phase]
            _reverse_subtracted_valence_energy = top_prop[
                '%s_subtracted_valence_energy' % phase]

            if not use_given_geometries:
                zero_state_error, one_state_error = validate_endstate_energies(
                    _top_prop,
                    _htf,
                    _forward_added_valence_energy,
                    _reverse_subtracted_valence_energy,
                    beta=1.0 / (kB * temperature),
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD
                )  #, trajectory_directory=f'{xml_directory}{phase}')
                _logger.info(f"\t\terror in zero state: {zero_state_error}")
                _logger.info(f"\t\terror in one state: {one_state_error}")
            else:
                _logger.info(
                    f"'use_given_geometries' was passed to setup; skipping endstate validation"
                )

            #TODO expose more of these options in input
            if build_samplers:

                n_states = setup_options['n_states']
                _logger.info(f"\tn_states: {n_states}")
                # n_replicas defaults to n_states when not given.
                if 'n_replicas' not in setup_options:
                    n_replicas = n_states
                else:
                    n_replicas = setup_options['n_replicas']

                checkpoint_interval = setup_options['checkpoint_interval']

                # generating lambda protocol
                lambda_protocol = LambdaProtocol(
                    functions=setup_options['protocol-type'])
                _logger.info(
                    f'Using lambda protocol : {setup_options["protocol-type"]}'
                )

                # atom_selection defaults to 'all' above, so the else branch is
                # only reached when a falsy 'atom_selection' was supplied.
                if atom_selection:
                    selection_indices = htf[phase].hybrid_topology.select(
                        atom_selection)
                else:
                    selection_indices = None

                storage_name = str(trajectory_directory) + '/' + str(
                    trajectory_prefix) + '-' + str(phase) + '.nc'
                _logger.info(f'\tstorage_name: {storage_name}')
                _logger.info(f'\tselection_indices {selection_indices}')
                _logger.info(f'\tcheckpoint interval {checkpoint_interval}')
                reporter = MultiStateReporter(
                    storage_name,
                    analysis_particle_indices=selection_indices,
                    checkpoint_interval=checkpoint_interval)

                # The vacuum phase is set up without endstates.
                if phase == 'vacuum':
                    endstates = False
                else:
                    endstates = True

                # NOTE(review): for 'fah' this returns during the first phase
                # iteration -- after the reporter above was constructed, before
                # any later phase is validated, and before the serialization
                # block below runs. Confirm this is intended.
                if setup_options['fe_type'] == 'fah':
                    _logger.info('SETUP FOR FAH DONE')
                    return {
                        'topology_proposals': top_prop,
                        'hybrid_topology_factories': htf
                    }

                # Any fe_type other than 'sams' / 'repex' builds no sampler
                # here and leaves hss without an entry for this phase.
                if setup_options['fe_type'] == 'sams':
                    hss[phase] = HybridSAMSSampler(
                        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
                            timestep=timestep,
                            collision_rate=1.0 / unit.picosecond,
                            n_steps=n_steps_per_move_application,
                            reassign_velocities=False,
                            n_restart_attempts=20,
                            constraint_tolerance=1e-06),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'],
                        online_analysis_minimum_iterations=10,
                        flatness_criteria=setup_options['flatness-criteria'],
                        gamma0=setup_options['gamma0'])
                    hss[phase].setup(n_states=n_states,
                                     n_replicas=n_replicas,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
                elif setup_options['fe_type'] == 'repex':
                    # Unlike the SAMS branch, n_replicas is not passed to setup().
                    hss[phase] = HybridRepexSampler(
                        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
                            timestep=timestep,
                            collision_rate=1.0 / unit.picosecond,
                            n_steps=n_steps_per_move_application,
                            reassign_velocities=False,
                            n_restart_attempts=20,
                            constraint_tolerance=1e-06),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'])
                    hss[phase].setup(n_states=n_states,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
            else:
                _logger.info(f"omitting sampler construction")

            if serialize_systems:
                # save the systems and the states
                pass

                _logger.info('WRITING OUT XML FILES')
                #old_thermodynamic_state, new_thermodynamic_state, hybrid_thermodynamic_state, _ = generate_endpoint_thermodynamic_states(htf[phase].hybrid_system, _top_prop)

                # Create <trajectory_directory>/xml/ on first use, then write
                # the hybrid, old and new systems of this phase into it.
                xml_directory = f'{setup_options["trajectory_directory"]}/xml/'
                if not os.path.exists(xml_directory):
                    os.makedirs(xml_directory)
                from perses.utils import data
                _logger.info('WRITING OUT XML FILES')
                _logger.info(f'Saving the hybrid, old and new system to disk')
                data.serialize(
                    htf[phase].hybrid_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-hybrid-system.gz'
                )
                data.serialize(
                    htf[phase]._old_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-old-system.gz'
                )
                data.serialize(
                    htf[phase]._new_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-new-system.gz'
                )

        return {
            'topology_proposals': top_prop,
            'hybrid_topology_factories': htf,
            'hybrid_samplers': hss
        }
Ejemplo n.º 8
0
def run_neq_fah_setup(ligand_file,
                      old_ligand_index,
                      new_ligand_index,
                      forcefield_files,
                      trajectory_directory,
                      complex_box_dimensions=(9.8, 9.8, 9.8),
                      solvent_box_dimensions=(3.5, 3.5, 3.5),
                      timestep=4.0,
                      eq_splitting='V R O R V',
                      neq_splitting='V R H O R V',
                      measure_shadow_work=False,
                      pressure=1.0,
                      temperature=300. * unit.kelvin,
                      solvent_padding=9 * unit.angstroms,
                      phases=['complex', 'solvent', 'vacuum'],
                      phase_project_ids=None,
                      protein_pdb=None,
                      receptor_mol2=None,
                      small_molecule_forcefield='openff-1.2.0',
                      small_molecule_parameters_cache=None,
                      atom_expression=['IntType'],
                      bond_expression=['DefaultBonds'],
                      spectators=None,
                      neglect_angles=False,
                      anneal_14s=False,
                      nonbonded_method='PME',
                      map_strength=None,
                      softcore_v2=False,
                      save_setup_pickle_as=None,
                      render_atom_map=False,
                      alchemical_functions=DEFAULT_ALCHEMICAL_FUNCTIONS,
                      num_equilibration_iterations=1000,
                      num_equilibration_steps_per_iteration=250,
                      nsteps_eq=250000,
                      nsteps_neq=250000,
                      fe_type='fah',
                      collision_rate=1. / unit.picoseconds,
                      collision_rate_setup=90. / unit.picoseconds,
                      constraint_tolerance=1e-6,
                      n_steps_per_move_application=250,
                      globalVarFreq=250,
                      setup='small_molecule',
                      protein_kwargs=None,
                      ionic_strength=0.15 * unit.molar,
                      remove_constraints='not water',
                      **kwargs):
    """
    main execution function that, for every phase returned by the setup step, will:
        - create the directory `<cwd>/<phase_project_ids[phase]>/RUNS/<trajectory_directory>`
        - save the hybrid topology factory (`htf.npz`) and serialize its hybrid system (`system.xml.bz2`)
        - build an integrator with `make_neq_integrator` and serialize it (`integrator.xml`)
        - relax the hybrid structure with `relax_structure` and serialize the resulting state (`state.xml.bz2`)
        - write the hybrid topology (`hybrid_topology.npy`) and a solvent-stripped `hybrid_<phase>.pdb`
        - write a `core.xml` via `make_core_file`
        - write a small `references.npy` record and render the atom map to `atom_map.png`

    If the relaxation step raises, the error is logged, `passed_strucutre_relax` is recorded
    as False in the references file and the PDB (which needs the relaxed positions) is skipped;
    all other outputs are still written.

    Nothing is returned; all results are written to disk.

    example

    >>> run_neq_fah_setup('ligand.sdf', 0, 1,['amber/ff14SB.xml','amber/tip3p_standard.xml','amber/tip3p_HFE_multivalent.xml'],'RUN0',protein_pdb='protein.pdb', phases=['complex','solvent','vacuum'],phase_project_ids={'complex':14320,'solvent':14321,'vacuum':'vacuum'})

    arguments
        ligand_file : str
            .sdf (or any openeye-readable) file containing ligand labeled indices and structures
        old_ligand_index : int
            index of the old ligand
        new_ligand_index : int
            index of the new ligand
        forcefield_files : list of str
            list of forcefields to use for complex/solvent parameterization
        trajectory_directory : str
            RUNXXX for FAH deployment; used as the innermost output directory name
        complex_box_dimensions : Vec3, default=(9.8, 9.8, 9.8)
            define box dimensions of complex phase (in nm)
        solvent_box_dimensions : Vec3, default=(3.5, 3.5, 3.5)
            define box dimensions of solvent phase (in nm)
        timestep : float or simtk.unit.Quantity, default=4.
            step size of nonequilibrium integration; a bare number is taken as femtoseconds
        eq_splitting : str, default = 'V R O R V'
            splitting string of relaxation dynamics
        neq_splitting : str, default = 'V R H O R V'
            splitting string of nonequilibrium dynamics
        measure_shadow_work : bool, default=False
            True/False to measure shadow work
        pressure: float or simtk.unit.Quantity, default=1.
            pressure for simulation; a bare number is taken as atmospheres
        temperature: float or simtk.unit.Quantity, default=300.*unit.kelvin,
            temperature for simulation; a bare number is taken as kelvin
        solvent_padding : simtk.unit.Quantity, default=9*unit.angstroms
            not used directly here; forwarded in the setup options
        phases: list, default = ['complex','solvent','vacuum']
            phases to run; each one must have an entry in `phase_project_ids`
        phase_project_ids : dict, default=None
            maps phase name -> project id (used as the top-level output directory name).
            Required in practice: a ValueError is raised if it is None.
        protein_pdb : str, default=None
            name of protein file
        receptor_mol2 : str, default=None
            name of receptor file if protein_pdb not provided
        small_molecule_forcefield : str, default='openff-1.2.0'
            small molecule forcefield filename
        small_molecule_parameters_cache : str, default=None
            cache file containing small molecule forcefield files
        atom_expression : list default=['IntType']
            list of string for atom mapping criteria. see oechem.OEExprOpts for options
        bond_expression : list default=['DefaultBonds']
            list of string for bond mapping criteria. see oechem.OEExprOpts for options
        map_strength : 'str', default=None
            atom and bond expressions will be ignored, and either a 'weak', 'default' or 'strong' map_strength will be used.
        spectators : str, default=None
            path to any non-alchemical atoms in simulation
        neglect_angles : bool, default=False
            whether to use angle terms in building of unique-new groups. False is strongly recommended
        anneal_14s : bool, default False
            Whether to anneal 1,4 interactions over the protocol;
            note that the 'anneal_1,4s' setup option is always forced to False below
        nonbonded_method : str, default='PME'
            nonbonded method to use
        softcore_v2=bool, default=False
            whether to use v2 softcore
        save_setup_pickle_as : str, default=None
            not used directly here; forwarded in the setup options
        render_atom_map : bool, default=False
            not consulted by this function (the atom map is always rendered); forwarded in the setup options
        alchemical_functions : dict, default=DEFAULT_ALCHEMICAL_FUNCTIONS
            alchemical functions for transformation
        num_equilibration_iterations: int, default=1000
            number of equilibration iterations passed to `relax_structure` during set up
        num_equilibration_steps_per_iteration: int, default=250,
            number of steps per equilibration iteration passed to `relax_structure`
        nsteps_eq : int, default=250000
            number of normal MD steps to take for FAH integrator for PRODUCTION
        nsteps_neq : int, default=250000
            number of nonequilibrium steps to take for FAH integrator for PRODUCTION
        fe_type : str, default='fah'
            tells setup_relative_calculation() to use the fah pipeline
        collision_rate : simtk.unit.Quantity, default=1./unit.picosecond
            collision_rate for PRODUCTION
        collision_rate_setup : simtk.unit.Quantity, default=90./unit.picosecond
            collision rate passed to `relax_structure` for SETUP
        constraint_tolerance : float, default=1e-6
            tolerance to use for constraints
        n_steps_per_move_application : int default=250
            number of equilibrium steps to take per move
        globalVarFreq : int, default=250
            not used directly here (core.xml is written with a fixed frequency); forwarded in the setup options
        setup : str, default='small_molecule'
            either 'small_molecule' (uses `run_setup`) or 'protein' (uses `PointMutationExecutor`)
        protein_kwargs : dict, default=None
            extra options merged into the setup options; when given and 'apo_box_dimensions'
            is absent, it defaults to `complex_box_dimensions`
        ionic_strength : simtk.unit.Quantity, default=0.15*unit.molar
            not used directly here; forwarded in the setup options
        remove_constraints : str, default='not water'
            not used directly here; forwarded in the setup options
        **kwargs
            merged into the setup options and also forwarded to `relax_structure`
    """
    from perses.utils import data
    if isinstance(temperature, (float, int)):
        temperature = temperature * unit.kelvin

    if isinstance(timestep, (float, int)):
        timestep = timestep * unit.femtosecond

    if isinstance(pressure, (float, int)):
        pressure = pressure * unit.atmosphere

    #turn all of the args into a dict for passing to run_setup
    # HBM - this doesn't feel particularly safe
    # NOTE: this snapshot must be taken before any other local is created, since every
    # name in scope here (the arguments, `kwargs` and the `data` module) ends up in the options.
    setup_options = locals()
    #update the setup options w.r.t. kwargs
    setup_options.update(kwargs)
    if protein_kwargs is not None:  #update the setup options w.r.t. the protein kwargs
        setup_options.update(protein_kwargs)
        if 'apo_box_dimensions' not in setup_options:
            setup_options['apo_box_dimensions'] = setup_options[
                'complex_box_dimensions']

    #setups_allowed
    setups_allowed = ['small_molecule', 'protein']
    assert setup in setups_allowed, f"setup {setup} not in setups_allowed: {setups_allowed}"

    # check there is a project_id for each phase
    if phase_project_ids is None:
        # the default of None previously surfaced as an opaque TypeError in the loop below
        raise ValueError(
            "phase_project_ids must map every requested phase to a project id")
    for phase in phases:
        assert (
            phase in phase_project_ids
        ), f"Phase {phase} requested, but not in phase_project_ids {phase_project_ids.keys()}"

    #some modification for fah-specific functionality:
    setup_options['trajectory_prefix'] = None
    setup_options['anneal_1,4s'] = False
    from perses.utils.openeye import generate_expression
    setup_options['atom_expr'] = generate_expression(
        setup_options['atom_expression'])
    setup_options['bond_expr'] = generate_expression(
        setup_options['bond_expression'])

    #run the run_setup to generate topology proposals and htfs
    _logger.info(f"spectators: {setup_options['spectators']}")
    if setup == 'small_molecule':
        from perses.app.setup_relative_calculation import run_setup
        setup_dict = run_setup(setup_options,
                               serialize_systems=False,
                               build_samplers=False)
        topology_proposals = setup_dict['topology_proposals']
        htfs = setup_dict['hybrid_topology_factories']
    elif setup == 'protein':
        from perses.app.relative_point_mutation_setup import PointMutationExecutor
        setup_engine = PointMutationExecutor(**setup_options)
        topology_proposals = {
            'complex': setup_engine.get_complex_htf()._topology_proposal,
            'apo': setup_engine.get_apo_htf()._topology_proposal
        }
        htfs = {
            'complex': setup_engine.get_complex_htf(),
            'apo': setup_engine.get_apo_htf()
        }

    #create one output directory per phase and populate it
    for phase in htfs.keys():
        _logger.info(f'Setting up phase {phase}')
        phase_dir = f"{phase_project_ids[phase]}/RUNS"
        run_dir = os.path.join(os.getcwd(), phase_dir, trajectory_directory)
        os.makedirs(run_dir, exist_ok=True)

        # TODO - replace this with actually saving the important part of the HTF
        np.savez_compressed(f'{run_dir}/htf', htfs[phase])

        #serialize the hybrid_system
        data.serialize(htfs[phase].hybrid_system, f"{run_dir}/system.xml.bz2")

        #make and serialize an integrator
        integrator = make_neq_integrator(**setup_options)
        data.serialize(integrator, f"{run_dir}/integrator.xml")

        #create and serialize a state
        # reset per phase so a failed relaxation can never reuse the previous phase's state
        state = None
        try:
            state = relax_structure(
                temperature=temperature,
                system=htfs[phase].hybrid_system,
                positions=htfs[phase].hybrid_positions,
                nequil=num_equilibration_iterations,
                n_steps_per_iteration=num_equilibration_steps_per_iteration,
                collision_rate=collision_rate_setup,
                **kwargs)

            data.serialize(state, f"{run_dir}/state.xml.bz2")
        except Exception as e:
            _logger.warning(e)
            passed = False
        else:
            passed = True

        import mdtraj as md
        top = htfs[phase].hybrid_topology
        np.save(f'{run_dir}/hybrid_topology', top)
        if state is None:
            # without a relaxed state there are no positions to write out
            _logger.warning(
                f"structure relaxation failed for phase {phase}; skipping hybrid_{phase}.pdb"
            )
        else:
            pos = np.asarray(state.getPositions(asNumpy=True))
            traj = md.Trajectory(pos, top)
            traj.remove_solvent(exclude=['CL', 'NA'], inplace=True)
            traj.save(f'{run_dir}/hybrid_{phase}.pdb')

        #lastly, make a core.xml
        nsteps_per_cycle = 2 * nsteps_eq + 2 * nsteps_neq
        ncycles = 1
        nsteps_per_ps = 250
        nsteps = ncycles * nsteps_per_cycle
        make_core_file(numSteps=nsteps,
                       xtcFreq=1000 * nsteps_per_ps,
                       globalVarFreq=10 * nsteps_per_ps,
                       directory=run_dir)

        #create a logger for reference
        # TODO - add more details to this
        references = {
            'start_ligand': old_ligand_index,
            'end_ligand': new_ligand_index,
            'protein_pdb': protein_pdb,
            'passed_strucutre_relax': passed
        }

        np.save(f'{run_dir}/references', references)

        tp = topology_proposals
        from perses.utils.smallmolecules import render_atom_mapping
        atom_map_filename = f'{run_dir}/atom_map.png'
        if setup == 'protein':
            from perses.utils.smallmolecules import render_protein_residue_atom_mapping
            render_protein_residue_atom_mapping(tp['apo'], atom_map_filename)
        else:
            old_ligand_oemol, new_ligand_oemol = tp['ligand_oemol_old'], tp[
                'ligand_oemol_new']
            _map = tp['non_offset_new_to_old_atom_map']
            render_atom_mapping(atom_map_filename, old_ligand_oemol,
                                new_ligand_oemol, _map)
Ejemplo n.º 9
0
def run_neq_fah_setup(ligand_file,
                      old_ligand_index,
                      new_ligand_index,
                      forcefield_files,
                      trajectory_directory,
                      complex_box_dimensions=(9.8, 9.8, 9.8),
                      solvent_box_dimensions=(3.5, 3.5, 3.5),
                      timestep=4.0 * unit.femtosecond,
                      eq_splitting='V R O R V',
                      neq_splitting='V R H O R V',
                      measure_shadow_work=False,
                      pressure=1.0,
                      temperature=300,
                      solvent_padding=9 * unit.angstroms,
                      phases=['complex', 'solvent', 'vacuum'],
                      protein_pdb=None,
                      receptor_mol2=None,
                      small_molecule_forcefield='openff-1.0.0',
                      small_molecule_parameters_cache=None,
                      atom_expression=['IntType'],
                      bond_expression=['DefaultBonds'],
                      spectators=None,
                      neglect_angles=False,
                      anneal_14s=False,
                      nonbonded_method='PME',
                      map_strength=None,
                      softcore_v2=False,
                      save_setup_pickle_as=None,
                      render_atom_map=False,
                      alchemical_functions=DEFAULT_ALCHEMICAL_FUNCTIONS,
                      num_equilibration_iterations=1000,
                      num_equilibration_steps_per_iteration=250,
                      nsteps_eq=250000,
                      nsteps_neq=250000,
                      fe_type='fah',
                      collision_rate=1. / unit.picoseconds,
                      collision_rate_setup=90. / unit.picoseconds,
                      constraint_tolerance=1e-6,
                      n_steps_per_move_application=250,
                      globalVarFreq=250,
                      **kwargs):
    """
    main execution function that, for every phase returned by `run_setup`, will:
        - create the directory `<cwd>/<project dir>/RUNS/<trajectory_directory>`, where the
          project dir is `solvent_projid` / `complex_projid` (taken from the setup options,
          i.e. expected in **kwargs) or the literal 'VACUUM'
        - save the hybrid topology factory (`htf.npz`) and serialize its hybrid system (`system.xml.bz2`)
        - build an integrator with `make_neq_integrator` and serialize it (`integrator.xml`)
        - relax the hybrid structure with `relax_structure` and serialize the resulting state (`state.xml.bz2`)
        - write the hybrid topology (`hybrid_topology.npy`) and a solvent-stripped `hybrid_<phase>.pdb`
        - write a `core.xml` configuration file
        - write a small `references.npy` record and render the atom map to `atom_map.png`

    If the relaxation step raises, the error is logged, `passed_strucutre_relax` is recorded
    as False in the references file and the PDB (which needs the relaxed positions) is skipped;
    all other outputs are still written.

    Nothing is returned; all results are written to disk.

    arguments
        ligand_file : str
            .sdf (or any openeye-readable) file containing ligand labeled indices and structures
        old_ligand_index : int
            index of the old ligand
        new_ligand_index : int
            index of the new ligand
        forcefield_files : list of str
            list of forcefields to use for complex/solvent parameterization
        trajectory_directory : str
            RUNXXX for FAH deployment; used as the innermost output directory name
        complex_box_dimensions : Vec3, default=(9.8, 9.8, 9.8)
            define box dimensions of complex phase
        solvent_box_dimensions : Vec3, default=(3.5, 3.5, 3.5)
            define box dimensions of solvent phase
        timestep : simtk.unit.Quantity, default=4.*unit.femtosecond
            step size of nonequilibrium integration
        eq_splitting : str, default = 'V R O R V'
            splitting string of relaxation dynamics
        neq_splitting : str, default = 'V R H O R V'
            splitting string of nonequilibrium dynamics
        measure_shadow_work : bool, default=False
            True/False to measure shadow work
        pressure: float, default=1.
            pressure in atms for simulation
        temperature: float, default=300.,
            temperature in K for simulation
            NOTE(review): passed to `relax_structure` as given, without attaching units - confirm the helper accepts a bare number
        solvent_padding : simtk.unit.Quantity, default=9*unit.angstroms
            not used directly here; forwarded in the setup options
        phases: list, default = ['complex','solvent','vacuum']
            phases to run, where allowed phases are 'complex','solvent','vacuum'
        protein_pdb : str, default=None
            name of protein file
        receptor_mol2 : str, default=None
            name of receptor file if protein_pdb not provided
        small_molecule_forcefield : str, default='openff-1.0.0'
            small molecule forcefield filename
        small_molecule_parameters_cache : str, default=None
            cache file containing small molecule forcefield files
        atom_expression : list default=['IntType']
            list of string for atom mapping criteria. see oechem.OEExprOpts for options
        bond_expression : list default=['DefaultBonds']
            list of string for bond mapping criteria. see oechem.OEExprOpts for options
        map_strength : 'str', default=None
            atom and bond expressions will be ignored, and either a 'weak', 'default' or 'strong' map_strength will be used.
        spectators : str, default=None
            path to any non-alchemical atoms in simulation
        neglect_angles : bool, default=False
            whether to use angle terms in building of unique-new groups. False is strongly recommended
        anneal_14s : bool, default False
            Whether to anneal 1,4 interactions over the protocol;
            note that the 'anneal_1,4s' setup option is always forced to False below
        nonbonded_method : str, default='PME'
            nonbonded method to use
        softcore_v2=bool, default=False
            whether to use v2 softcore
        save_setup_pickle_as : str, default=None
            not used directly here; forwarded in the setup options
        render_atom_map : bool, default=False
            not consulted by this function (the atom map is always rendered); forwarded in the setup options
        alchemical_functions : dict, default=DEFAULT_ALCHEMICAL_FUNCTIONS
            alchemical functions for transformation
        num_equilibration_iterations: int, default=1000
            number of equilibration iterations passed to `relax_structure` during set up
        num_equilibration_steps_per_iteration: int, default=250,
            number of steps per equilibration iteration passed to `relax_structure`
        nsteps_eq : int, default=250000
            number of normal MD steps to take for FAH integrator for PRODUCTION
        nsteps_neq : int, default=250000
            number of nonequilibrium steps to take for FAH integrator for PRODUCTION
        fe_type : str, default='fah'
            tells setup_relative_calculation() to use the fah pipeline
        collision_rate : simtk.unit.Quantity, default=1./unit.picosecond
            collision_rate for PRODUCTION
        collision_rate_setup : simtk.unit.Quantity, default=90./unit.picosecond
            collision rate passed to `relax_structure` for SETUP
        constraint_tolerance : float, default=1e-6
            tolerance to use for constraints
        n_steps_per_move_application : int default=250
            number of equilibrium steps to take per move
        globalVarFreq : int, default=250
            not used directly here (core.xml is written with a fixed frequency); forwarded in the setup options
        **kwargs
            merged into the setup options; must provide 'solvent_projid' / 'complex_projid'
            when the corresponding phase is set up
    """
    from perses.app.setup_relative_calculation import run_setup
    from perses.utils import data
    #turn all of the args into a dict for passing to run_setup
    # NOTE: this snapshot must be taken before any other local is created, since every
    # name in scope here (the arguments, `kwargs` and the two imports above) ends up in the options.
    setup_options = locals()
    setup_options.update(kwargs)

    #some modification for fah-specific functionality:
    setup_options['trajectory_prefix'] = None
    setup_options['anneal_1,4s'] = False
    from perses.utils.openeye import generate_expression
    setup_options['atom_expr'] = generate_expression(
        setup_options['atom_expression'])
    setup_options['bond_expr'] = generate_expression(
        setup_options['bond_expression'])

    #run the run_setup to generate topology proposals and htfs
    _logger.info(f"spectators: {setup_options['spectators']}")
    setup_dict = run_setup(setup_options,
                           serialize_systems=False,
                           build_samplers=False)
    topology_proposals = setup_dict['topology_proposals']
    htfs = setup_dict['hybrid_topology_factories']

    #create one output directory per phase and populate it
    for phase in htfs.keys():
        _logger.info(f'PHASE RUNNING: {phase}')
        _logger.info(f'Setting up phase {phase}')
        if phase == 'solvent':
            phase_dir = f"{setup_options['solvent_projid']}/RUNS"
        elif phase == 'complex':
            phase_dir = f"{setup_options['complex_projid']}/RUNS"
        elif phase == 'vacuum':
            phase_dir = 'VACUUM/RUNS'
        else:
            # previously an unknown phase silently reused the preceding phase's directory
            raise ValueError(
                f"unrecognized phase '{phase}'; expected 'solvent', 'complex' or 'vacuum'"
            )
        run_dir = os.path.join(os.getcwd(), phase_dir, trajectory_directory)
        # makedirs (not mkdir) so that missing parent directories are created as well
        os.makedirs(run_dir, exist_ok=True)

        np.savez_compressed(f'{run_dir}/htf', htfs[phase])

        #serialize the hybrid_system
        data.serialize(htfs[phase].hybrid_system, f"{run_dir}/system.xml.bz2")

        #make and serialize an integrator
        integrator = make_neq_integrator(**setup_options)
        data.serialize(integrator, f"{run_dir}/integrator.xml")

        #create and serialize a state
        # reset per phase so a failed relaxation can never reuse the previous phase's state
        state = None
        try:
            state = relax_structure(
                temperature=temperature,
                system=htfs[phase].hybrid_system,
                positions=htfs[phase].hybrid_positions,
                nequil=num_equilibration_iterations,
                n_steps_per_iteration=num_equilibration_steps_per_iteration,
                collision_rate=collision_rate_setup)

            data.serialize(state, f"{run_dir}/state.xml.bz2")
        except Exception as e:
            _logger.warning(e)
            passed = False
        else:
            passed = True

        import mdtraj as md
        top = htfs[phase].hybrid_topology
        np.save(f'{run_dir}/hybrid_topology', top)
        if state is None:
            # without a relaxed state there are no positions to write out
            _logger.warning(
                f"structure relaxation failed for phase {phase}; skipping hybrid_{phase}.pdb"
            )
        else:
            pos = np.asarray(state.getPositions(asNumpy=True))
            traj = md.Trajectory(pos, top)
            traj.remove_solvent(exclude=['CL', 'NA'], inplace=True)
            traj.save(f'{run_dir}/hybrid_{phase}.pdb')

        #lastly, make a core.xml
        nsteps_per_cycle = 2 * nsteps_eq + 2 * nsteps_neq
        ncycles = 1
        nsteps_per_ps = 250
        core_parameters = {
            'numSteps': ncycles * nsteps_per_cycle,
            'xtcFreq': 1000 * nsteps_per_ps,  # once per ns
            'xtcAtoms': 'solute',
            'precision': 'mixed',
            'globalVarFilename': 'globals.csv',
            'globalVarFreq': 10 * nsteps_per_ps,
        }
        # Serialize core.xml
        import dicttoxml
        from xml.dom.minidom import parseString
        with open(f'{run_dir}/core.xml', 'wt') as outfile:
            xml = dicttoxml.dicttoxml(core_parameters,
                                      custom_root='config',
                                      attr_type=False)
            outfile.write(parseString(xml).toprettyxml())

        #create a logger for reference
        references = {
            'start_ligand': old_ligand_index,
            'end_ligand': new_ligand_index,
            'protein_pdb': protein_pdb,
            'passed_strucutre_relax': passed
        }

        np.save(f'{run_dir}/references', references)

        tp = topology_proposals
        from perses.utils.smallmolecules import render_atom_mapping
        render_atom_mapping(f'{run_dir}/atom_map.png', tp['ligand_oemol_old'],
                            tp['ligand_oemol_new'],
                            tp['non_offset_new_to_old_atom_map'])
Ejemplo n.º 10
0
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene",
                                           proposed_mol_name="benzene",
                                           current_mol_smiles=None,
                                           proposed_mol_smiles=None,
                                           vacuum=False,
                                           render_atom_mapping=False):
    """
    This function will generate a topology proposal, old positions, and new positions with a geometry proposal (either vacuum or solvated) given a set of input iupacs or smiles.
    The function will (by default) read the iupac names first.  If either name is None, then it will attempt to read a set of current and new smiles.
    An atom mapping image (`<old_smiles>to<new_smiles>.png`) will be generated if specified.

    NOTE: `openmm`, `np`, `StringIO` and `beta` are not defined in this function; they are
    expected to be available from the enclosing module.

    Arguments
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule
    current_mol_smiles : str (default None)
        current mol smiles
    proposed_mol_smiles : str (default None)
        proposed mol smiles
    vacuum: bool (default False)
        whether to render a vacuum or solvated topology_proposal
    render_atom_mapping : bool (default False)
        whether to render the atom map of the current and proposed molecules

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system (solvated unless `vacuum` is True)
    new_positions : np.array, unit-bearing
        The positions of the new system

    Raises
    ------
    Exception
        if neither both names nor both smiles are given, or if the given
        names/smiles cannot be converted to molecules
    """
    # validate the inputs before the (heavy) third-party imports so bad calls fail fast
    use_names = current_mol_name is not None and proposed_mol_name is not None
    use_smiles = current_mol_smiles is not None and proposed_mol_smiles is not None
    if not (use_names or use_smiles):
        raise Exception(
            "either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings."
        )

    import simtk.openmm.app as app
    import simtk.unit as unit
    from openeye import oechem
    from openmoltools import forcefield_generators
    from openmoltools.openeye import iupac_to_oemol, smiles_to_oemol
    import perses.utils.openeye as openeye
    from perses.utils.data import get_data_filename
    from perses.rjmc.topology_proposal import SystemGenerator, SmallMoleculeSetProposalEngine
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    def _explicit_hydrogen_smiles(mol):
        # SMILES string with explicit hydrogens for the given molecule
        return oechem.OECreateSmiString(
            mol, oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens)

    # names take priority over smiles when both are supplied
    if use_names:
        try:
            old_oemol = iupac_to_oemol(current_mol_name)
            new_oemol = iupac_to_oemol(proposed_mol_name)
            old_smiles = _explicit_hydrogen_smiles(old_oemol)
            new_smiles = _explicit_hydrogen_smiles(new_oemol)
        except Exception as err:
            raise Exception(
                f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!"
            ) from err
    else:
        try:
            old_oemol = smiles_to_oemol(current_mol_smiles)
            new_oemol = smiles_to_oemol(proposed_mol_smiles)
            old_smiles = _explicit_hydrogen_smiles(old_oemol)
            new_smiles = _explicit_hydrogen_smiles(new_oemol)
        except Exception as err:
            raise Exception("the variables are not compatible") from err

    # only the molecule is kept from this call; positions and topology are rebuilt below
    old_oemol, *_ = openeye.createSystemFromSMILES(old_smiles, title="MOL")

    #correct the old positions
    old_positions = openeye.extractPositionsFromOEMol(old_oemol)
    old_positions = old_positions.in_units_of(unit.nanometers)

    new_oemol, *_ = openeye.createSystemFromSMILES(new_smiles, title="NEW")

    ffxml = forcefield_generators.generateForceFieldFromMolecules(
        [old_oemol, new_oemol])

    old_oemol.SetTitle('MOL')
    new_oemol.SetTitle('MOL')

    old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol)
    # kept from the original; the result is not used afterwards
    new_topology = forcefield_generators.generateTopologyFromOEMol(new_oemol)

    if vacuum:
        nonbonded_method = app.NoCutoff
        barostat = None
    else:
        nonbonded_method = app.PME
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                             300.0 * unit.kelvin, 50)

    gaff_xml_filename = get_data_filename("data/gaff.xml")
    system_generator = SystemGenerator(
        [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': nonbonded_method,
            'constraints': app.HBonds,
            'hydrogenMass': 4.0 * unit.amu
        })
    system_generator._forcefield.loadFile(StringIO(ffxml))

    proposal_engine = SmallMoleculeSetProposalEngine([old_smiles, new_smiles],
                                                     system_generator,
                                                     residue_name='MOL')
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=1000,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False)

    if vacuum:
        current_topology = old_topology
        current_positions = old_positions
    else:
        #now to solvate
        modeller = app.Modeller(old_topology, old_positions)
        # drop hydrogens on anything that is not the small molecule, then re-add them
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol in ['H']
            and atom.residue.name not in ['MOL', 'OLD', 'NEW']
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=system_generator._forcefield)
        modeller.addSolvent(system_generator._forcefield,
                            model='tip3p',
                            padding=9.0 * unit.angstroms)
        current_topology = modeller.getTopology()
        raw_positions = modeller.getPositions()
        # repackage the positions as a unit-bearing numpy array (nm)
        current_positions = unit.quantity.Quantity(
            value=np.array([
                list(atom_pos) for atom_pos in
                raw_positions.value_in_unit_system(unit.md_unit_system)
            ]),
            unit=unit.nanometers)

    #now to create proposal
    current_system = system_generator.build_system(current_topology)
    top_proposal = proposal_engine.propose(current_system=current_system,
                                           current_topology=current_topology,
                                           current_mol=old_oemol,
                                           proposed_mol=new_oemol)
    new_positions, _ = geometry_engine.propose(top_proposal,
                                               current_positions, beta)

    if render_atom_mapping:
        # aliased so the import does not rebind the `render_atom_mapping` flag
        from perses.utils.smallmolecules import render_atom_mapping as draw_atom_mapping
        # the solvated case uses the engine's non-offset map, the vacuum case the proposal's map
        if vacuum:
            atom_map = top_proposal._new_to_old_atom_map
        else:
            atom_map = proposal_engine.non_offset_new_to_old_atom_map
        print(f"new_to_old: {atom_map}")
        draw_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol,
                          new_oemol, atom_map)

    return top_proposal, current_positions, new_positions
Ejemplo n.º 11
0
    return graph


# In[ ]:

#os.system(f"rm *.nc")

# In[ ]:
# Serialize, for every edge of the perturbation graph and for both phases, the hybrid
# system (XML), the atom-map image (PNG) and the hybrid topology factory (pickle).
mapping_strength = 'strong'
import pickle
from perses.utils.smallmolecules import render_atom_mapping
graph = generate_fully_connected_perturbation_graph()
print(f"graph edges: {graph.edges()}")
for pair in graph.edges():
    pair_stem = f"{pair[0]}_{pair[1]}"
    for phase in ['vac', 'sol']:
        htf = graph.edges[pair][f"{phase}_htf"]
        phase_stem = f"{pair_stem}.{phase}.{mapping_strength}_map"

        # report the file that is actually written
        print("Serializing the system to ", f"{phase_stem}.xml")
        with open(f"{phase_stem}.xml", 'w') as xml_file:
            xml_file.write(
                openmm.openmm.XmlSerializer.serialize(htf._hybrid_system))

        # strip the oemols from both residue graphs before pickling the factory
        # NOTE(review): presumably because they cannot be pickled - confirm
        htf._topology_proposal._old_networkx_residue.remove_oemols_from_graph()
        htf._topology_proposal._new_networkx_residue.remove_oemols_from_graph()

        # the image name has no phase component, so it is rewritten for each phase
        _map, oldmol, newmol = graph.edges[pair]["map_oldmol_newmol"]
        render_atom_mapping(f"{pair_stem}.{mapping_strength}_map.png",
                            oldmol, newmol, _map)

        with open(f"{phase_stem}.pkl", 'wb') as pickle_file:
            pickle.dump(htf, pickle_file)