Example #1
0
 def minimize_relative(mol_a, mol_b):
     """Minimize the solvent host against both ligands and stack all coordinates.

     Returns the minimized host coordinates followed by the conformer
     coordinates of ``mol_a`` and then ``mol_b``, concatenated in that order.
     """
     ligands = [mol_a, mol_b]
     minimized_host = minimizer.minimize_host_4d(
         ligands, solvent_system, solvent_coords, forcefield, solvent_box
     )
     ligand_confs = [get_romol_conf(ligand) for ligand in ligands]
     return np.concatenate([minimized_host, *ligand_confs])
Example #2
0
    def test_random_directory(self):
        """Run host minimization from inside a freshly created temporary directory.

        The working directory is always restored afterwards, even if the
        minimization raises.
        """
        with TemporaryDirectory(prefix="timemachine") as scratch_dir:
            previous_cwd = os.getcwd()
            os.chdir(scratch_dir)
            try:
                # build a pair of alchemical ligands in a water box
                mol_a = hif2a_ligand_pair.mol_a
                mol_b = hif2a_ligand_pair.mol_b
                _ = hif2a_ligand_pair.core
                ff = hif2a_ligand_pair.ff
                host_system, host_coords, host_box, _host_top = build_water_system(2.6)

                # Creates a custom_ops.Context which triggers JIT
                minimize_host_4d([mol_a, mol_b], host_system, host_coords, ff, host_box)
            finally:
                os.chdir(previous_cwd)
Example #3
0
def get_solvent_phase_system(mol, ff):
    """Build a water box for ``mol`` and minimize the water against the ligand.

    Parameters
    ----------
    mol: ligand passed to ``AbsoluteFreeEnergy`` and to the host minimizer
    ff: forcefield; its ordered parameters are used to prepare the host edge

    Returns
    -------
    (ubps, params, masses, coords, water_box)
        The first four values are the outputs of ``afe.prepare_host_edge``, with
        the leading ``len(water_coords)`` rows of ``coords`` overwritten by the
        minimized water coordinates. ``water_box`` is the built box enlarged by
        0.5 along each diagonal entry.
    """
    water_system, water_coords, water_box, _ = builders.build_water_system(3.0)
    water_box = water_box + np.eye(3) * 0.5  # add a small margin around the box for stability
    num_water_atoms = len(water_coords)

    afe = free_energy.AbsoluteFreeEnergy(mol, ff)
    ff_params = ff.get_ordered_params()
    # masses come from prepare_host_edge; no need to pre-compute them from the mol
    ubps, params, masses, coords = afe.prepare_host_edge(ff_params, water_system, water_coords)

    # Only the water rows are minimized; the rest of ``coords`` is left as prepared.
    coords[:num_water_atoms] = minimizer.minimize_host_4d(
        [mol], water_system, coords[:num_water_atoms], ff, water_box
    )

    return ubps, params, masses, coords, water_box
Example #4
0
    def binding_model(ff_params):
        """Estimate the complex-minus-solvent free energy difference for ``ff_params``.

        Each leg (complex first, solvent second) is minimized against ``mol_a``,
        prepared with ``rfe.prepare_host_edge`` and evaluated through
        ``estimator.deltaG``.
        """
        legs = [
            (complex_system, complex_coords, complex_box),
            (solvent_system, solvent_coords, solvent_box),
        ]

        leg_dGs = []
        for host_system, host_coords, host_box in legs:
            # minimize the host to avoid clashes
            minimized_host = minimizer.minimize_host_4d(
                [mol_a], host_system, host_coords, ff, host_box
            )

            unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                ff_params, host_system, minimized_host
            )

            x0 = coords
            v0 = np.zeros_like(coords)
            client = CUDAPoolClient(1)

            # the first potential is treated as the harmonic bond term; its bond
            # list determines the groups handed to the barostat
            bond_list = get_bond_list(unbound_potentials[0])
            group_idxs = get_group_indices(bond_list)

            temperature = 300.0
            pressure = 1.0

            integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
            barostat = MonteCarloBarostat(
                x0.shape[0], pressure, temperature, group_idxs, 25, seed
            )

            model = estimator.FreeEnergyModel(
                unbound_potentials,
                client,
                host_box,
                x0,
                v0,
                integrator,
                lambda_schedule,
                equil_steps,
                prod_steps,
                barostat,
            )

            leg_dG, _ = estimator.deltaG(model, sys_params)
            leg_dGs.append(leg_dG)

        complex_dG, solvent_dG = leg_dGs
        return complex_dG - solvent_dG
Example #5
0
def test_minimizer():
    """Smoke-test host minimization with both ligands together and each alone."""
    protein_build = builders.build_protein_system("tests/data/hif2a_nowater_min.pdb")
    complex_system, complex_coords, _, _, complex_box, _ = protein_build

    supplier = Chem.SDMolSupplier("tests/data/ligands_40.sdf", removeHs=False)
    ligands = [mol for mol in supplier]
    mol_a, mol_b = ligands[1], ligands[4]

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    # these methods will throw if the minimization failed
    for mols in ([mol_a, mol_b], [mol_a], [mol_b]):
        minimizer.minimize_host_4d(mols, complex_system, complex_coords, ff, complex_box)
Example #6
0
def dock_and_equilibrate(
    host_pdbfile,
    guests_sdfile,
    max_lambda,
    insertion_steps,
    eq_steps,
    outdir,
    fewer_outfiles=False,
    constant_atoms=None,
):
    """Solvates a host, inserts guest(s) into solvated host, equilibrates

    Parameters
    ----------

    host_pdbfile: path to host pdb file to dock into
    guests_sdfile: path to input sdf with guests to pose/dock
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    insertion_steps: how many steps to insert the guest over (recommended: 501)
    eq_steps: how many steps of equilibration to do after insertion (recommended: 15001)
    outdir: where to write output (will be created if it does not already exist)
    fewer_outfiles: if True, will only write frames for the equilibration, not insertion
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files). None (the default) means no atoms are held fixed.

    Output
    ------

    A pdb & sdf file for the last step of insertion
       (outdir/<guest_name>/<guest_name>_ins_<step>_[host.pdb/guest.sdf])
    A pdb & sdf file every 1000 steps of equilibration
       (outdir/<guest_name>/<guest_name>_eq_<step>_[host.pdb/guest.sdf])
    stdout corresponding to the files written noting the lambda value and energy
    stdout for each guest noting the work of transition, if applicable
    stdout for each guest noting how long it took to run

    Note
    ----
    The work will not be calculated if the du_dl endpoints are not close to 0 or if any norm of
    force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py]
    A guest whose forces are too large right after insertion is skipped entirely
    (no equilibration and no timing line are produced for it).
    """

    # Avoid a shared mutable default; the value is only ever iterated.
    if constant_atoms is None:
        constant_atoms = ()

    os.makedirs(outdir, exist_ok=True)

    print(f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}
    MAX_LAMBDA = {max_lambda}
    INSERTION_STEPS = {insertion_steps}
    EQ_STEPS = {eq_steps}
    """)

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)
    num_host_atoms = len(solvated_host_coords)

    # Round-trip the solvated host through a temporary PDB file to get an RDKit mol.
    # mkstemp hands back an open descriptor; only the path is used below, so close
    # the descriptor immediately instead of leaking it.
    fd, solvated_host_pdb = tempfile.mkstemp(suffix=".pdb", text=True)
    os.close(fd)
    try:
        writer = pdb_writer.PDBWriter([solvated_topology], solvated_host_pdb)
        writer.write_frame(solvated_host_coords)
        writer.close()
        solvated_host_mol = Chem.MolFromPDBFile(solvated_host_pdb, removeHs=False)
    finally:
        # remove the temp file even if writing/parsing failed
        os.remove(solvated_host_pdb)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(),
                                     dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        minimized_coords = minimizer.minimize_host_4d([guest_mol],
                                                      solvated_host_system,
                                                      solvated_host_coords, ff,
                                                      host_box)

        afe = free_energy.AbsoluteFreeEnergy(guest_mol, ff)

        ups, sys_params, combined_masses, _ = afe.prepare_host_edge(
            ff.get_ordered_params(), solvated_host_system, minimized_coords)

        combined_bps = [up.bind(sp) for up, sp in zip(ups, sys_params)]

        x0 = np.concatenate([minimized_coords, orig_guest_coords])
        v0 = np.zeros_like(x0)
        print("SYSTEM", f"guest_name: {guest_name}", f"num_atoms: {len(x0)}")

        # Make the requested atoms very heavy so they barely move (1-indexed input).
        for atom_num in constant_atoms:
            combined_masses[atom_num - 1] += 50000

        seed = 2021
        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                  seed).impl()

        u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]

        ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

        # insert guest
        insertion_lambda_schedule = np.linspace(max_lambda, 0.0,
                                                insertion_steps)

        # interval passed to multiple_steps for du_dl collection; the work
        # integral below subsamples the lambda schedule with the same interval
        subsample_interval = 1

        full_du_dls, _, _ = ctxt.multiple_steps(insertion_lambda_schedule,
                                                subsample_interval)
        step = len(insertion_lambda_schedule) - 1
        lamb = insertion_lambda_schedule[-1]
        ctxt.step(lamb)

        report.report_step(
            ctxt,
            step,
            lamb,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            insertion_steps,
            "INSERTION",
        )
        if not fewer_outfiles:
            x_t = ctxt.get_x_t()
            # multiply by 10 to undo the md_units conversion applied on input
            report.write_frame(
                x_t[:num_host_atoms] * 10,
                solvated_host_mol,
                x_t[num_host_atoms:] * 10,
                guest_mol,
                guest_name,
                outdir,
                str(step).zfill(len(str(insertion_steps))),
                "ins",
            )

        if report.too_much_force(ctxt, lamb, host_box, combined_bps, u_impls):
            print("Not calculating work (too much force)")
            continue

        # Note: this condition only applies for ABFE, not RBFE
        calc_work = True
        if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
            print("Not calculating work (du_dl endpoints are not ~0)")
            calc_work = False

        if calc_work:
            work = np.trapz(full_du_dls,
                            insertion_lambda_schedule[::subsample_interval])
            print(f"guest_name: {guest_name}\tinsertion_work: {work:.2f}")

        # equilibrate
        force_check_steps = (0, int(eq_steps / 2), eq_steps - 1)
        for step in range(eq_steps):
            ctxt.step(0.00)
            if step % 1000 == 0:
                report.report_step(
                    ctxt,
                    step,
                    0.00,
                    host_box,
                    combined_bps,
                    u_impls,
                    guest_name,
                    eq_steps,
                    "EQUILIBRATION",
                )
                if (not fewer_outfiles) or (step == eq_steps - 1):
                    x_t = ctxt.get_x_t()
                    report.write_frame(
                        x_t[:num_host_atoms] * 10,
                        solvated_host_mol,
                        x_t[num_host_atoms:] * 10,
                        guest_mol,
                        guest_name,
                        outdir,
                        str(step).zfill(len(str(eq_steps))),
                        "eq",
                    )
            # abort equilibration early if forces blow up at the start, middle or end
            if step in force_check_steps:
                if report.too_much_force(ctxt, 0.00, host_box, combined_bps,
                                         u_impls):
                    break

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
    collision_rate = 1.0 / unit.picosecond
    n_moves = 2000
    barostat_interval = 5
    seed = 2021

    # thermodynamic parameters
    temperature = 300 * unit.kelvin
    pressure = 1.013 * unit.bar

    # generate an alchemical system of a waterbox + alchemical ligand:
    # effectively discard ligands by running in AbsoluteFreeEnergy mode at lambda = 1.0
    mol_a, _, core, ff = hif2a_ligand_pair.mol_a, hif2a_ligand_pair.mol_b, hif2a_ligand_pair.core, hif2a_ligand_pair.ff
    complex_system, complex_coords, complex_box, complex_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer))

    min_complex_coords = minimize_host_4d([mol_a], complex_system,
                                          complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords)

    # define NPT ensemble
    potential_energy_model = PotentialEnergyModel(sys_params,
                                                  unbound_potentials)
    ensemble = NPTEnsemble(potential_energy_model, temperature, pressure)

    # define a thermostat
    integrator = LangevinIntegrator(
        temperature.value_in_unit(unit.kelvin),
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
Example #8
0
    def test_predict_absolute_conversion(self):
        """Check the most basic RABFE conversion prediction and the files it writes."""
        # Simple Charges are used to verify determinism of the model; needed as one
        # endpoint uses the ff definition
        forcefield = Forcefield.load_from_file("smirnoff_1_1_0_sc.py")

        # build the water system
        water_build = builders.build_water_system(4.0)
        solvent_system, solvent_coords, solvent_box, solvent_topology = water_build

        temperature = 300.0
        pressure = 1.0
        dt = 2.5e-3

        client = CUDAPoolClient(NUM_GPUS)
        lambda_schedule = construct_lambda_schedule(2)

        model = AbsoluteConversionModel(
            client,
            forcefield,
            solvent_system,
            lambda_schedule,
            solvent_topology,
            temperature,
            pressure,
            dt,
            10,
            50,
            frame_filter=all_frames,
        )
        mol_a = hif2a_ligand_pair.mol_a
        mol_b = hif2a_ligand_pair.mol_b

        core_idxs = setup_relative_restraints_by_distance(mol_a, mol_b)

        ref_coords = get_romol_conf(mol_a)
        mol_coords = get_romol_conf(mol_b)  # original coords

        # Align mol_b onto mol_a using the paired core atoms
        rotation, translation = rmsd.get_optimal_rotation_and_translation(
            x1=ref_coords[core_idxs[:, 1]],  # reference core atoms
            x2=mol_coords[core_idxs[:, 0]],  # mol core atoms
        )
        aligned_mol_coords = rmsd.apply_rotation_and_translation(
            mol_coords, rotation, translation
        )

        minimized_solvent_coords = minimizer.minimize_host_4d(
            [mol_b],
            solvent_system,
            solvent_coords,
            forcefield,
            solvent_box,
            [aligned_mol_coords],
        )
        solvent_x0 = np.concatenate([minimized_solvent_coords, aligned_mol_coords])

        ordered_params = forcefield.get_ordered_params()
        with temporary_working_dir() as temp_dir:
            dG, dG_err = model.predict(
                ordered_params,
                mol_b,
                solvent_x0,
                solvent_box,
                "prefix",
                core_idxs=core_idxs[:, 0],
                seed=2022,
            )
            np.testing.assert_almost_equal(dG, 46.102816, decimal=5)
            np.testing.assert_equal(dG_err, 0.0)

            created_files = os.listdir(temp_dir)
            # four files in total: 1 pdb, 1 npy and 2 npz
            self.assertEqual(len(created_files), 4)
            for suffix, expected_count in ((".pdb", 1), (".npy", 1), (".npz", 2)):
                matching = [name for name in created_files if name.endswith(suffix)]
                self.assertEqual(len(matching), expected_count)
Example #9
0
def test_molecular_ideal_gas():
    """Compare barostat-sampled box volumes against an ideal-gas estimate.

    With the last potential dropped, the mean box volume sampled at each
    temperature is compared to ``(n_molecules + 1) * BOLTZ * T / P`` within a
    relative tolerance.

    References
    ----------
    OpenMM testIdealGas
    https://github.com/openmm/openmm/blob/d8ef57fed6554ec95684e53768188e1f666405c9/tests/TestMonteCarloBarostat.h#L86-L140
    """

    # simulation parameters
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    collision_rate = 1.0 / unit.picosecond
    n_moves = 10000
    barostat_interval = 5
    seed = 2021

    # thermodynamic parameters
    temperatures = np.array([300, 600, 1000]) * unit.kelvin
    pressure = 100.0 * unit.bar  # very high pressure, to keep the expected volume small

    # generate an alchemical system of a waterbox + alchemical ligand:
    # effectively discard ligands by running in AbsoluteFreeEnergy mode at lambda = 1.0
    mol_a = hif2a_ligand_pair.mol_a
    ff = hif2a_ligand_pair.ff
    box_width_nm = initial_waterbox_width.value_in_unit(unit.nanometer)
    complex_system, complex_coords, complex_box, complex_top = build_water_system(box_width_nm)

    min_complex_coords = minimize_host_4d([mol_a], complex_system, complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    all_potentials, all_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords
    )

    # drop the nonbonded potential (the last entry)
    unbound_potentials = all_potentials[:-1]
    sys_params = all_params[:-1]

    # get list of molecules for barostat by looking at bond table
    bond_list = get_bond_list(unbound_potentials[0])
    group_indices = get_group_indices(bond_list)

    relative_tolerance = 1e-2
    initial_relative_box_perturbation = 2 * relative_tolerance

    n_molecules = complex_top.getNumResidues()

    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, unbound_potentials)
    ]
    u_impls = [bp.bound_impl(precision=np.float32) for bp in bound_potentials]

    # expected volume
    md_pressure_unit = ENERGY_UNIT / DISTANCE_UNIT**3
    pressure_in_md = (pressure * unit.AVOGADRO_CONSTANT_NA).value_in_unit(md_pressure_unit)
    temperatures_in_kelvin = temperatures.value_in_unit(unit.kelvin)
    expected_volume_in_md = (n_molecules + 1) * BOLTZ * temperatures_in_kelvin / pressure_in_md

    volume_trajs = []
    n_barostat_rounds = n_moves // barostat_interval
    for i, temperature in enumerate(temperatures):

        # define a thermostat
        integrator = LangevinIntegrator(
            temperature.value_in_unit(unit.kelvin),
            timestep.value_in_unit(unit.picosecond),
            collision_rate.value_in_unit(unit.picosecond**-1),
            masses,
            seed,
        )
        integrator_impl = integrator.impl()

        v_0 = sample_velocities(masses * unit.amu, temperature)

        # rescale the box to be approximately the desired box volume already
        rescaler = CentroidRescaler(group_indices)
        initial_volume = compute_box_volume(complex_box)
        initial_center = compute_box_center(complex_box)
        volume_ratio = (1 + initial_relative_box_perturbation) * expected_volume_in_md[i] / initial_volume
        length_scale = volume_ratio ** (1.0 / 3)
        new_coords = rescaler.scale_centroids(coords, initial_center, length_scale)
        new_box = complex_box * length_scale

        baro = custom_ops.MonteCarloBarostat(
            new_coords.shape[0],
            pressure.value_in_unit(unit.bar),
            temperature.value_in_unit(unit.kelvin),
            group_indices,
            barostat_interval,
            u_impls,
            seed,
        )

        ctxt = custom_ops.Context(new_coords, v_0, new_box, integrator_impl, u_impls, barostat=baro)

        # record the box volume after every block of ``barostat_interval`` steps
        vols = []
        for _ in range(n_barostat_rounds):
            ctxt.multiple_steps(np.ones(barostat_interval))
            vols.append(np.linalg.det(ctxt.get_box()))
        volume_trajs.append(vols)

    equil_time = len(volume_trajs[0]) // 2  # TODO: don't hard-code this?
    actual_volume_in_md = np.array([np.mean(traj[equil_time:]) for traj in volume_trajs])

    np.testing.assert_allclose(
        actual=actual_volume_in_md,
        desired=expected_volume_in_md,
        rtol=relative_tolerance,
    )
Example #10
0
 def minimize_absolute(mol):
     """Minimize the solvent host against ``mol`` and append the ligand conformer.

     Returns the minimized host coordinates followed by the conformer
     coordinates of ``mol``.
     """
     minimized_host = minimizer.minimize_host_4d(
         [mol], solvent_system, solvent_coords, forcefield, solvent_box
     )
     ligand_conf = get_romol_conf(mol)
     return np.concatenate([minimized_host, ligand_conf])
Example #11
0
def estimate_dG(
    transformation: RelativeTransformation,
    num_lambdas: int,
    num_steps_per_lambda: int,
    num_equil_steps: int,
):
    """Estimate the complex-minus-solvent free energy for a relative transformation.

    For each stage (complex, then solvent) the host is minimized, one
    ``rfe.host_edge`` task per lambda window is submitted to ``client``, and the
    mean du/dlambda of each window is integrated over the lambda schedule with
    the trapezoid rule.

    Parameters
    ----------
    transformation: supplies ``ff``, ``mol_a``, ``mol_b`` and ``core``
    num_lambdas: passed to ``construct_lambda_schedule``
    num_steps_per_lambda: forwarded to ``rfe.host_edge`` for every window
    num_equil_steps: forwarded to ``rfe.host_edge`` for every window

    Returns
    -------
    The complex-stage dG minus the solvent-stage dG.
    """

    def _mean_du_dlambda(result):
        """summarize result of rfe.host_edge into mean du/dl

        TODO: refactor where this analysis step occurs
        """
        bonded_du_dl, nonbonded_du_dl, _ = result
        return np.mean(bonded_du_dl + nonbonded_du_dl)

    # build the protein system.
    complex_system, complex_coords, _, _, complex_box = builders.build_protein_system(
        path_to_protein)

    # build the water system.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(
        4.0)

    stage_dGs = []

    ff = transformation.ff
    mol_a, mol_b = transformation.mol_a, transformation.mol_b
    core = transformation.core

    # TODO: measure performance of complex and solvent separately

    lambda_schedule = construct_lambda_schedule(num_lambdas)

    # the stage label is informational only; the loop body does not use it
    for _stage, host_system, host_coords, host_box in [
        ("complex", complex_system, complex_coords, complex_box),
        ("solvent", solvent_system, solvent_coords, solvent_box),
    ]:

        print("Minimizing the host structure to remove clashes.")
        # NOTE(review): mol_a is passed bare here, while other call sites wrap the
        # ligand(s) in a list -- confirm which form this library version expects.
        minimized_host_coords = minimizer.minimize_host_4d(
            mol_a, host_system, host_coords, ff, host_box)

        single_topology = topology.SingleTopology(mol_a, mol_b, core, ff)
        rfe = free_energy.RelativeFreeEnergy(single_topology)

        # one job per lambda window, for the current stage
        print("submitting tasks to client!")
        do_work = partial(wrap_method, fxn=rfe.host_edge)
        futures = []
        for lamb in lambda_schedule:
            arg = (lamb, host_system, minimized_host_coords, host_box,
                   num_equil_steps, num_steps_per_lambda)
            futures.append(client.submit(do_work, arg))

        # wait for every window, preserving lambda order
        results = [fut.result() for fut in futures]

        dG_host = np.trapz([_mean_du_dlambda(x) for x in results],
                           lambda_schedule)
        stage_dGs.append(dG_host)

    pred = stage_dGs[0] - stage_dGs[1]
    return pred
Example #12
0
    suppl = Chem.SDMolSupplier("tests/data/benzene_fluorinated.sdf",
                               removeHs=False)
    all_mols = [x for x in suppl]
    mol_a = all_mols[0]
    mol_b = all_mols[1]

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    # the water system first.
    solvent_system, solvent_coords, solvent_box, omm_topology = builders.build_water_system(
        4.0)

    print("Minimizing the host structure to remove clashes.")
    minimized_solvent_coords = minimizer.minimize_host_4d([mol_a],
                                                          solvent_system,
                                                          solvent_coords, ff,
                                                          solvent_box)

    absolute_lambda_schedule = np.concatenate([
        np.linspace(0.0,
                    0.333,
                    cmd_args.num_absolute_windows -
                    cmd_args.num_absolute_windows // 3,
                    endpoint=False),
        np.linspace(0.333, 1.0, cmd_args.num_absolute_windows // 3),
    ])

    abs_dGs = []

    for idx, mol in enumerate([mol_a, mol_b]):
Example #13
0
def test_barostat_varying_pressure():
    """Check that the box shrinks at 1000 atm and grows again once set to 1 atm."""
    temperature = 300.0 * unit.kelvin
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    barostat_interval = 3
    collision_rate = 1.0 / unit.picosecond
    seed = 2021
    np.random.seed(seed)

    # Start out with a very large pressure
    pressure = 1000.0 * unit.atmosphere
    mol_a = hif2a_ligand_pair.mol_a
    ff = hif2a_ligand_pair.ff
    box_width_nm = initial_waterbox_width.value_in_unit(unit.nanometer)
    complex_system, complex_coords, complex_box, complex_top = build_water_system(box_width_nm)

    min_complex_coords = minimize_host_4d([mol_a], complex_system, complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords
    )

    # get list of molecules for barostat by looking at bond table
    bond_list = get_bond_list(unbound_potentials[0])
    group_indices = get_group_indices(bond_list)

    lam = 1.0

    u_impls = [
        unbound_pot.bind(np.asarray(params)).bound_impl(precision=np.float32)
        for params, unbound_pot in zip(sys_params, unbound_potentials)
    ]

    integrator = LangevinIntegrator(
        temperature.value_in_unit(unit.kelvin),
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    )
    integrator_impl = integrator.impl()

    v_0 = sample_velocities(masses * unit.amu, temperature)

    baro = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        temperature.value_in_unit(unit.kelvin),
        group_indices,
        barostat_interval,
        u_impls,
        seed,
    )

    ctxt = custom_ops.Context(coords, v_0, complex_box, integrator_impl, u_impls, barostat=baro)

    ctxt.multiple_steps(np.ones(1000) * lam)
    high_pressure_box = ctxt.get_box()
    high_pressure_volume = compute_box_volume(high_pressure_box)
    # Expect the box to shrink thanks to the barostat
    assert compute_box_volume(complex_box) - high_pressure_volume > 0.4

    # Lower the pressure to 1 atmosphere (the setter takes the value in bar)
    one_atm_in_bar = (1 * unit.atmosphere).value_in_unit(unit.bar)
    baro.set_pressure(one_atm_in_bar)
    # Changing the barostat interval resets the barostat step.
    baro.set_interval(2)

    ctxt.multiple_steps(np.ones(2000) * lam)
    low_pressure_box = ctxt.get_box()
    # Box will grow thanks to the lower pressure
    assert compute_box_volume(low_pressure_box) > high_pressure_volume
Example #14
0
# Enlarge the box by 0.1 along each diagonal entry.
box = box + np.eye(3) * 0.1

# Convert the host system into potentials and per-atom masses.
host_bps, host_masses = openmm_deserializer.deserialize_system(system, cutoff=1.2)

# Masses are stacked in the order: host, ligand A, ligand B.
combined_masses = np.concatenate([host_masses, ligand_masses_a, ligand_masses_b])


# minimize coordinates

# note: .py file rather than .offxml file
# note: _ccc suffix means "correctable charge corrections"
ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

# for RHFE we need to insert the reference ligand first, before inserting the
# decoupling ligand
minimized_coords = minimizer.minimize_host_4d([romol_a], system, host_coords, ff, box)

# note the order in which the coordinates are concatenated in this step --
#   in a later step we will need to combine recipes in the same order
# combined_coords = np.concatenate([host_coords, ligand_coords_a, ligand_coords_b])
# NOTE(review): combined_masses above includes ligand A's masses, but only ligand B's
# coordinates are appended here -- presumably minimized_coords already contains
# ligand A; confirm against minimize_host_4d's return value.
combined_coords = np.concatenate([minimized_coords, ligand_coords_b])

# Number of host atoms, taken from the un-minimized host coordinates.
num_host_atoms = host_coords.shape[0]

# Accumulators filled in by the code that follows.
final_potentials = []
final_vjp_and_handles = []

# keep the bonded terms in the host the same.
# but we keep the nonbonded term for a subsequent modification
for bp in host_bps:
    if isinstance(bp, potentials.Nonbonded):
Example #15
0
def do_relative_docking(host_pdbfile, mol_a, mol_b, core, num_switches,
                        transition_steps):
    """Runs non-equilibrium switching jobs:
    1. Solvates a protein, minimizes w.r.t guest_A, equilibrates & spins off switching jobs
       (deleting guest_A while inserting guest_B) every 1000th step, calculates work.
    2. Does the same thing in solvent instead of protein
    Does num_switches switching jobs per leg.

    Parameters
    ----------

    host_pdbfile (str): path to host pdb file
    mol_a (rdkit mol): the starting ligand to swap from
    mol_b (rdkit mol): the ending ligand to swap to
    core (np.array[[int, int], [int, int], ...]): the common core atoms between mol_a and mol_b
    num_switches (int): number of switching trajectories to run per compound pair per leg
    transition_steps (int): length of each switching trajectory

    Returns
    -------

    {str: list}: map of leg label to work values of switching mol_a to mol_b in that leg,
                 {'protein': [work values], 'solvent': [work_values]}
                 An empty dict is returned if the atom mapping is not factorizable.

    Output
    ------

    stdout noting the step number, lambda value, and energy at various steps
    stdout noting the work of transition, if applicable
    stdout noting how long each leg took to run

    Note
    ----
    The work will not be calculated if any norm of force per atom exceeds 20000 kJ/(mol*nm)
       [MAX_NORM_FORCE defined in docking/report.py]
    The simulations won't run if the atom maps are not factorizable
    """

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        _,
    ) = builders.build_protein_system(host_pdbfile)

    # Prepare water box
    print("Generating water box...")
    # TODO: water box probably doesn't need to be this big
    box_lengths = host_box[np.diag_indices(3)]
    water_box_width = min(box_lengths)
    (
        water_system,
        water_coords,
        water_box,
        _,
    ) = builders.build_water_system(water_box_width)

    # it's okay if the water box here and the solvated protein box don't align -- they have PBCs

    # Run the procedure
    guest_name_a = mol_a.GetProp("_Name")
    guest_name_b = mol_b.GetProp("_Name")
    combined_name = guest_name_a + "-->" + guest_name_b

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    all_works = {}
    for system, coords, box, label in zip(
        [solvated_host_system, water_system],
        [solvated_host_coords, water_coords],
        [host_box, water_box],
        ["protein", "solvent"],
    ):
        # time each leg on its own so the reported "leg time" is not cumulative
        start_time = time.time()

        # minimize w.r.t. both mol_a and mol_b?
        min_coords = minimizer.minimize_host_4d([mol_a], system, coords, ff,
                                                box)

        try:
            single_topology = topology.SingleTopology(mol_a, mol_b, core, ff)
            rfe = free_energy.RelativeFreeEnergy(single_topology)
            ups, sys_params, combined_masses, combined_coords = rfe.prepare_host_edge(
                ff.get_ordered_params(), system, min_coords)
        except topology.AtomMappingError as e:
            print(f"NON-FACTORIZABLE PAIR: {combined_name}")
            print(e)
            return {}

        combined_bps = [up.bind(sp) for up, sp in zip(ups, sys_params)]
        all_works[label] = run_leg(
            combined_coords,
            combined_bps,
            combined_masses,
            box,
            combined_name,
            label,
            num_switches,
            transition_steps,
        )
        end_time = time.time()
        print(
            f"{combined_name} {label} leg time:",
            "%.2f" % (end_time - start_time),
            "seconds",
        )
    return all_works
Example #16
0
def test_barostat_partial_group_idxs():
    """Smoke test: run the barostat with only a subset of the molecule groups.

    Only checks that the simulation runs to completion; the resulting
    behavior is not asserted.
    """
    seed = 2021
    lam = 1.0
    barostat_interval = 3
    temperature = 300.0 * unit.kelvin
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    collision_rate = 1.0 / unit.picosecond
    pressure = 1.0 * unit.atmosphere
    np.random.seed(seed)

    ligand = hif2a_ligand_pair.mol_a
    forcefield = hif2a_ligand_pair.ff
    host_system, host_coords, host_box, _ = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer))

    minimized_host_coords = minimize_host_4d([ligand], host_system,
                                             host_coords, forcefield,
                                             host_box)
    afe = AbsoluteFreeEnergy(ligand, forcefield)
    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        forcefield.get_ordered_params(), host_system, minimized_host_coords)

    # The bond table (first potential) tells us which atoms form a molecule.
    all_groups = get_group_indices(get_bond_list(unbound_potentials[0]))

    # Hand the barostat only the second half of the groups.
    partial_groups = all_groups[len(all_groups) // 2:]

    bound_potentials = [
        pot.bind(np.asarray(pot_params))
        for pot_params, pot in zip(sys_params, unbound_potentials)
    ]
    u_impls = [
        bound.bound_impl(precision=np.float32) for bound in bound_potentials
    ]

    integrator_impl = LangevinIntegrator(
        temperature.value_in_unit(unit.kelvin),
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    ).impl()

    v_0 = sample_velocities(masses * unit.amu, temperature)

    baro = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        temperature.value_in_unit(unit.kelvin),
        partial_groups,
        barostat_interval,
        u_impls,
        seed,
    )

    ctxt = custom_ops.Context(coords,
                              v_0,
                              host_box,
                              integrator_impl,
                              u_impls,
                              barostat=baro)
    ctxt.multiple_steps(np.ones(1000) * lam)
Example #17
0
    def equilibrate_edges(
        self,
        edges: List[Tuple[Chem.Mol, Chem.Mol, np.ndarray]],
        lamb: float = 0.0,
        barostat_interval: int = 10,
        equilibration_steps: int = 100000,
        cache_path: str = "equilibration_cache.pkl",
    ):
        """
        Pre equilibrate edges and cache them for later use in predictions.

        Does nothing unless ``self.pre_equilibrate`` is set. If a file already
        exists at ``cache_path``, ``self._equil_cache`` is replaced by its
        contents and no simulation is submitted. Otherwise one equilibration
        job per (stage, edge) is submitted through ``self.client`` (all
        complex legs first, then all solvent legs), the results are stored in
        ``self._equil_cache`` keyed by ``self._edge_hash`` and, when
        ``cache_path`` is truthy, written back to ``cache_path``.

        Parallelized via the model client if possible

        Parameters
        ----------
        edges: List of tuples with mol_a, mol_b, core
            Edges to equilibrate

        lamb: float
            Lambda value to equilibrate at. Uses Dual Topology to equilibrate

        barostat_interval: int
            Interval on which to run barostat during equilibration

        equilibration_steps: int
            Number of steps to equilibrate the edge for

        cache_path: string
            Path to look for existing cache or path to where to save cache. By default
            it will write out a pickle file in the local directory. A falsy value
            (``None`` or ``""``) disables both loading and saving.
        """
        if not self.pre_equilibrate:
            return
        # Check truthiness first: saving below is already skipped for a falsy
        # cache_path, and os.path.isfile(None) raises TypeError.
        if cache_path and os.path.isfile(cache_path):
            # NOTE(review): `load` presumably unpickles the cache; only point
            # cache_path at trusted files, unpickling can execute arbitrary code.
            with open(cache_path, "rb") as ifs:
                self._equil_cache = load(ifs)
            print("Loaded Pre-equilibrated structures from cache")
            return
        futures = []
        ordered_params = self.ff.get_ordered_params()

        temperature = 300.0
        pressure = 1.0

        for stage, host_system, host_coords, host_box in [
            ("complex", self.complex_system, self.complex_coords,
             self.complex_box),
            ("solvent", self.solvent_system, self.solvent_coords,
             self.solvent_box),
        ]:
            # Run all complex legs first then solvent, as they will likely take longer than then solvent leg
            for mol_a, mol_b, core in edges:
                # Use DualTopology to ensure mols exist in the same space.
                topo = topology.DualTopologyMinimization(mol_a, mol_b, self.ff)
                rfe = free_energy.RelativeFreeEnergy(topo)
                min_coords = minimizer.minimize_host_4d([mol_a, mol_b],
                                                        host_system,
                                                        host_coords, self.ff,
                                                        host_box)
                unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                    ordered_params, host_system, min_coords)
                # The first potential is the harmonic bond term; its bond
                # table defines the molecule groups used by the barostat.
                harmonic_bond_potential = unbound_potentials[0]
                bond_list = get_bond_list(harmonic_bond_potential)
                group_idxs = get_group_indices(bond_list)
                time_step = 1.5e-3
                if self.hmr:
                    # A larger time step is used when HMR is applied to the masses.
                    masses = apply_hmr(masses, bond_list)
                    time_step = 2.5e-3
                integrator = LangevinIntegrator(temperature, time_step, 1.0,
                                                masses, 0)
                barostat = MonteCarloBarostat(coords.shape[0], pressure,
                                              temperature, group_idxs,
                                              barostat_interval, 0)
                pots = [
                    bp.bind(np.asarray(params))
                    for bp, params in zip(unbound_potentials, sys_params)
                ]
                future = self.client.submit(
                    estimator.equilibrate,
                    integrator,
                    barostat,
                    pots,
                    coords,
                    host_box,
                    lamb,
                    equilibration_steps,
                )
                futures.append((stage, (mol_a, mol_b, core), future))
        num_equil = len(futures)
        # Collect in submission order; future.result() blocks until that job is done.
        for i, (stage, edge, future) in enumerate(futures):
            edge_hash = self._edge_hash(stage, *edge)
            self._equil_cache[edge_hash] = future.result()
            if (i + 1) % 5 == 0:
                print(f"Pre-equilibrated {i+1} of {num_equil} edges")
        print(f"Pre-equilibrated {num_equil} edges")
        if cache_path:
            with open(cache_path, "wb") as ofs:
                dump(self._equil_cache, ofs)
            print(f"Saved equilibration_cache to {cache_path}")
Example #18
0
    def predict(self, ff_params: list, mol_a: Chem.Mol, mol_b: Chem.Mol,
                core: np.ndarray):
        """
        Predict the ddG of morphing mol_a into mol_b. This function is differentiable w.r.t. ff_params.

        The complex leg is evaluated first, then the solvent leg; the
        prediction is the complex dG minus the solvent dG.

        Parameters
        ----------

        ff_params: list of np.ndarray
            This should match the ordered params returned by the forcefield

        mol_a: Chem.Mol
            Starting molecule corresponding to lambda = 0

        mol_b: Chem.Mol
            Starting molecule corresponding to lambda = 1

        core: np.ndarray
            N x 2 list of ints corresponding to the atom mapping of the core.

        Returns
        -------
        float
            delta delta G in kJ/mol
        aux
            list of (stage, results) tuples, one per leg, where results is
            what estimator.deltaG returned for that leg
        """
        # Simulation settings shared by both legs.
        seed = 0
        temperature = 300.0
        pressure = 1.0

        legs = [
            ("complex", self.complex_system, self.complex_coords,
             self.complex_box, self.complex_schedule),
            ("solvent", self.solvent_system, self.solvent_coords,
             self.solvent_box, self.solvent_schedule),
        ]

        leg_dGs = []
        leg_results = []

        for stage, host_system, host_coords, host_box, lambda_schedule in legs:
            single_topology = topology.SingleTopology(mol_a, mol_b, core,
                                                      self.ff)
            rfe = free_energy.RelativeFreeEnergy(single_topology)
            edge_hash = self._edge_hash(stage, mol_a, mol_b, core)

            if self.pre_equilibrate and edge_hash in self._equil_cache:
                # Start from the cached, pre-equilibrated frame and box.
                cached_state = self._equil_cache[edge_hash]
                equil_coords = cached_state.coords
                host_box = cached_state.box
                n_host = len(host_coords)
                unbound_potentials, sys_params, masses, _ = rfe.prepare_host_edge(
                    ff_params, host_system, host_coords)
                n_mol_a = mol_a.GetNumAtoms()
                # Pre-equilibration used Dual Topology, so the two ligand copies have to be
                # merged here; prepare_host_edge normally does this, but the whole system
                # has moved by this stage.
                ligand_a_coords = equil_coords[n_host:n_host + n_mol_a]
                ligand_b_coords = equil_coords[n_host + n_mol_a:]
                merged_ligand_coords = np.mean(
                    single_topology.interpolate_params(ligand_a_coords,
                                                       ligand_b_coords),
                    axis=0,
                )
                x0 = np.concatenate(
                    [equil_coords[:n_host], merged_ligand_coords])
            else:
                if self.pre_equilibrate:
                    print(
                        "Edge not correctly pre-equilibrated, ensure equilibrate_edges was called"
                    )
                print(
                    f"Minimizing the {stage} host structure to remove clashes."
                )
                # (ytz): this isn't strictly symmetric, and we should modify minimize later on remove
                # the hysteresis by jointly minimizing against a and b at the same time. We may also want
                # to remove the randomness completely from the minimization.
                min_host_coords = minimizer.minimize_host_4d([mol_a, mol_b],
                                                             host_system,
                                                             host_coords,
                                                             self.ff, host_box)

                unbound_potentials, sys_params, masses, x0 = rfe.prepare_host_edge(
                    ff_params, host_system, min_host_coords)

            v0 = np.zeros_like(x0)

            # The first potential is the harmonic bond term.
            bond_list = get_bond_list(unbound_potentials[0])
            if self.hmr:
                masses = apply_hmr(masses, bond_list)
                time_step = 2.5e-3
            else:
                time_step = 1.5e-3
            group_idxs = get_group_indices(bond_list)

            integrator = LangevinIntegrator(temperature, time_step, 1.0,
                                            masses, seed)

            barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature,
                                          group_idxs, self.barostat_interval,
                                          seed)

            model = estimator.FreeEnergyModel(
                unbound_potentials,
                self.client,
                host_box,
                x0,
                v0,
                integrator,
                lambda_schedule,
                self.equil_steps,
                self.prod_steps,
                barostat,
            )

            dG, results = estimator.deltaG(model, sys_params)

            leg_dGs.append(dG)
            leg_results.append((stage, results))

        complex_dG, solvent_dG = leg_dGs
        return complex_dG - solvent_dG, leg_results
Example #19
0
def _topology_coords_to_rdkit_mol(pdb_topology, coords):
    """Write one frame to a temporary PDB file and read it back as an RDKit mol (hydrogens kept)."""
    fd, pdb_path = tempfile.mkstemp(suffix=".pdb", text=True)
    # mkstemp returns an already-open OS-level descriptor that the caller
    # owns; only the path is needed here, so close it right away.
    os.close(fd)
    try:
        writer = pdb_writer.PDBWriter([pdb_topology], pdb_path)
        try:
            writer.write_frame(coords)
        finally:
            writer.close()
        return Chem.MolFromPDBFile(pdb_path, removeHs=False)
    finally:
        # Always clean up the temporary file, even if writing/parsing failed.
        os.remove(pdb_path)


def calculate_rigorous_work(
    host_pdbfile,
    guests_sdfile,
    outdir,
    num_deletions,
    deletion_steps,
    insertion_max_lambda=0.5,
    insertion_steps=501,
    eq1_steps=5001,
    fewer_outfiles=False,
    no_outfiles=False,
):
    """Runs non-equilibrium deletion jobs:
    1. Solvates a protein, inserts guest, equilibrates, equilibrates more & spins off
       deletion jobs every 1000th step, calculates work.
    2. Does the same thing in solvent instead of protein.
    Does num_deletions deletion jobs per leg per compound.

    Parameters
    ----------

    host_pdbfile (str): path to host pdb file
    guests_sdfile (str): path to guests sdf file
    outdir (str): path to directory to which to write output (created if missing)
    num_deletions (int): number of deletion trajectories to run per leg per compound
    deletion_steps (int): length of each deletion trajectory
    insertion_max_lambda (float): how far away to insert from (0.0-1.0)
    insertion_steps (int): how long to insert over
    eq1_steps (int): how long to equilibrate after insertion and before starting the deletions
    fewer_outfiles (bool): only save the starting frame of each deletion trajectory
    no_outfiles (bool): don't keep any output files

    Returns
    -------

    {str: {str: works}}: map of compound to leg label to whatever run_leg returned for that leg
                         {'guest_1': {'protein': [work values], 'solvent': [work_values]}, ...}
                         Guests that could not be parsed from the sdf file are skipped.

    Output
    ------

    A pdb & sdf file for each guest's final insertion step
      (outdir/<guest_name>_pd_<step>_host.pdb & outdir/<guest_name>_pd_<step>_guest.sdf)
      (unless fewer_outfiles or no_outfiles is True)
    A pdb & sdf file for each guest's final eq1 step
      (outdir/<guest_name>_pd_<step>_host.pdb & outdir/<guest_name>_pd_<step>_guest.sdf)
      (unless fewer_outfiles or no_outfiles is True)
    A pdb & sdf file for each deletion job's first step
      (outdir/<guest_name>_pd_<step>_host.pdb & outdir/<guest_name>_pd_<step>_guest.sdf)
      (unless no_outfiles is True)
    stdout corresponding to the files written noting the lambda value and energy
    stdout noting the work of deletion, if applicable
    stdout noting how long each leg took to run

    Note
    ----
    The work will not be calculated if the du_dl endpoints are not close to 0 or if any norm of
    force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py]
    """

    # exist_ok avoids the check-then-create race when several jobs share outdir.
    os.makedirs(outdir, exist_ok=True)

    print(f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}

    DELETION_MAX_LAMBDA = {DELETION_MAX_LAMBDA}
    MIN_LAMBDA = {MIN_LAMBDA}
    insertion_max_lambda = {insertion_max_lambda}
    insertion_steps = {insertion_steps}
    eq1_steps = {eq1_steps}
    num_deletions = {num_deletions}
    deletion_steps = {deletion_steps}
    """)

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)
    solvated_host_mol = _topology_coords_to_rdkit_mol(solvated_topology,
                                                      solvated_host_coords)

    # Prepare water box
    print("Generating water box...")
    # TODO: water box probably doesn't need to be this big
    box_lengths = host_box[np.diag_indices(3)]
    water_box_width = min(box_lengths)
    (
        water_system,
        water_coords,
        water_box,
        water_topology,
    ) = builders.build_water_system(water_box_width)

    # it's okay if the water box here and the solvated protein box don't align -- they have PBCs
    water_mol = _topology_coords_to_rdkit_mol(water_topology, water_coords)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    legs = [
        (solvated_host_system, solvated_host_coords, solvated_host_mol,
         host_box, "protein"),
        (water_system, water_coords, water_mol, water_box, "solvent"),
    ]

    # Run the procedure
    all_works = defaultdict(dict)
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_idx, guest_mol in enumerate(suppl):
        # The supplier yields None for records it could not parse.
        if guest_mol is None:
            print(
                f"Skipping guest #{guest_idx} in {guests_sdfile}: could not be parsed"
            )
            continue

        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(),
                                     dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        for system, coords, host_mol, box, label in legs:
            # Restart the clock for every leg so the reported time covers
            # this leg only, not the legs that ran before it.
            start_time = time.time()

            minimized_coords = minimizer.minimize_host_4d([guest_mol], system,
                                                          coords, ff, box)

            afe = free_energy.AbsoluteFreeEnergy(guest_mol, ff)
            ups, sys_params, combined_masses, combined_coords = afe.prepare_host_edge(
                ff.get_ordered_params(), system, minimized_coords)

            combined_bps = [up.bind(sp) for up, sp in zip(ups, sys_params)]

            works = run_leg(
                minimized_coords,
                orig_guest_coords,
                combined_bps,
                combined_masses,
                box,
                guest_name,
                label,
                host_mol,
                guest_mol,
                outdir,
                num_deletions,
                deletion_steps,
                insertion_max_lambda,
                insertion_steps,
                eq1_steps,
                fewer_outfiles,
                no_outfiles,
            )
            all_works[guest_name][label] = works
            elapsed = time.time() - start_time
            print(
                f"{guest_name} {label} leg time:",
                f"{elapsed:.2f}",
                "seconds",
            )
    return all_works
Example #20
0
def benchmark_hif2a(verbose=False, num_batches=100, steps_per_batch=1000):
    """Benchmark the hif2a ligand-pair test system in a protein host and in solvent.

    For each host this runs the host-only ("apo") system with and without an
    explicit barostat interval, then the relative free energy system at
    lambda = 0.5 with several du_dl intervals.
    """

    from timemachine.testsystems.relative import hif2a_ligand_pair as testsystem

    mol_a = testsystem.mol_a
    mol_b = testsystem.mol_b
    core = testsystem.core

    ff = Forcefield.load_from_file("smirnoff_1_1_0_sc.py")

    rfe = free_energy.RelativeFreeEnergy(SingleTopology(mol_a, mol_b, core, ff))

    ff_params = ff.get_ordered_params()

    # build the protein system.
    complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(
        "tests/data/hif2a_nowater_min.pdb"
    )

    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)

    # Options forwarded unchanged to every benchmark() call.
    run_opts = dict(
        verbose=verbose,
        num_batches=num_batches,
        steps_per_batch=steps_per_batch,
    )

    hosts = [
        ("hif2a", complex_system, complex_coords, complex_box),
        ("solvent", solvent_system, solvent_coords, solvent_box),
    ]

    for stage, host_system, host_coords, host_box in hosts:

        host_fns, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.0)

        # resolve host clashes
        apo_x0 = minimizer.minimize_host_4d([mol_a, mol_b], host_system, host_coords, ff, host_box)
        apo_v0 = np.zeros_like(apo_x0)

        # host only, lamb = 0.0
        benchmark(stage + "-apo", host_masses, 0.0, apo_x0, apo_v0, host_box, host_fns, **run_opts)
        benchmark(
            stage + "-apo-barostat-interval-25",
            host_masses,
            0.0,
            apo_x0,
            apo_v0,
            host_box,
            host_fns,
            barostat_interval=25,
            **run_opts,
        )

        # RBFE
        unbound_potentials, sys_params, masses, rbfe_x0 = rfe.prepare_host_edge(ff_params, host_system, apo_x0)

        bound_potentials = [pot.bind(pot_params) for pot, pot_params in zip(unbound_potentials, sys_params)]

        rbfe_v0 = np.zeros_like(rbfe_x0)

        # lamb = 0.5
        benchmark(
            stage + "-rbfe-with-du-dp",
            masses,
            0.5,
            rbfe_x0,
            rbfe_v0,
            host_box,
            bound_potentials,
            **run_opts,
        )

        for du_dl_interval in [0, 1, 5]:
            benchmark(
                stage + "-rbfe-du-dl-interval-" + str(du_dl_interval),
                masses,
                0.5,
                rbfe_x0,
                rbfe_v0,
                host_box,
                bound_potentials,
                compute_du_dl_interval=du_dl_interval,
                **run_opts,
            )
Example #21
0
    def simulate_pair(epoch: int, blocker: Chem.Mol, mol: Chem.Mol):
        """Prepare starting coordinates for `mol` and launch its four simulation legs.

        Returns a dict holding the futures of the solvent/complex conversion
        and decouple simulations together with the mol, blocker, epoch and
        the random seed shared by all four legs.
        """
        # NOTE(review): validation and core selection use the `blocker`
        # argument, while minimization and the complex decouple leg use
        # `blocker_mol` from the enclosing scope -- confirm these are always
        # the same molecule.
        verify_rabfe_pair(mol, blocker)
        mol_name = mol.GetProp("_Name")

        # generate the core_idxs
        core_idxs = setup_relative_restraints_by_distance(mol, blocker)
        mol_coords = get_romol_conf(mol)  # original coords

        n_complex = complex_coords.shape[0]
        n_solvent = solvent_coords.shape[0]

        # Split the reference frames into host atoms and trailing reference atoms.
        complex_host_coords = complex_ref_x0[:n_complex]
        ref_coords = complex_ref_x0[n_complex:]
        complex_box0 = complex_ref_box0

        solvent_host_coords = solvent_ref_x0[:n_solvent]
        solvent_box0 = solvent_ref_box0

        # Align mol onto the reference coordinates using the core atoms.
        R, t = rmsd.get_optimal_rotation_and_translation(
            x1=ref_coords[core_idxs[:, 1]],  # reference core atoms
            x2=mol_coords[core_idxs[:, 0]],  # mol core atoms
        )
        aligned_mol_coords = rmsd.apply_rotation_and_translation(
            mol_coords, R, t)

        # compute the free energy of swapping an interacting mol with a non-interacting reference mol
        decouple_host = minimizer.minimize_host_4d(
            [mol, blocker_mol],
            complex_system,
            complex_host_coords,
            forcefield,
            complex_box0,
            [aligned_mol_coords, ref_coords],
        )
        complex_decouple_x0 = np.concatenate(
            [decouple_host, aligned_mol_coords, ref_coords])

        # compute the free energy of conversion in complex
        conversion_host = minimizer.minimize_host_4d(
            [mol],
            complex_system,
            complex_host_coords,
            forcefield,
            complex_box0,
            [aligned_mol_coords],
        )
        complex_conversion_x0 = np.concatenate(
            [conversion_host, aligned_mol_coords])

        solvent_host = minimizer.minimize_host_4d([mol], solvent_system,
                                                  solvent_host_coords,
                                                  forcefield, solvent_box0)
        solvent_x0 = np.concatenate([solvent_host, mol_coords])

        suffix = f"{mol_name}_{epoch}"

        seed = np.random.randint(np.iinfo(np.int32).max)

        # Order of these simulations should match the order in which predictions are computed to ensure
        # efficient use of parallelism.
        outcome = {}
        outcome["solvent_conversion"] = binding_model_solvent_conversion.simulate_futures(
            ordered_params,
            mol,
            solvent_x0,
            solvent_box0,
            prefix="solvent_conversion_" + suffix,
            seed=seed,
        )
        outcome["solvent_decouple"] = binding_model_solvent_decouple.simulate_futures(
            ordered_params,
            mol,
            solvent_x0,
            solvent_box0,
            prefix="solvent_decouple_" + suffix,
            seed=seed,
        )
        outcome["complex_conversion"] = binding_model_complex_conversion.simulate_futures(
            ordered_params,
            mol,
            complex_conversion_x0,
            complex_box0,
            prefix="complex_conversion_" + suffix,
            seed=seed,
        )
        outcome["complex_decouple"] = binding_model_complex_decouple.simulate_futures(
            ordered_params,
            mol,
            blocker_mol,
            core_idxs,
            complex_decouple_x0,
            complex_box0,
            prefix="complex_decouple_" + suffix,
            seed=seed,
        )
        outcome["mol"] = mol
        outcome["blocker"] = blocker
        outcome["epoch"] = epoch
        outcome["seed"] = seed
        return outcome
Example #22
0
def test_barostat_is_deterministic():
    """Check that a seeded barostat run reproduces a known box volume.

    After 1000 steps the box volume must match a stored reference value.
    Reproducibility matters both for debugging and for replicating
    simulations.
    """
    platform_version = get_platform_version()
    seed = 2021
    lam = 1.0
    barostat_interval = 3
    temperature = 300.0 * unit.kelvin
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    collision_rate = 1.0 / unit.picosecond
    np.random.seed(seed)

    # OpenEye's AM1 Charging values are OS platform dependent. To ensure that we have deterministic values
    # we check against our two most common OS versions, Ubuntu 18.04 and 20.04.
    expected_volume = 26.869380588831582
    expected_charges = np.array([
        1.4572377542719206,
        -0.37011462071257184,
        1.1478267014520305,
        -4.920284514559682,
        0.16985194917937935,
    ])
    if "ubuntu" not in platform_version:
        print(
            f"Test expected to run under ubuntu 20.04 or 18.04, got {platform_version}"
        )
    if "18.04" in platform_version:
        expected_volume = 26.711716908713402
        expected_charges[3] = -4.920166483601927

    pressure = 1.0 * unit.atmosphere

    ligand = hif2a_ligand_pair.mol_a
    forcefield = hif2a_ligand_pair.ff
    host_system, host_coords, host_box, _ = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer))

    minimized_host_coords = minimize_host_4d([ligand], host_system,
                                             host_coords, forcefield,
                                             host_box)
    afe = AbsoluteFreeEnergy(ligand, forcefield)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        forcefield.get_ordered_params(), host_system, minimized_host_coords)

    # get list of molecules for barostat by looking at bond table
    group_indices = get_group_indices(get_bond_list(unbound_potentials[0]))

    # Look at the first five atoms after the host and their assigned charges
    # (column 0 of the last parameter set).
    n_host_atoms = len(minimized_host_coords)
    ligand_charges = sys_params[-1][:, 0][n_host_atoms:][:5]
    np.testing.assert_array_almost_equal(expected_charges,
                                         ligand_charges,
                                         decimal=5)

    u_impls = [
        pot.bind(np.asarray(pot_params)).bound_impl(precision=np.float32)
        for pot_params, pot in zip(sys_params, unbound_potentials)
    ]

    integrator_impl = LangevinIntegrator(
        temperature.value_in_unit(unit.kelvin),
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    ).impl()

    v_0 = sample_velocities(masses * unit.amu, temperature)

    baro = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        temperature.value_in_unit(unit.kelvin),
        group_indices,
        barostat_interval,
        u_impls,
        seed,
    )

    ctxt = custom_ops.Context(coords,
                              v_0,
                              host_box,
                              integrator_impl,
                              u_impls,
                              barostat=baro)
    ctxt.multiple_steps(np.ones(1000) * lam)
    final_box = ctxt.get_box()
    np.testing.assert_almost_equal(compute_box_volume(final_box),
                                   expected_volume,
                                   decimal=5)