Example 1
    def test_avg_potential_param_sizes_is_zero(self):
        np.random.seed(814)

        N = 8
        D = 3

        x0 = np.random.rand(N, D).astype(dtype=np.float64) * 2

        masses = np.random.rand(N)

        v0 = np.random.rand(x0.shape[0], x0.shape[1])

        num_steps = 3
        ca = np.random.rand()
        cbs = -np.random.rand(len(masses)) / 1
        ccs = np.zeros_like(cbs)

        dt = 2e-3
        lamb = np.random.rand()
        box = np.eye(3) * 1.5

        intg = custom_ops.LangevinIntegrator(dt, ca, cbs, ccs, 814)

        # Construct a 'bad' centroid restraint
        potential = potentials.CentroidRestraint(
            np.random.randint(N, size=5, dtype=np.int32),
            np.random.randint(N, size=5, dtype=np.int32), 10.0, 0.0)
        # Bind to empty params
        bp = potential.bind(np.zeros(0)).bound_impl(precision=np.float64)

        ctxt = custom_ops.Context(x0, v0, box, intg, [bp])

        for _ in range(num_steps):
            ctxt.step(lamb)
Example 2
def equilibrate(integrator, barostat, potentials, coords, box, lamb,
                equil_steps) -> Tuple:
    all_impls = []
    v0 = np.zeros_like(coords)

    for bp in potentials:
        impl = bp.bound_impl(np.float32)
        all_impls.append(impl)

    if integrator.seed == 0:
        integrator = copy.deepcopy(integrator)
        integrator.seed = np.random.randint(np.iinfo(np.int32).max)

    if barostat.seed == 0:
        barostat = copy.deepcopy(barostat)
        barostat.seed = np.random.randint(np.iinfo(np.int32).max)

    intg_impl = integrator.impl()
    baro_impl = barostat.impl(all_impls)
    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(
        coords,
        v0,
        box,
        intg_impl,
        all_impls,
        barostat=baro_impl,
    )

    # equilibration
    equil_schedule = np.ones(equil_steps) * lamb
    ctxt.multiple_steps(equil_schedule)
    return CoordsVelBox(coords=ctxt.get_x_t(),
                        velocities=ctxt.get_v_t(),
                        box=ctxt.get_box())
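
A minimal usage sketch for the helper above, assuming an integrator, barostat, bound potentials, coordinates, and box prepared as in the neighbouring examples; every name and value below is a placeholder, not part of the original code.

# Hypothetical usage sketch; `integrator`, `barostat`, `bps`, `coords` and
# `box` are placeholders assumed to be built as in the other examples.
# (assumes CoordsVelBox exposes coords / velocities / box as attributes)
state = equilibrate(integrator, barostat, bps, coords, box, lamb=0.0, equil_steps=10000)
x0, v0, box0 = state.coords, state.velocities, state.box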
Example 3
    def test_set_and_get(self):
        """
        This tests the setters and getters of the context.
        """

        np.random.seed(4321)

        N = 8
        D = 3

        x0 = np.random.rand(N, D).astype(dtype=np.float64) * 2

        E = 2

        lambda_plane_idxs = np.random.randint(low=0,
                                              high=2,
                                              size=N,
                                              dtype=np.int32)
        lambda_offset_idxs = np.random.randint(low=0,
                                               high=2,
                                               size=N,
                                               dtype=np.int32)

        params, _, test_nrg = prepare_nb_system(
            x0,
            E,
            lambda_plane_idxs,
            lambda_offset_idxs,
            p_scale=3.0,
            cutoff=1.0,
        )

        masses = np.random.rand(N)
        v0 = np.random.rand(x0.shape[0], x0.shape[1])

        temperature = 300
        dt = 2e-3
        friction = 0.0
        ca, cbs, ccs = langevin_coefficients(temperature, dt, friction, masses)

        box = np.eye(3) * 3.0
        intg = custom_ops.LangevinIntegrator(dt, ca, cbs, ccs, 1234)

        bp = test_nrg.bind(params).bound_impl(precision=np.float64)
        bps = [bp]

        ctxt = custom_ops.Context(x0, v0, box, intg, bps)

        np.testing.assert_equal(ctxt.get_x_t(), x0)
        np.testing.assert_equal(ctxt.get_v_t(), v0)
        np.testing.assert_equal(ctxt.get_box(), box)

        new_x = np.random.rand(N, 3)
        ctxt.set_x_t(new_x)

        np.testing.assert_equal(ctxt.get_x_t(), new_x)
Example 4
def do_deletion(
    x0,
    v0,
    combined_bps,
    combined_masses,
    box,
    guest_name,
    leg_type,
    u_impls,
    deletion_steps,
):
    seed = 2021
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    # du_dl_obs = custom_ops.FullPartialUPartialLambda(u_impls, subsample_freq)
    # ctxt.add_observable(du_dl_obs)

    deletion_lambda_schedule = np.linspace(MIN_LAMBDA, DELETION_MAX_LAMBDA,
                                           deletion_steps)

    subsample_freq = 1
    full_du_dls, _, _ = ctxt.multiple_steps(deletion_lambda_schedule,
                                            subsample_freq)

    step = len(deletion_lambda_schedule) - 1
    lamb = deletion_lambda_schedule[-1]
    ctxt.step(lamb)
    report.report_step(
        ctxt,
        step,
        lamb,
        box,
        combined_bps,
        u_impls,
        guest_name,
        deletion_steps,
        f"{leg_type.upper()}_DELETION",
    )

    if report.too_much_force(ctxt, lamb, box, combined_bps, u_impls):
        print("Not calculating work (too much force)")
        return None

    # Note: this condition only applies for ABFE, not RBFE
    if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
        print("Not calculating work (du_dl endpoints are not ~0)")
        return None

    work = np.trapz(full_du_dls, deletion_lambda_schedule[::subsample_freq])
    print(f"guest_name: {guest_name}\t{leg_type}_work: {work:.2f}")
    return work
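
For reference, a self-contained numerical sketch of the work estimate used above: the work is the trapezoidal integral of the collected du/dlambda samples over the lambda schedule. The numbers below are invented purely for illustration.

import numpy as np

# Illustrative values only: five du/dlambda samples over an evenly spaced schedule.
lambda_schedule = np.linspace(0.0, 1.0, 5)
du_dls = np.array([0.0, 2.0, 3.0, 2.0, 0.0])

# Same estimator as in do_deletion above: work = integral of dU/dlambda over lambda.
work = np.trapz(du_dls, lambda_schedule)
assert np.isclose(work, 1.75)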
Example 5
def do_deletion(x0, v0, combined_bps, combined_masses, box, guest_name, leg_type):
    seed = 2021
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    u_impls = []
    for bp in combined_bps:
        bp_impl = bp.bound_impl(precision=np.float32)
        u_impls.append(bp_impl)

    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    subsample_freq = 2
    du_dl_obs = custom_ops.FullPartialUPartialLambda(u_impls, subsample_freq)
    ctxt.add_observable(du_dl_obs)

    deletion_lambda_schedule = np.linspace(
        MIN_LAMBDA, DELETION_MAX_LAMBDA, TRANSITION_STEPS
    )

    calc_work = True

    for step, lamb in enumerate(deletion_lambda_schedule):
        ctxt.step(lamb)
        if step % 100 == 0:
            report.report_step(
                ctxt,
                step,
                lamb,
                box,
                combined_bps,
                u_impls,
                guest_name,
                TRANSITION_STEPS,
                f"{leg_type.upper()}_DELETION",
            )
        if step in (0, int(TRANSITION_STEPS / 2), TRANSITION_STEPS - 1):
            if report.too_much_force(ctxt, lamb, box, combined_bps, u_impls):
                return

    # Note: this condition only applies for ABFE, not RBFE
    if (
        abs(du_dl_obs.full_du_dl()[0]) > 0.001
        or abs(du_dl_obs.full_du_dl()[-1]) > 0.001
    ):
        print("Error: du_dl endpoints are not ~0")
        calc_work = False

    if calc_work:
        work = np.trapz(
            du_dl_obs.full_du_dl(), deletion_lambda_schedule[::subsample_freq]
        )
        print(f"guest_name: {guest_name}\t{leg_type}_work: {work:.2f}")
Example 6
def equilibrate_solvent_phase(
    ubps,
    params,
    masses,
    coords,  # minimized_coords
    box,
    temperature,
    pressure,
    num_steps,
    seed=None,
):
    """
    Generate samples in the solvent phase.
    """

    dt = 1e-4
    friction = 1.0

    bps = []
    for p, bp in zip(params, ubps):
        bps.append(bp.bind(p))

    all_impls = [bp.bound_impl(np.float32) for bp in bps]

    intg_equil = lib.LangevinIntegrator(temperature, dt, friction, masses, seed)
    intg_equil_impl = intg_equil.impl()

    bond_list = get_bond_list(ubps[0])
    group_idxs = get_group_indices(bond_list)
    barostat_interval = 5

    barostat = lib.MonteCarloBarostat(len(masses), pressure, temperature, group_idxs, barostat_interval, seed + 1)
    barostat_impl = barostat.impl(all_impls)

    # context components: positions, velocities, box, integrator, energy fxns, barostat
    equil_ctxt = custom_ops.Context(coords, np.zeros_like(coords), box, intg_equil_impl, all_impls, barostat_impl)

    lamb = 0.0

    # TODO: revert to 50k
    equil_schedule = np.ones(num_steps) * lamb
    equil_ctxt.multiple_steps(equil_schedule)

    x0 = equil_ctxt.get_x_t()

    # (ytz): This has to be zeros_like for now since if we freeze ligand
    # coordinates it would start to move during rejected moves.
    v0 = np.zeros_like(x0)

    return CoordsVelBox(x0, v0, equil_ctxt.get_box())
Example 7
def do_switch(
    x0,
    v0,
    combined_bps,
    combined_masses,
    box,
    guest_name,
    leg_type,
    u_impls,
    transition_steps,
):
    seed = 2021
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    switching_lambda_schedule = np.linspace(MIN_LAMBDA, MAX_LAMBDA,
                                            transition_steps)

    subsample_interval = 1
    full_du_dls, _, _ = ctxt.multiple_steps(switching_lambda_schedule,
                                            subsample_interval)

    step = len(switching_lambda_schedule) - 1
    lamb = switching_lambda_schedule[-1]
    ctxt.step(lamb)
    report.report_step(
        ctxt,
        step,
        lamb,
        box,
        combined_bps,
        u_impls,
        guest_name,
        transition_steps,
        f"{leg_type.upper()}_SWITCH",
    )

    if report.too_much_force(ctxt, lamb, box, combined_bps, u_impls):
        return

    work = np.trapz(full_du_dls,
                    switching_lambda_schedule[::subsample_interval])
    print(f"guest_name: {guest_name}\t{leg_type}_work: {work:.2f}")
    return work
Example 8
    def move(self, x: CoordsVelBox):
        # note: context creation overhead here is actually very small!
        ctxt = custom_ops.Context(x.coords, x.velocities, x.box,
                                  self.integrator_impl, self.bound_impls,
                                  self.barostat_impl)

        # arguments: lambda_schedule, du_dl_interval, x_interval
        _ = ctxt.multiple_steps(self.lamb * np.ones(self.n_steps), 0, 0)
        x_t = ctxt.get_x_t()
        v_t = ctxt.get_v_t()
        box = ctxt.get_box()

        after_npt = CoordsVelBox(x_t, v_t, box)

        self.n_proposed += 1
        self.n_accepted += 1

        return after_npt
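
A sketch of how a move like this might be chained into a short sequence of states. `mover` stands in for an instance of the surrounding class (with `integrator_impl`, `bound_impls`, `barostat_impl`, `lamb` and `n_steps` already set), and `x0`, `v0`, `box` are placeholders; none of this is part of the original code.

# Illustrative driver loop: propagate a CoordsVelBox through repeated moves.
# `mover`, `x0`, `v0` and `box` are placeholders for objects set up elsewhere.
state = CoordsVelBox(x0, v0, box)
for _ in range(10):
    state = mover.move(state)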
Example 9
def run(args):

    lamb, intg, bound_potentials, masses, x0, box, gpu_idx, stage = args
    # print("running on", gpu_idx)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_idx)
    u_impls = []
    for bp in bound_potentials:
        u_impls.append(bp.bound_impl(precision=np.float32))

    # important that we reseed here.
    intg.seed = np.random.randint(np.iinfo(np.int32).max)
    intg_impl = intg.impl()

    v0 = np.zeros_like(x0)

    ctxt = custom_ops.Context(x0, v0, box, intg_impl, u_impls)

    # secondary minimization needed only for stage 1
    if stage == 1:
        for l in np.linspace(0.35, lamb, 500):
            ctxt.step(l)

    # equilibration
    for step in range(20000):
        # for step in range(1000):
        ctxt.step(lamb)

    # print(ctxt.get_x_t())

    du_dl_obs = custom_ops.AvgPartialUPartialLambda(u_impls, 10)
    ctxt.add_observable(du_dl_obs)

    # add observable for <du/dl>
    for step in range(50000):
        # for step in range(5000):
        ctxt.step(lamb)

    print(lamb, du_dl_obs.avg_du_dl())

    assert not np.any(np.abs(ctxt.get_x_t()) > 100)
    assert not np.any(np.isnan(ctxt.get_x_t()))
    assert not np.any(np.isinf(ctxt.get_x_t()))

    return du_dl_obs.avg_du_dl()
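
Since `run` pins each invocation to a GPU via CUDA_VISIBLE_DEVICES and takes a single argument tuple, it lends itself to dispatch from a process pool. The sketch below is one hypothetical way to fan it out; `num_gpus`, `lambda_schedule` and the other names in the tuples are placeholders for objects built elsewhere.

# Hypothetical fan-out over a process pool; every name in the argument tuples
# is a placeholder for an object constructed elsewhere.
from multiprocessing import Pool

args_list = [
    (lamb, intg, bound_potentials, masses, x0, box, gpu_idx % num_gpus, stage)
    for gpu_idx, lamb in enumerate(lambda_schedule)
]
with Pool(num_gpus) as pool:
    avg_du_dls = pool.map(run, args_list)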
Example 10
def minimize(args):

    bound_potentials, masses, x0, box = args

    u_impls = []
    for bp in bound_potentials:
        u_impls.append(bp.bound_impl(precision=np.float32))

    seed = np.random.randint(np.iinfo(np.int32).max)

    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, masses, seed).impl()

    v0 = np.zeros_like(x0)

    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    lambda_schedule = np.linspace(0.35, 0.0, 500)
    for lamb in lambda_schedule:
        ctxt.step(lamb)

    return ctxt.get_x_t()
Example 11
def dock_and_equilibrate(
    host_pdbfile,
    guests_sdfile,
    max_lambda,
    insertion_steps,
    eq_steps,
    outdir,
    fewer_outfiles=False,
    constant_atoms=[],
):
    """Solvates a host, inserts guest(s) into solvated host, equilibrates

    Parameters
    ----------

    host_pdbfile: path to host pdb file to dock into
    guests_sdfile: path to input sdf with guests to pose/dock
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    insertion_steps: how many steps to insert the guest over (recommended: 501)
    eq_steps: how many steps of equilibration to do after insertion (recommended: 15001)
    outdir: where to write output (will be created if it does not already exist)
    fewer_outfiles: if True, will only write frames for the equilibration, not insertion
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files)

    Output
    ------

    A pdb & sdf file for the last step of insertion
       (outdir/<guest_name>/<guest_name>_ins_<step>_[host.pdb/guest.sdf])
    A pdb & sdf file every 1000 steps of equilibration
       (outdir/<guest_name>/<guest_name>_eq_<step>_[host.pdb/guest.sdf])
    stdout corresponding to the files written noting the lambda value and energy
    stdout for each guest noting the work of transition, if applicable
    stdout for each guest noting how long it took to run

    Note
    ----
    The work will not be calculated if the du_dl endpoints are not close to 0 or if any norm of
    force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py]
    """

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print(f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}
    MAX_LAMBDA = {max_lambda}
    INSERTION_STEPS = {insertion_steps}
    EQ_STEPS = {eq_steps}
    """)

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    _, solvated_host_pdb = tempfile.mkstemp(suffix=".pdb", text=True)
    writer = pdb_writer.PDBWriter([solvated_topology], solvated_host_pdb)
    writer.write_frame(solvated_host_coords)
    writer.close()
    solvated_host_mol = Chem.MolFromPDBFile(solvated_host_pdb, removeHs=False)
    os.remove(solvated_host_pdb)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(),
                                     dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        minimized_coords = minimizer.minimize_host_4d([guest_mol],
                                                      solvated_host_system,
                                                      solvated_host_coords, ff,
                                                      host_box)

        afe = free_energy.AbsoluteFreeEnergy(guest_mol, ff)

        ups, sys_params, combined_masses, _ = afe.prepare_host_edge(
            ff.get_ordered_params(), solvated_host_system, minimized_coords)

        combined_bps = []
        for up, sp in zip(ups, sys_params):
            combined_bps.append(up.bind(sp))

        x0 = np.concatenate([minimized_coords, orig_guest_coords])
        v0 = np.zeros_like(x0)
        print("SYSTEM", f"guest_name: {guest_name}", f"num_atoms: {len(x0)}")

        for atom_num in constant_atoms:
            combined_masses[atom_num - 1] += 50000

        seed = 2021
        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                  seed).impl()

        u_impls = []
        for bp in combined_bps:
            bp_impl = bp.bound_impl(precision=np.float32)
            u_impls.append(bp_impl)

        ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

        # insert guest
        insertion_lambda_schedule = np.linspace(max_lambda, 0.0,
                                                insertion_steps)
        calc_work = True

        # collect a du_dl sample at every step
        subsample_interval = 1

        full_du_dls, _, _ = ctxt.multiple_steps(insertion_lambda_schedule,
                                                subsample_interval)
        step = len(insertion_lambda_schedule) - 1
        lamb = insertion_lambda_schedule[-1]
        ctxt.step(lamb)

        report.report_step(
            ctxt,
            step,
            lamb,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            insertion_steps,
            "INSERTION",
        )
        if not fewer_outfiles:
            host_coords = ctxt.get_x_t()[:len(solvated_host_coords)] * 10
            guest_coords = ctxt.get_x_t()[len(solvated_host_coords):] * 10
            report.write_frame(
                host_coords,
                solvated_host_mol,
                guest_coords,
                guest_mol,
                guest_name,
                outdir,
                str(step).zfill(len(str(insertion_steps))),
                "ins",
            )

        if report.too_much_force(ctxt, lamb, host_box, combined_bps, u_impls):
            print("Not calculating work (too much force)")
            calc_work = False
            continue

        # Note: this condition only applies for ABFE, not RBFE
        if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
            print("Not calculating work (du_dl endpoints are not ~0)")
            calc_work = False

        if calc_work:
            work = np.trapz(full_du_dls,
                            insertion_lambda_schedule[::subsample_interval])
            print(f"guest_name: {guest_name}\tinsertion_work: {work:.2f}")

        # equilibrate
        for step in range(eq_steps):
            ctxt.step(0.00)
            if step % 1000 == 0:
                report.report_step(
                    ctxt,
                    step,
                    0.00,
                    host_box,
                    combined_bps,
                    u_impls,
                    guest_name,
                    eq_steps,
                    "EQUILIBRATION",
                )
                if (not fewer_outfiles) or (step == eq_steps - 1):
                    host_coords = ctxt.get_x_t()[:len(solvated_host_coords)] * 10
                    guest_coords = ctxt.get_x_t()[len(solvated_host_coords):] * 10
                    report.write_frame(
                        host_coords,
                        solvated_host_mol,
                        guest_coords,
                        guest_mol,
                        guest_name,
                        outdir,
                        str(step).zfill(len(str(eq_steps))),
                        "eq",
                    )
            if step in (0, int(eq_steps / 2), eq_steps - 1):
                if report.too_much_force(ctxt, 0.00, host_box, combined_bps,
                                         u_impls):
                    break

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
Example 12
def benchmark(
    label,
    masses,
    lamb,
    x0,
    v0,
    box,
    bound_potentials,
    hmr=False,
    verbose=True,
    num_batches=100,
    steps_per_batch=1000,
    compute_du_dl_interval=0,
    barostat_interval=0,
):
    """
    TODO: configuration blob containing num_batches, steps_per_batch, and any other options
    """

    seed = 1234
    dt = 1.5e-3
    temperature = 300
    pressure = 1.0
    seconds_per_day = 86400

    harmonic_bond_potential = bound_potentials[0]
    bond_list = get_bond_list(harmonic_bond_potential)
    if hmr:
        dt = 2.5e-3
        masses = apply_hmr(masses, bond_list)
    intg = LangevinIntegrator(temperature, dt, 1.0, np.array(masses), seed).impl()

    bps = []

    for potential in bound_potentials:
        bps.append(potential.bound_impl(precision=np.float32))  # get the bound implementation

    baro_impl = None
    if barostat_interval > 0:
        group_idxs = get_group_indices(bond_list)
        baro = MonteCarloBarostat(
            x0.shape[0],
            pressure,
            temperature,
            group_idxs,
            barostat_interval,
            seed,
        )
        baro_impl = baro.impl(bps)

    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        bps,
        barostat=baro_impl,
    )

    batch_times = []

    lambda_schedule = np.ones(steps_per_batch) * lamb

    # run once before timer starts
    ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)

    start = time.time()

    for batch in range(num_batches):

        # time the current batch
        batch_start = time.time()
        du_dls, _, _ = ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)
        batch_end = time.time()

        delta = batch_end - batch_start

        batch_times.append(delta)

        steps_per_second = steps_per_batch / np.mean(batch_times)
        steps_per_day = steps_per_second * seconds_per_day

        ps_per_day = dt * steps_per_day
        ns_per_day = ps_per_day * 1e-3

        if verbose:
            print(f"steps per second: {steps_per_second:.3f}")
            print(f"ns per day: {ns_per_day:.3f}")

    assert np.all(np.abs(ctxt.get_x_t()) < 1000)

    print(
        f"{label}: N={x0.shape[0]} speed: {ns_per_day:.2f}ns/day dt: {dt*1e3}fs (ran {steps_per_batch * num_batches} steps in {(time.time() - start):.2f}s)"
    )
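
The throughput numbers printed above come from a small unit conversion; the helper below restates that arithmetic in isolation. It is purely illustrative and not part of the original code.

# Standalone restatement of the ns/day arithmetic used in benchmark() above.
def ns_per_day(steps_per_second: float, dt_ps: float) -> float:
    seconds_per_day = 86400
    steps_per_day = steps_per_second * seconds_per_day
    ps_per_day = steps_per_day * dt_ps
    return ps_per_day * 1e-3  # ps/day -> ns/day

# e.g. 10,000 steps/s at a 2.5 fs timestep is ~2,160 ns/day
assert abs(ns_per_day(10_000, 2.5e-3) - 2160.0) < 1e-6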
Example 13
def pose_dock(
    guests_sdfile,
    host_pdbfile,
    transition_type,
    n_steps,
    transition_steps,
    max_lambda,
    outdir,
    random_rotation=False,
    constant_atoms=[],
):
    """Runs short simulations in which the guests phase in or out over time

    Parameters
    ----------

    guests_sdfile: path to input sdf with guests to pose/dock
    host_pdbfile: path to host pdb file to dock into
    transition_type: "insertion" or "deletion"
    n_steps: how many total steps of simulation to do (recommended: <= 1000)
    transition_steps: how many steps to insert/delete the guest over (recommended: <= 500)
        (must be <= n_steps)
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    outdir: where to write output (will be created if it does not already exist)
    random_rotation: whether to apply a random rotation to each guest before inserting
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files)

    Output
    ------

    A pdb & sdf file every 100 steps (outdir/<guest_name>_<step>.pdb)
    stdout every 100 steps noting the step number, lambda value, and energy
    stdout for each guest noting the work of transition
    stdout for each guest noting how long it took to run

    Note
    ----
    If any norm of force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py],
    the simulation for that guest will stop and the work will not be calculated.
    """
    assert transition_steps <= n_steps
    assert transition_type in ("insertion", "deletion")
    if random_rotation:
        assert transition_type == "insertion"

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    host_mol = Chem.MolFromPDBFile(host_pdbfile, removeHs=False)
    amber_ff = app.ForceField("amber99sbildn.xml", "tip3p.xml")
    host_file = PDBFile(host_pdbfile)
    host_system = amber_ff.createSystem(
        host_file.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False,
    )
    host_conf = []
    for x, y, z in host_file.positions:
        host_conf.append([to_md_units(x), to_md_units(y), to_md_units(z)])
    host_conf = np.array(host_conf)

    final_potentials = []
    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        host_system, cutoff=1.2)
    host_nb_bp = None
    for bp in host_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert host_nb_bp is None
            host_nb_bp = bp
        else:
            final_potentials.append(bp)

    # TODO (ytz): we should really fix this later on. This padding was done to
    # address the particles that are too close to the boundary.
    padding = 0.1
    box_lengths = np.amax(host_conf, axis=0) - np.amin(host_conf, axis=0)
    box_lengths = box_lengths + padding
    box = np.eye(3, dtype=np.float64) * box_lengths

    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_ff_handlers = deserialize_handlers(
            open(
                os.path.join(
                    os.path.dirname(os.path.abspath(__file__)),
                    "..",
                    "ff/params/smirnoff_1_1_0_ccc.py",
                )).read())
        ff = Forcefield(guest_ff_handlers)
        guest_base_topology = topology.BaseTopology(guest_mol, ff)

        # combine
        hgt = topology.HostGuestTopology(host_nb_bp, guest_base_topology)
        # setup the parameter handlers for the ligand
        bonded_tuples = [[hgt.parameterize_harmonic_bond, ff.hb_handle],
                         [hgt.parameterize_harmonic_angle, ff.ha_handle],
                         [hgt.parameterize_proper_torsion, ff.pt_handle],
                         [hgt.parameterize_improper_torsion, ff.it_handle]]
        these_potentials = list(final_potentials)
        # instantiate the vjps while parameterizing (forward pass)
        for fn, handle in bonded_tuples:
            params, potential = fn(handle.params)
            these_potentials.append(potential.bind(params))
        nb_params, nb_potential = hgt.parameterize_nonbonded(
            ff.q_handle.params, ff.lj_handle.params)
        these_potentials.append(nb_potential.bind(nb_params))
        bps = these_potentials

        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]
        masses = np.concatenate([host_masses, guest_masses])

        for atom_num in constant_atoms:
            masses[atom_num - 1] += 50000

        conformer = guest_mol.GetConformer(0)
        mol_conf = np.array(conformer.GetPositions(), dtype=np.float64)
        mol_conf = mol_conf / 10  # convert to md_units

        if random_rotation:
            center = np.mean(mol_conf, axis=0)
            mol_conf -= center
            from scipy.stats import special_ortho_group

            mol_conf = np.matmul(mol_conf, special_ortho_group.rvs(3))
            mol_conf += center

        x0 = np.concatenate([host_conf, mol_conf])  # combined geometry
        v0 = np.zeros_like(x0)

        seed = 2021
        intg = LangevinIntegrator(300, 1.5e-3, 1.0, masses, seed).impl()

        impls = []
        precision = np.float32
        for b in bps:
            p_impl = b.bound_impl(precision)
            impls.append(p_impl)

        ctxt = custom_ops.Context(x0, v0, box, intg, impls)

        # collect a du_dl calculation once every other step
        subsample_freq = 2
        du_dl_obs = custom_ops.FullPartialUPartialLambda(impls, subsample_freq)
        ctxt.add_observable(du_dl_obs)

        if transition_type == "insertion":
            new_lambda_schedule = np.concatenate([
                np.linspace(max_lambda, 0.0, transition_steps),
                np.zeros(n_steps - transition_steps),
            ])
        elif transition_type == "deletion":
            new_lambda_schedule = np.concatenate([
                np.linspace(0.0, max_lambda, transition_steps),
                np.ones(n_steps - transition_steps) * max_lambda,
            ])
        else:
            raise RuntimeError(
                'invalid `transition_type` (must be one of ["insertion", "deletion"])'
            )

        calc_work = True
        for step, lamb in enumerate(new_lambda_schedule):
            ctxt.step(lamb)
            if step % 100 == 0:
                report.report_step(ctxt, step, lamb, box, bps, impls,
                                   guest_name, n_steps, 'pose_dock')
                host_coords = ctxt.get_x_t()[:len(host_conf)] * 10
                guest_coords = ctxt.get_x_t()[len(host_conf):] * 10
                report.write_frame(host_coords, host_mol, guest_coords,
                                   guest_mol, guest_name, outdir, step, 'pd')
            if step in (0, int(n_steps / 2), n_steps - 1):
                if report.too_much_force(ctxt, lamb, box, bps, impls):
                    calc_work = False
                    break

        # Note: this condition only applies for ABFE, not RBFE
        if (abs(du_dl_obs.full_du_dl()[0]) > 0.001
                or abs(du_dl_obs.full_du_dl()[-1]) > 0.001):
            print("Error: du_dl endpoints are not ~0")
            calc_work = False

        if calc_work:
            work = np.trapz(du_dl_obs.full_du_dl(),
                            new_lambda_schedule[::subsample_freq])
            print(f"guest_name: {guest_name}\twork: {work:.2f}")
        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
Example 14
    #   wrapper function -- since contexts are not pickle-able -- which will
    #   be useful later in timemachine's multi-device parallelization strategy)
    # note: OpenMM unit system used throughout
    #   (temperature: kelvin, timestep: picosecond, collision_rate: picosecond^-1)
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    x0 = combined_coords
    v0 = np.zeros_like(x0)

    u_impls = []

    for bp in final_potentials:
        u_impls.append(bp.bound_impl(np.float32))

    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    for step, lamb in enumerate(np.linspace(1.0, final_lamb, 1000)):
        if step % 500 == 0:
            writer.write_frame(ctxt.get_x_t() * 10)
        ctxt.step(lamb)

    # print("insertion energy", ctxt._get_u_t_minus_1())

    # note: these 5000 steps are "equilibration", before we attach a reporter /
    #   "observable" to the context and start running "production"
    for step in range(5000):
        if step % 500 == 0:
            writer.write_frame(ctxt.get_x_t() * 10)
        ctxt.step(final_lamb)
Example 15
def simulate(
    lamb,
    box,
    x0,
    v0,
    final_potentials,
    integrator,
    barostat,
    equil_steps,
    prod_steps,
    x_interval,
    u_interval,
    lambda_windows,
):
    """
    Run a simulation and collect relevant statistics for this simulation.

    Parameters
    ----------
    lamb: float
        lambda value used for the equilibrium simulation

    box: np.array
        3x3 numpy array of the box, dtype should be np.float64

    x0: np.array
        Nx3 numpy array of the coordinates

    v0: np.array
        Nx3 numpy array of the velocities

    final_potentials: list
        list of unbound potentials

    integrator: timemachine.Integrator
        integrator to be used for dynamics

    barostat: timemachine.Barostat
        barostat to be used for equilibration

    equil_steps: int
        number of equilibration steps

    prod_steps: int
        number of production steps

    x_interval: int
        how often we store coordinates. If x_interval == 0 then
        no frames are returned.

    u_interval: int
        how often we store energies. If u_interval == 0 then
        no energies are returned

    lambda_windows: list of float
        lambda windows we evaluate energies at.

    Returns
    -------
    SimulationResult
        Results of the simulation.

    """

    all_impls = []

    for bp in final_potentials:
        impl = bp.bound_impl(np.float32)
        all_impls.append(impl)

    # fire minimize once again, needed for parameter interpolation
    x0 = minimizer.fire_minimize(x0, all_impls, box,
                                 np.ones(100, dtype=np.float64) * lamb)

    # sanity check that forces are well behaved
    for bp in all_impls:
        du_dx, du_dl, u = bp.execute(x0, box, lamb)
        norm_forces = np.linalg.norm(du_dx, axis=1)
        assert np.all(norm_forces < 25000), \
            "Forces much greater than expected after minimization"
    if integrator.seed == 0:
        # this deepcopy is needed when running with client == None
        integrator = copy.deepcopy(integrator)
        integrator.seed = np.random.randint(np.iinfo(np.int32).max)

    if barostat.seed == 0:
        barostat = copy.deepcopy(barostat)
        barostat.seed = np.random.randint(np.iinfo(np.int32).max)

    intg_impl = integrator.impl()
    # technically we only need to pass in the nonbonded impl
    barostat_impl = barostat.impl(all_impls)
    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, intg_impl, all_impls, barostat_impl)

    # equilibration
    equil_schedule = np.ones(equil_steps) * lamb
    ctxt.multiple_steps(equil_schedule)

    # (ytz): intentionally hard-coded, I'd rather the end-user *not*
    # muck with this unless they have a good reason to.
    barostat_impl.set_interval(25)

    full_us, xs, boxes = ctxt.multiple_steps_U(lamb, prod_steps,
                                               np.array(lambda_windows),
                                               u_interval, x_interval)

    result = SimulationResult(
        xs=xs.astype("float32"),
        boxes=boxes.astype("float32"),
        lambda_us=full_us,
    )

    return result
Example 16
def test_barostat_partial_group_idxs():
    """Verify that the barostat can handle a subset of the molecules
    rather than all of them. This test only verifies that it runs, not the behavior."""
    temperature = 300.0 * unit.kelvin
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    barostat_interval = 3
    collision_rate = 1.0 / unit.picosecond
    seed = 2021
    np.random.seed(seed)

    pressure = 1.0 * unit.atmosphere
    mol_a = hif2a_ligand_pair.mol_a
    ff = hif2a_ligand_pair.ff
    complex_system, complex_coords, complex_box, complex_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer))

    min_complex_coords = minimize_host_4d([mol_a], complex_system,
                                          complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords)

    # get list of molecules for barostat by looking at bond table
    harmonic_bond_potential = unbound_potentials[0]
    bond_list = get_bond_list(harmonic_bond_potential)
    group_indices = get_group_indices(bond_list)

    # Cut the number of groups in half
    group_indices = group_indices[len(group_indices) // 2:]
    lam = 1.0

    bound_potentials = []
    for params, unbound_pot in zip(sys_params, unbound_potentials):
        bp = unbound_pot.bind(np.asarray(params))
        bound_potentials.append(bp)

    u_impls = []
    for bp in bound_potentials:
        bp_impl = bp.bound_impl(precision=np.float32)
        u_impls.append(bp_impl)

    integrator = LangevinIntegrator(
        temperature.value_in_unit(unit.kelvin),
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    )
    integrator_impl = integrator.impl()

    v_0 = sample_velocities(masses * unit.amu, temperature)

    baro = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        temperature.value_in_unit(unit.kelvin),
        group_indices,
        barostat_interval,
        u_impls,
        seed,
    )

    ctxt = custom_ops.Context(coords,
                              v_0,
                              complex_box,
                              integrator_impl,
                              u_impls,
                              barostat=baro)
    ctxt.multiple_steps(np.ones(1000) * lam)
Example 17
    def _simulate(lamb, box, x0, v0, final_potentials, integrator, equil_steps, prod_steps):
        all_impls = []
        bonded_impls = []
        nonbonded_impls = []

        # set up observables for du_dps here as well.

        du_dp_obs = []

        for bps in final_potentials:
            obs_list = []

            for bp in bps:
                impl = bp.bound_impl(np.float32)

                if isinstance(bp, potentials.InterpolatedPotential) or isinstance(bp, potentials.LambdaPotential):
                    bp = bp.get_u_fn()

                if isinstance(bp, potentials.Nonbonded):
                    nonbonded_impls.append(impl)
                else:
                    bonded_impls.append(impl)

                all_impls.append(impl)
                obs_list.append(custom_ops.AvgPartialUPartialParam(impl, 5))

            du_dp_obs.append(obs_list)

        intg_impl = integrator.impl()
        # context components: positions, velocities, box, integrator, energy fxns
        ctxt = custom_ops.Context(
            x0,
            v0,
            box,
            intg_impl,
            all_impls
        )

        # equilibration
        for step in range(equil_steps):
            ctxt.step(lamb)

        bonded_du_dl_obs = custom_ops.FullPartialUPartialLambda(bonded_impls, 5)
        nonbonded_du_dl_obs = custom_ops.FullPartialUPartialLambda(nonbonded_impls, 5)

        # add observable
        ctxt.add_observable(bonded_du_dl_obs)
        ctxt.add_observable(nonbonded_du_dl_obs)

        for obs_list in du_dp_obs:
            for obs in obs_list:
                ctxt.add_observable(obs)

        for _ in range(prod_steps):
            ctxt.step(lamb)

        bonded_full_du_dls = bonded_du_dl_obs.full_du_dl()
        nonbonded_full_du_dls = nonbonded_du_dl_obs.full_du_dl()

        bonded_mean, bonded_std = np.mean(bonded_full_du_dls), np.std(bonded_full_du_dls)
        nonbonded_mean, nonbonded_std = np.mean(nonbonded_full_du_dls), np.std(nonbonded_full_du_dls)

        # keep the structure of grads the same as that of final_potentials so we can properly
        # form their vjps.
        grads = []
        for obs_list in du_dp_obs:
            grad_list = []
            for obs in obs_list:
                grad_list.append(obs.avg_du_dp())
            grads.append(grad_list)

        return (bonded_mean, bonded_std), (nonbonded_mean, nonbonded_std), grads
Example 18
def benchmark_dhfr():

    pdb_path = 'tests/data/5dfr_solv_equil.pdb'
    host_pdb = app.PDBFile(pdb_path)
    protein_ff = app.ForceField('amber99sbildn.xml', 'tip3p.xml')
    host_system = protein_ff.createSystem(
        host_pdb.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False
    )
    host_coords = host_pdb.positions
    box = host_pdb.topology.getPeriodicBoxVectors()
    box = np.asarray(box/box.unit)

    host_fns, host_masses = openmm_deserializer.deserialize_system(
        host_system,
        cutoff=1.0
    )

    host_conf = []
    for x, y, z in host_coords:
        host_conf.append([to_md_units(x), to_md_units(y), to_md_units(z)])
    host_conf = np.array(host_conf)

    seed = 1234
    dt = 1.5e-3

    intg = LangevinIntegrator(
        300,
        dt,
        1.0,
        np.array(host_masses),
        seed
    ).impl()

    bps = []

    for potential in host_fns:
        bps.append(potential.bound_impl(precision=np.float32)) # get the bound implementation

    x0 = host_conf
    v0 = np.zeros_like(host_conf)

    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        bps
    )

    # initialize observables
    obs = []
    for bp in bps:
        du_dp_obs = custom_ops.AvgPartialUPartialParam(bp, 100)
        ctxt.add_observable(du_dp_obs)
        obs.append(du_dp_obs)

    lamb = 0.0

    start = time.time()
    num_steps = 50000
    # num_steps = 10

    writer = PDBWriter([host_pdb.topology], "dhfr.pdb")

    for step in range(num_steps):
        ctxt.step(lamb)
        if step % 1000 == 0:

            delta = time.time() - start
            steps_per_second = step / delta
            seconds_per_day = 86400
            steps_per_day = steps_per_second * seconds_per_day
            ps_per_day = dt * steps_per_day
            ns_per_day = ps_per_day * 1e-3

            print(step, "ns/day", ns_per_day)
            # coords = recenter(ctxt.get_x_t(), box)
            # writer.write_frame(coords*10)

    print("total time", time.time() - start)

    writer.close()


    # bond angle torsions nonbonded
    for potential, du_dp_obs in zip(host_fns, obs):
        dp = du_dp_obs.avg_du_dp()
        print(potential, dp.shape)
        print(dp)
Example 19
def run_leg(
    orig_host_coords,
    orig_guest_coords,
    combined_bps,
    combined_masses,
    host_box,
    guest_name,
    leg_type,
    host_mol,
    guest_mol,
    outdir,
    num_deletions,
    deletion_steps,
    insertion_max_lambda,
    insertion_steps,
    eq1_steps,
    fewer_outfiles=False,
    no_outfiles=False,
):
    x0 = np.concatenate([orig_host_coords, orig_guest_coords])
    v0 = np.zeros_like(x0)
    print(
        f"{leg_type.upper()}_SYSTEM",
        f"guest_name: {guest_name}",
        f"num_atoms: {len(x0)}",
    )

    seed = 2021
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    u_impls = []
    for bp in combined_bps:
        bp_impl = bp.bound_impl(precision=np.float32)
        u_impls.append(bp_impl)

    ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

    # insert guest
    insertion_lambda_schedule = np.linspace(insertion_max_lambda, MIN_LAMBDA,
                                            insertion_steps)

    ctxt.multiple_steps(insertion_lambda_schedule, 0)  # do not collect du_dls

    lamb = insertion_lambda_schedule[-1]
    step = len(insertion_lambda_schedule) - 1

    report.report_step(
        ctxt,
        step,
        lamb,
        host_box,
        combined_bps,
        u_impls,
        guest_name,
        insertion_steps,
        f"{leg_type.upper()}_INSERTION",
    )
    if not fewer_outfiles and not no_outfiles:
        host_coords = ctxt.get_x_t()[:len(orig_host_coords)] * 10
        guest_coords = ctxt.get_x_t()[len(orig_host_coords):] * 10
        report.write_frame(
            host_coords,
            host_mol,
            guest_coords,
            guest_mol,
            guest_name,
            outdir,
            str(step).zfill(len(str(insertion_steps))),
            f"{leg_type}-ins",
        )
    if report.too_much_force(ctxt, lamb, host_box, combined_bps, u_impls):
        return []

    # equilibrate
    equil_lambda_schedule = np.ones(eq1_steps) * MIN_LAMBDA
    lamb = equil_lambda_schedule[-1]
    step = len(equil_lambda_schedule) - 1
    ctxt.multiple_steps(equil_lambda_schedule, 0)
    report.report_step(
        ctxt,
        step,
        MIN_LAMBDA,
        host_box,
        combined_bps,
        u_impls,
        guest_name,
        eq1_steps,
        f"{leg_type.upper()}_EQUILIBRATION_1",
    )
    if not fewer_outfiles and not no_outfiles:
        host_coords = ctxt.get_x_t()[:len(orig_host_coords)] * 10
        guest_coords = ctxt.get_x_t()[len(orig_host_coords):] * 10
        report.write_frame(
            host_coords,
            host_mol,
            guest_coords,
            guest_mol,
            guest_name,
            outdir,
            str(step).zfill(len(str(eq1_steps))),
            f"{leg_type}-eq1",
        )
    if report.too_much_force(ctxt, MIN_LAMBDA, host_box, combined_bps,
                             u_impls):
        print("Too much force")
        return []

    # equilibrate more & shoot off deletion jobs
    steps_per_batch = 1001
    works = []
    for b in range(num_deletions):
        deletion_lambda_schedule = np.ones(steps_per_batch) * MIN_LAMBDA

        ctxt.multiple_steps(deletion_lambda_schedule, 0)
        lamb = deletion_lambda_schedule[-1]
        step = len(deletion_lambda_schedule) - 1
        report.report_step(
            ctxt,
            (b + 1) * step,
            MIN_LAMBDA,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            num_deletions * steps_per_batch,
            f"{leg_type.upper()}_EQUILIBRATION_2",
        )

        # TODO: if guest has undocked, stop simulation
        if not no_outfiles:
            host_coords = ctxt.get_x_t()[:len(orig_host_coords)] * 10
            guest_coords = ctxt.get_x_t()[len(orig_host_coords):] * 10
            report.write_frame(
                host_coords,
                host_mol,
                guest_coords,
                guest_mol,
                guest_name,
                outdir,
                str((b + 1) * step).zfill(
                    len(str(num_deletions * steps_per_batch))),
                f"{leg_type}-eq2",
            )
        if report.too_much_force(ctxt, MIN_LAMBDA, host_box, combined_bps,
                                 u_impls):
            print("Too much force")
            return works

        work = do_deletion(
            ctxt.get_x_t(),
            ctxt.get_v_t(),
            combined_bps,
            combined_masses,
            host_box,
            guest_name,
            leg_type,
            u_impls,
            deletion_steps,
        )
        works.append(work)

    return works
Example 20
def simulate(
    lamb,
    box,
    x0,
    v0,
    final_potentials,
    integrator,
    equil_steps,
    prod_steps,
    barostat,
    x_interval=1000,
    du_dl_interval=5,
) -> SimulationResult:
    """
    Run a simulation and collect relevant statistics for this simulation.

    Parameters
    ----------
    lamb: float
        lambda parameter

    box: np.array
        3x3 numpy array of the box, dtype should be np.float64

    x0: np.array
        Nx3 numpy array of the coordinates

    v0: np.array
        Nx3 numpy array of the velocities

    final_potentials: list
        list of unbound potentials

    integrator: timemachine.Integrator
        integrator to be used for dynamics

    equil_steps: int
        number of equilibration steps

    prod_steps: int
        number of production steps

    x_interval: int
        how often we store coordinates. if x_interval == 0 then
        no frames are returned.

    du_dl_interval: int
        how often we store du_dls. if du_dl_interval == 0 then
        no du_dls are returned

    barostat: timemachine.lib.MonteCarloBarostat
        Monte carlo barostat to use when simulating.

    Returns
    -------
    SimulationResult
        Results of the simulation.

    """
    all_impls = []
    bonded_impls = []
    nonbonded_impls = []

    for bp in final_potentials:
        impl = bp.bound_impl(np.float32)
        if isinstance(bp, potentials.Nonbonded):
            nonbonded_impls.append(impl)
        else:
            bonded_impls.append(impl)
        all_impls.append(impl)

    if integrator.seed == 0:
        integrator = copy.deepcopy(integrator)
        integrator.seed = np.random.randint(np.iinfo(np.int32).max)

    if barostat.seed == 0:
        barostat = copy.deepcopy(barostat)
        barostat.seed = np.random.randint(np.iinfo(np.int32).max)

    intg_impl = integrator.impl()
    baro_impl = barostat.impl(all_impls)
    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg_impl,
        all_impls,
        barostat=baro_impl,
    )
    base_interval = baro_impl.get_interval()
    # Use an interval of 5 for equilibration
    baro_impl.set_interval(5)

    # equilibration
    equil_schedule = np.ones(equil_steps) * lamb
    ctxt.multiple_steps(equil_schedule)

    baro_impl.set_interval(base_interval)

    prod_schedule = np.ones(prod_steps) * lamb

    full_du_dls, xs, _ = ctxt.multiple_steps(prod_schedule, du_dl_interval,
                                             x_interval)

    result = SimulationResult(xs=xs.astype("float32"), du_dls=full_du_dls)
    return result
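
A hypothetical call sketch for the function above; all inputs are placeholders prepared as in the other examples, and the step counts are invented. With the default intervals, the result holds a frame every 1000 production steps and a du/dlambda sample every 5 steps.

# Hypothetical call; inputs and step counts are placeholders.
# (assumes SimulationResult exposes xs / du_dls as attributes)
result = simulate(lamb, box, x0, v0, final_potentials, integrator,
                  equil_steps=10_000, prod_steps=100_000, barostat=barostat)
frames, du_dls = result.xs, result.du_dls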
Example 21
    def test_fwd_mode(self):
        """
        This test ensures that we can reverse-mode differentiate
        observables that are dU_dlambdas of each state. We provide
        adjoints with respect to each computed dU/dLambda.
        """

        np.random.seed(4321)

        N = 8
        B = 5
        A = 0
        T = 0
        D = 3

        x0 = np.random.rand(N, D).astype(dtype=np.float64) * 2

        E = 2

        lambda_plane_idxs = np.random.randint(low=0,
                                              high=2,
                                              size=N,
                                              dtype=np.int32)
        lambda_offset_idxs = np.random.randint(low=0,
                                               high=2,
                                               size=N,
                                               dtype=np.int32)

        params, ref_nrg_fn, test_nrg = prepare_nb_system(
            x0,
            E,
            lambda_plane_idxs,
            lambda_offset_idxs,
            p_scale=3.0,
            # cutoff=0.5,
            cutoff=1.5)

        masses = np.random.rand(N)

        v0 = np.random.rand(x0.shape[0], x0.shape[1])
        N = len(masses)

        num_steps = 5
        lambda_schedule = np.random.rand(num_steps)
        ca = np.random.rand()
        cbs = -np.random.rand(len(masses)) / 1
        ccs = np.zeros_like(cbs)

        dt = 2e-3
        lamb = np.random.rand()

        def loss_fn(du_dls):
            return jnp.sum(du_dls * du_dls) / du_dls.shape[0]

        def sum_loss_fn(du_dls):
            du_dls = np.sum(du_dls, axis=0)
            return jnp.sum(du_dls * du_dls) / du_dls.shape[0]

        def integrate_once_through(x_t, v_t, box, params):

            dU_dx_fn = jax.grad(ref_nrg_fn, argnums=(0, ))
            dU_dp_fn = jax.grad(ref_nrg_fn, argnums=(1, ))
            dU_dl_fn = jax.grad(ref_nrg_fn, argnums=(3, ))

            all_du_dls = []
            all_du_dps = []
            all_xs = []
            all_du_dxs = []
            all_us = []
            for step in range(num_steps):
                u = ref_nrg_fn(x_t, params, box, lamb)
                all_us.append(u)
                du_dl = dU_dl_fn(x_t, params, box, lamb)[0]
                all_du_dls.append(du_dl)
                du_dp = dU_dp_fn(x_t, params, box, lamb)[0]
                all_du_dps.append(du_dp)
                du_dx = dU_dx_fn(x_t, params, box, lamb)[0]
                all_du_dxs.append(du_dx)
                v_t = ca * v_t + np.expand_dims(cbs, axis=-1) * du_dx
                x_t = x_t + v_t * dt
                all_xs.append(x_t)
                # note that we do not calculate the du_dl of the last frame.

            return all_xs, all_du_dxs, all_du_dps, all_du_dls, all_us

        box = np.eye(3) * 1.5

        # when we have multiple parameters, we need to set this up correctly
        ref_all_xs, ref_all_du_dxs, ref_all_du_dps, ref_all_du_dls, ref_all_us = integrate_once_through(
            x0, v0, box, params)

        intg = custom_ops.LangevinIntegrator(dt, ca, cbs, ccs, 1234)

        bp = test_nrg.bind(params).bound_impl(precision=np.float64)
        bps = [bp]

        ctxt = custom_ops.Context(x0, v0, box, intg, bps)

        test_obs = custom_ops.AvgPartialUPartialParam(bp, 1)
        test_obs_f2 = custom_ops.AvgPartialUPartialParam(bp, 2)

        test_obs_du_dl = custom_ops.AvgPartialUPartialLambda(bps, 1)
        test_obs_f2_du_dl = custom_ops.AvgPartialUPartialLambda(bps, 2)
        test_obs_f3_du_dl = custom_ops.FullPartialUPartialLambda(bps, 2)

        obs = [
            test_obs, test_obs_f2, test_obs_du_dl, test_obs_f2_du_dl,
            test_obs_f3_du_dl
        ]

        for o in obs:
            ctxt.add_observable(o)

        for step in range(num_steps):
            print("comparing step", step)
            ctxt.step(lamb)
            test_x_t = ctxt.get_x_t()
            test_v_t = ctxt.get_v_t()
            test_du_dx_t = ctxt._get_du_dx_t_minus_1()
            # test_u_t = ctxt._get_u_t_minus_1()
            # np.testing.assert_allclose(test_u_t, ref_all_us[step])
            np.testing.assert_allclose(test_du_dx_t, ref_all_du_dxs[step])
            np.testing.assert_allclose(test_x_t, ref_all_xs[step])

        ref_avg_du_dls = np.mean(ref_all_du_dls, axis=0)
        ref_avg_du_dls_f2 = np.mean(ref_all_du_dls[::2], axis=0)

        np.testing.assert_allclose(test_obs_du_dl.avg_du_dl(), ref_avg_du_dls)
        np.testing.assert_allclose(test_obs_f2_du_dl.avg_du_dl(),
                                   ref_avg_du_dls_f2)

        full_du_dls = test_obs_f3_du_dl.full_du_dl()
        assert len(full_du_dls) == np.ceil(num_steps / 2)
        np.testing.assert_allclose(np.mean(full_du_dls), ref_avg_du_dls_f2)

        ref_avg_du_dps = np.mean(ref_all_du_dps, axis=0)
        ref_avg_du_dps_f2 = np.mean(ref_all_du_dps[::2], axis=0)

        # the fixed point accumulator makes it hard to converge some of these
        # if the derivative is super small - in which case they probably don't matter
        # anyways
        np.testing.assert_allclose(test_obs.avg_du_dp()[:, 0],
                                   ref_avg_du_dps[:, 0], 1.5e-6)
        np.testing.assert_allclose(test_obs.avg_du_dp()[:, 1],
                                   ref_avg_du_dps[:, 1], 1.5e-6)
        np.testing.assert_allclose(test_obs.avg_du_dp()[:, 2],
                                   ref_avg_du_dps[:, 2], 5e-5)
Example 22
    def Simulate(self, request, context):

        if request.precision == 'single':
            precision = np.float32
        elif request.precision == 'double':
            precision = np.float64
        else:
            raise Exception("Unknown precision")

        simulation = pickle.loads(request.simulation)

        bps = []
        pots = []

        for potential in simulation.potentials:
            bps.append(potential.bound_impl())  # get the bound implementation

        intg = simulation.integrator.impl()

        ctxt = custom_ops.Context(simulation.x, simulation.v, simulation.box,
                                  intg, bps)

        lamb = request.lamb

        for step, minimize_lamb in enumerate(
                np.linspace(1.0, lamb, request.prep_steps)):
            ctxt.step(minimize_lamb)

        energies = []
        frames = []

        if request.observe_du_dl_freq > 0:
            du_dl_obs = custom_ops.AvgPartialUPartialLambda(
                bps, request.observe_du_dl_freq)
            ctxt.add_observable(du_dl_obs)

        if request.observe_du_dp_freq > 0:
            du_dps = []
            # for name, bp in zip(names, bps):
            # if name == 'LennardJones' or name == 'Electrostatics':
            for bp in bps:
                du_dp_obs = custom_ops.AvgPartialUPartialParam(
                    bp, request.observe_du_dp_freq)
                ctxt.add_observable(du_dp_obs)
                du_dps.append(du_dp_obs)

        # dynamics
        for step in range(request.prod_steps):
            if step % 100 == 0:
                u = ctxt._get_u_t_minus_1()
                energies.append(u)

            if request.n_frames > 0:
                interval = max(1, request.prod_steps // request.n_frames)
                if step % interval == 0:
                    frames.append(ctxt.get_x_t())

            ctxt.step(lamb)

        frames = np.array(frames)

        if request.observe_du_dl_freq > 0:
            avg_du_dls = du_dl_obs.avg_du_dl()
        else:
            avg_du_dls = None

        if request.observe_du_dp_freq > 0:
            avg_du_dps = []
            for obs in du_dps:
                avg_du_dps.append(obs.avg_du_dp())
        else:
            avg_du_dps = None

        return service_pb2.SimulateReply(
            avg_du_dls=pickle.dumps(avg_du_dls),
            avg_du_dps=pickle.dumps(avg_du_dps),
            energies=pickle.dumps(energies),
            frames=pickle.dumps(frames),
        )
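A hedged client-side sketch of how the SimulateReply built above could be unpacked; the gRPC channel/stub setup is omitted, and only the reply fields used in this example (avg_du_dls, avg_du_dps, energies, frames) are assumed.

import pickle

import numpy as np

def unpack_simulate_reply(reply):
    # mirror the server-side pickling above
    avg_du_dls = pickle.loads(reply.avg_du_dls)  # None when observe_du_dl_freq == 0
    avg_du_dps = pickle.loads(reply.avg_du_dps)  # None when observe_du_dp_freq == 0
    energies = np.array(pickle.loads(reply.energies))  # potential energy sampled every 100 steps
    frames = pickle.loads(reply.frames)  # (n_frames, N, 3) coordinates
    return avg_du_dls, avg_du_dps, energies, frames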
Example no. 23
    def test_fwd_mode(self):
        """
        This test ensures that the C++ Context reproduces a reference
        Python/JAX Langevin integration step by step (positions and du/dx),
        and exercises the batched multiple_steps and multiple_steps_U APIs.
        """

        np.random.seed(4321)

        N = 8
        D = 3

        x0 = np.random.rand(N, D).astype(dtype=np.float64) * 2

        E = 2

        lambda_plane_idxs = np.random.randint(low=0,
                                              high=2,
                                              size=N,
                                              dtype=np.int32)
        lambda_offset_idxs = np.random.randint(low=0,
                                               high=2,
                                               size=N,
                                               dtype=np.int32)

        params, ref_nrg_fn, test_nrg = prepare_nb_system(
            x0,
            E,
            lambda_plane_idxs,
            lambda_offset_idxs,
            p_scale=3.0,
            # cutoff=0.5,
            cutoff=1.0,
        )

        masses = np.random.rand(N)

        v0 = np.random.rand(x0.shape[0], x0.shape[1])

        num_steps = 5
        temperature = 300
        dt = 2e-3
        friction = 0.0
        ca, cbs, ccs = langevin_coefficients(temperature, dt, friction, masses)

        # with zero friction the noise coefficients vanish, making it
        # possible to compare the C++ and reference trajectories exactly
        assert (ccs == 0).all()

        lamb = np.random.rand()
        lambda_windows = np.array([lamb + 0.05, lamb, lamb - 0.05])

        def integrate_once_through(x_t, v_t, box, params):

            dU_dx_fn = jax.grad(ref_nrg_fn, argnums=(0, ))
            dU_dp_fn = jax.grad(ref_nrg_fn, argnums=(1, ))
            dU_dl_fn = jax.grad(ref_nrg_fn, argnums=(3, ))

            all_du_dls = []
            all_du_dps = []
            all_xs = []
            all_du_dxs = []
            all_us = []
            all_lambda_us = []
            for step in range(num_steps):
                u = ref_nrg_fn(x_t, params, box, lamb)
                all_us.append(u)
                du_dl = dU_dl_fn(x_t, params, box, lamb)[0]
                all_du_dls.append(du_dl)
                du_dp = dU_dp_fn(x_t, params, box, lamb)[0]
                all_du_dps.append(du_dp)
                du_dx = dU_dx_fn(x_t, params, box, lamb)[0]
                all_du_dxs.append(du_dx)
                all_xs.append(x_t)

                lus = []
                for lamb_u in lambda_windows:
                    lus.append(ref_nrg_fn(x_t, params, box, lamb_u))

                all_lambda_us.append(lus)
                noise = np.random.randn(*v_t.shape)

                v_mid = v_t + np.expand_dims(cbs, axis=-1) * du_dx

                v_t = ca * v_mid + np.expand_dims(ccs, axis=-1) * noise
                x_t = x_t + 0.5 * dt * (v_mid + v_t)  # out-of-place update so x0 and all_xs are not mutated

                # note that we do not calculate the du_dl of the last frame.
            return all_xs, all_du_dxs, all_du_dps, all_du_dls, all_us, all_lambda_us

        box = np.eye(3) * 3.0

        # when we have multiple parameters, we need to set this up correctly
        (
            ref_all_xs,
            ref_all_du_dxs,
            ref_all_du_dps,
            ref_all_du_dls,
            ref_all_us,
            ref_all_lambda_us,
        ) = integrate_once_through(x0, v0, box, params)

        intg = custom_ops.LangevinIntegrator(dt, ca, cbs, ccs, 1234)

        bp = test_nrg.bind(params).bound_impl(precision=np.float64)
        bps = [bp]

        ctxt = custom_ops.Context(x0, v0, box, intg, bps)

        for step in range(num_steps):
            print("comparing step", step)
            test_x_t = ctxt.get_x_t()
            np.testing.assert_allclose(test_x_t, ref_all_xs[step])
            ctxt.step(lamb)
            test_du_dx_t = ctxt._get_du_dx_t_minus_1()
            # test_u_t = ctxt._get_u_t_minus_1()
            # np.testing.assert_allclose(test_u_t, ref_all_us[step])
            np.testing.assert_allclose(test_du_dx_t, ref_all_du_dxs[step])

        # test the multiple_steps method
        ctxt_2 = custom_ops.Context(x0, v0, box, intg, bps)

        lambda_schedule = np.ones(num_steps) * lamb

        du_dl_interval = 3
        x_interval = 2
        start_box = ctxt_2.get_box()
        test_du_dls, test_xs, test_boxes = ctxt_2.multiple_steps(
            lambda_schedule, du_dl_interval, x_interval)
        end_box = ctxt_2.get_box()

        np.testing.assert_allclose(test_du_dls,
                                   ref_all_du_dls[::du_dl_interval])

        np.testing.assert_allclose(test_xs, ref_all_xs[::x_interval])
        np.testing.assert_array_equal(start_box, end_box)
        for i in range(test_boxes.shape[0]):
            np.testing.assert_array_equal(start_box, test_boxes[i])
        self.assertEqual(test_boxes.shape[0], test_xs.shape[0])
        self.assertEqual(test_boxes.shape[1], D)
        self.assertEqual(test_boxes.shape[2], test_xs.shape[2])

        # test the multiple_steps_U method
        ctxt_3 = custom_ops.Context(x0, v0, box, intg, bps)

        u_interval = 3

        test_us, test_xs, test_boxes = ctxt_3.multiple_steps_U(
            lamb, num_steps, lambda_windows, u_interval, x_interval)

        np.testing.assert_array_almost_equal(ref_all_lambda_us[::u_interval],
                                             test_us)

        np.testing.assert_array_almost_equal(ref_all_xs[::x_interval], test_xs)

        test_us, test_xs, test_boxes = ctxt_3.multiple_steps_U(
            lamb, num_steps, np.array([], dtype=np.float64), u_interval,
            x_interval)

        assert test_us.shape == (2, 0)
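A small sketch of the subsampling convention the assertions above rely on: with an interval of k, values are reported at steps 0, k, 2k, ..., which is why the reference arrays are sliced with [::k].

import numpy as np

def subsample(per_step_values, interval):
    # keep every interval-th entry, starting at step 0
    return np.asarray(per_step_values)[::interval]

# with num_steps=5 and u_interval=3 this keeps steps 0 and 3,
# matching the test_us.shape[0] == 2 asserted above
assert len(subsample(range(5), 3)) == 2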
Example no. 24
def equilibrate_host(
    mol: Chem.Mol,
    host_system: openmm.System,
    host_coords: NDArray,
    temperature: float,
    pressure: float,
    ff: Forcefield,
    box: NDArray,
    n_steps: int,
    seed: Optional[int] = None,
) -> Tuple[NDArray, NDArray]:
    """
    Equilibrate a host system given a reference molecule using the MonteCarloBarostat.

    Useful for preparing a host that will be used for multiple FEP calculations with the same reference ligand, i.e. a star map.

    Performs the following:
    - Minimize host with rigid mol
    - Minimize host and mol
    - Run n_steps with HMR enabled and MonteCarloBarostat every 5 steps

    Parameters
    ----------
    mol: Chem.Mol
        Ligand for the host to equilibrate with.

    host_system: openmm.System
        OpenMM System representing the host.

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    temperature: float
        Temperature at which to run the simulation. Units of kelvins.

    pressure: float
        Pressure at which to run the simulation. Units of bars.

    ff: ff.Forcefield
        Wrapper class around a list of handlers.

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    n_steps: int
        Number of steps to run the simulation for.

    seed: int or None
        Value to seed the simulation with. If None, a random seed is generated.

    Returns
    -------
    tuple (coords, box)
        Returns equilibrated system coords as well as the box.

    """
    # insert mol into the binding pocket.
    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    min_host_coords = minimize_host_4d([mol], host_system, host_coords, ff, box)

    ligand_masses = [a.GetMass() for a in mol.GetAtoms()]
    ligand_coords = get_romol_conf(mol)

    combined_masses = np.concatenate([host_masses, ligand_masses])
    combined_coords = np.concatenate([min_host_coords, ligand_coords])

    top = topology.BaseTopology(mol, ff)
    hgt = topology.HostGuestTopology(host_bps, top)

    # setup the parameter handlers for the ligand
    tuples = [
        [hgt.parameterize_harmonic_bond, [ff.hb_handle]],
        [hgt.parameterize_harmonic_angle, [ff.ha_handle]],
        [hgt.parameterize_periodic_torsion, [ff.pt_handle, ff.it_handle]],
        [hgt.parameterize_nonbonded, [ff.q_handle, ff.lj_handle]],
    ]

    u_impls = []
    bound_potentials = []

    for fn, handles in tuples:
        params, potential = fn(*[h.params for h in handles])
        bp = potential.bind(params)
        bound_potentials.append(bp)
        u_impls.append(bp.bound_impl(precision=np.float32))

    bond_list = get_bond_list(bound_potentials[0])
    combined_masses = model_utils.apply_hmr(combined_masses, bond_list)

    dt = 2.5e-3
    friction = 1.0

    if seed is None:
        seed = np.random.randint(np.iinfo(np.int32).max)

    integrator = LangevinIntegrator(temperature, dt, friction, combined_masses, seed).impl()

    x0 = combined_coords
    v0 = np.zeros_like(x0)

    group_indices = get_group_indices(bond_list)
    barostat_interval = 5
    barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature, group_indices, barostat_interval, seed).impl(
        u_impls
    )

    # Re-minimize with the mol being flexible
    x0 = fire_minimize(x0, u_impls, box, np.ones(50))
    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, integrator, u_impls, barostat)

    ctxt.multiple_steps(np.linspace(0.0, 0.0, n_steps))

    return ctxt.get_x_t(), ctxt.get_box()
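A hedged usage sketch for equilibrate_host. The ligand file path is a placeholder, and the water-box/forcefield setup simply mirrors later examples in this listing; the imports (build_water_system, deserialize_handlers, Forcefield) are assumed to be the same ones those examples use.

from rdkit import Chem

# hypothetical docked ligand; in practice this would come from your own SDF/MOL file
mol = Chem.MolFromMolFile("ligand.sdf", removeHs=False)

# a small solvated "host" and a forcefield, set up the same way as in the examples below
host_system, host_coords, box, _top = build_water_system(3.0)
ff = Forcefield(deserialize_handlers(open("ff/params/smirnoff_1_1_0_ccc.py").read()))

equil_coords, equil_box = equilibrate_host(
    mol,
    host_system,
    host_coords,
    temperature=300.0,  # kelvin
    pressure=1.0,       # bar
    ff=ff,
    box=box,
    n_steps=10000,
    seed=2021,
)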
Example no. 25
def run_leg(
    combined_coords,
    combined_bps,
    combined_masses,
    host_box,
    guest_name,
    leg_type,
    num_switches,
    transition_steps,
):
    x0 = combined_coords
    v0 = np.zeros_like(x0)
    print(
        f"{leg_type.upper()}_SYSTEM",
        f"guest_name: {guest_name}",
        f"num_atoms: {len(x0)}",
    )

    seed = 2021
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    u_impls = []
    for bp in combined_bps:
        bp_impl = bp.bound_impl(precision=np.float32)
        u_impls.append(bp_impl)

    ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

    # TODO: pre-equilibrate?

    # equilibrate & shoot off switching jobs
    steps_per_batch = 1001

    works = []
    for b in range(num_switches):
        equil2_lambda_schedule = np.ones(steps_per_batch) * MIN_LAMBDA
        ctxt.multiple_steps(equil2_lambda_schedule, 0)
        lamb = equil2_lambda_schedule[-1]
        step = len(equil2_lambda_schedule) - 1
        report.report_step(
            ctxt,
            (b + 1) * step,
            lamb,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            num_switches * steps_per_batch,
            f"{leg_type.upper()}_EQUILIBRATION_2",
        )

        if report.too_much_force(ctxt, MIN_LAMBDA, host_box, combined_bps,
                                 u_impls):
            return

        work = do_switch(
            ctxt.get_x_t(),
            ctxt.get_v_t(),
            combined_bps,
            combined_masses,
            host_box,
            guest_name,
            leg_type,
            u_impls,
            transition_steps,
        )
        works.append(work)

    return works
Example no. 26
def minimize_host_4d(mols, host_system, host_coords, ff, box, mol_coords=None) -> np.ndarray:
    """
    Insert mols into a host system via 4D decoupling: FIRE minimization at lambda=1.0,
    0 K Langevin integration over a schedule of lambda from 1.0 to 0.0, and FIRE minimization again at lambda=0.0.

    The ligand coordinates are fixed during this, and only host_coords are minimized.

    Parameters
    ----------
    mols: list of Chem.Mol
        Ligands to be inserted. This must be of length 1 or 2 for now.

    host_system: openmm.System
        OpenMM System representing the host

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    ff: ff.Forcefield
        Wrapper class around a list of handlers

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    mol_coords: list of np.ndarray
        Pre-specify a list of mol coords. Else use the mol.GetConformer(0)

    Returns
    -------
    np.ndarray
        This returns minimized host_coords.

    """

    assert box.shape == (3, 3)

    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    num_host_atoms = host_coords.shape[0]

    if len(mols) == 1:
        top = topology.BaseTopology(mols[0], ff)
    elif len(mols) == 2:
        top = topology.DualTopologyMinimization(mols[0], mols[1], ff)
    else:
        raise ValueError("mols must be length 1 or 2")

    mass_list = [np.array(host_masses)]
    conf_list = [np.array(host_coords)]
    for mol in mols:
        # mass increase is to keep the ligand fixed
        mass_list.append(np.array([a.GetMass() * 100000 for a in mol.GetAtoms()]))

    if mol_coords is not None:
        for mc in mol_coords:
            conf_list.append(mc)
    else:
        for mol in mols:
            conf_list.append(get_romol_conf(mol))

    combined_masses = np.concatenate(mass_list)
    combined_coords = np.concatenate(conf_list)

    hgt = topology.HostGuestTopology(host_bps, top)

    u_impls = bind_potentials(hgt, ff)

    # this value doesn't matter since we will turn off the noise.
    seed = 0

    intg = LangevinIntegrator(0.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    x0 = combined_coords
    v0 = np.zeros_like(x0)

    x0 = fire_minimize(x0, u_impls, box, np.ones(50))
    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)
    ctxt.multiple_steps(np.linspace(1.0, 0, 1000))

    final_coords = fire_minimize(ctxt.get_x_t(), u_impls, box, np.zeros(50))
    for impl in u_impls:
        du_dx, _, _ = impl.execute(final_coords, box, 0.0)
        norm = np.linalg.norm(du_dx, axis=-1)
        assert np.all(norm < 25000)

    return final_coords[:num_host_atoms]
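A hedged usage sketch mirroring how minimize_host_4d is called elsewhere in this listing (a single ligand inserted into a solvated system); mol, ff and the build_water_system helper are assumed to be available as in the surrounding examples.

import numpy as np

# solvate, insert the ligand via 4D decoupling, then build combined starting coordinates
complex_system, complex_coords, complex_box, complex_top = build_water_system(3.0)
min_host_coords = minimize_host_4d([mol], complex_system, complex_coords, ff, complex_box)
x0 = np.concatenate([min_host_coords, get_romol_conf(mol)])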
Example no. 27
def minimize_host_4d(romol, host_system, host_coords, ff, box):
    """
    Insert romol into a host system via 4D decoupling under a Langevin thermostat.
    The ligand coordinates are fixed during this, and only host_coordinates are minimized.

    Parameters
    ----------
    romol: ROMol
        Ligand to be inserted. It must be embedded.

    host_system: openmm.System
        OpenMM System representing the host

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    ff: ff.Forcefield
        Wrapper class around a list of handlers

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    Returns
    -------
    np.ndarray
        This returns minimized host_coords.

    """

    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    # keep the ligand rigid
    ligand_masses = [a.GetMass()*100000 for a in romol.GetAtoms()]
    combined_masses = np.concatenate([host_masses, ligand_masses])
    ligand_coords = get_romol_conf(romol)
    combined_coords = np.concatenate([host_coords, ligand_coords])
    num_host_atoms = host_coords.shape[0]

    final_potentials = []
    for bp in host_bps:
        if isinstance(bp, potentials.Nonbonded):
            host_p = bp
        else:
            final_potentials.append(bp)

    gbt = topology.BaseTopology(romol, ff)
    hgt = topology.HostGuestTopology(host_p, gbt)

    # setup the parameter handlers for the ligand
    tuples = [
        [hgt.parameterize_harmonic_bond, [ff.hb_handle]],
        [hgt.parameterize_harmonic_angle, [ff.ha_handle]],
        [hgt.parameterize_proper_torsion, [ff.pt_handle]],
        [hgt.parameterize_improper_torsion, [ff.it_handle]],
        [hgt.parameterize_nonbonded, [ff.q_handle, ff.lj_handle]],
    ]

    for fn, handles in tuples:
        params, potential = fn(*[h.params for h in handles])
        final_potentials.append(potential.bind(params))

    seed = 2020

    intg = LangevinIntegrator(
        300.0,
        1.5e-3,
        1.0,
        combined_masses,
        seed
    ).impl()

    x0 = combined_coords
    v0 = np.zeros_like(x0)

    u_impls = []

    for bp in final_potentials:
        fn = bp.bound_impl(precision=np.float32)
        u_impls.append(fn)

    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        u_impls
    )

    for lamb in np.linspace(1.0, 0, 1000):
        ctxt.step(lamb)

    return ctxt.get_x_t()[:num_host_atoms]
Example no. 28
def run_leg(
    orig_host_coords,
    orig_guest_coords,
    combined_bps,
    combined_masses,
    host_box,
    guest_name,
    leg_type,
    host_mol,
    guest_mol,
    outdir,
    fewer_outfiles=False,
    no_outfiles=False,
):
    x0 = np.concatenate([orig_host_coords, orig_guest_coords])
    v0 = np.zeros_like(x0)
    print(
        f"{leg_type.upper()}_SYSTEM",
        f"guest_name: {guest_name}",
        f"num_atoms: {len(x0)}",
    )

    seed = 2021
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    u_impls = []
    for bp in combined_bps:
        bp_impl = bp.bound_impl(precision=np.float32)
        u_impls.append(bp_impl)

    ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

    # insert guest
    insertion_lambda_schedule = np.linspace(
        INSERTION_MAX_LAMBDA, MIN_LAMBDA, TRANSITION_STEPS
    )
    for step, lamb in enumerate(insertion_lambda_schedule):
        ctxt.step(lamb)
        if step % 100 == 0:
            report.report_step(
                ctxt,
                step,
                lamb,
                host_box,
                combined_bps,
                u_impls,
                guest_name,
                TRANSITION_STEPS,
                f"{leg_type.upper()}_INSERTION",
            )
            if not fewer_outfiles and not no_outfiles:
                host_coords = ctxt.get_x_t()[: len(orig_host_coords)] * 10
                guest_coords = ctxt.get_x_t()[len(orig_host_coords) :] * 10
                report.write_frame(
                    host_coords,
                    host_mol,
                    guest_coords,
                    guest_mol,
                    guest_name,
                    outdir,
                    str(step).zfill(len(str(TRANSITION_STEPS))),
                    f"{leg_type}-ins",
                )
        if step in (0, int(TRANSITION_STEPS/2), TRANSITION_STEPS-1):
            if report.too_much_force(ctxt, lamb, host_box, combined_bps, u_impls):
                return

    # equilibrate
    for step in range(EQ1_STEPS):
        ctxt.step(MIN_LAMBDA)
        if step % 1000 == 0:
            report.report_step(
                ctxt,
                step,
                MIN_LAMBDA,
                host_box,
                combined_bps,
                u_impls,
                guest_name,
                EQ1_STEPS,
                f"{leg_type.upper()}_EQUILIBRATION_1",
            )
            if not fewer_outfiles and not no_outfiles:
                host_coords = ctxt.get_x_t()[: len(orig_host_coords)] * 10
                guest_coords = ctxt.get_x_t()[len(orig_host_coords) :] * 10
                report.write_frame(
                    host_coords,
                    host_mol,
                    guest_coords,
                    guest_mol,
                    guest_name,
                    outdir,
                    str(step).zfill(len(str(EQ1_STEPS))),
                    f"{leg_type}-eq1",
                )
        if step in (0, int(EQ1_STEPS/2), EQ1_STEPS-1):
            if report.too_much_force(ctxt, MIN_LAMBDA, host_box, combined_bps, u_impls):
                return

    # equilibrate more & shoot off deletion jobs
    for step in range(EQ2_STEPS):
        ctxt.step(MIN_LAMBDA)
        if step % 1000 == 0:
            report.report_step(
                ctxt,
                step,
                MIN_LAMBDA,
                host_box,
                combined_bps,
                u_impls,
                guest_name,
                EQ2_STEPS,
                f"{leg_type.upper()}_EQUILIBRATION_2",
            )

            # TODO: if guest has undocked, stop simulation
            if not no_outfiles:
                host_coords = ctxt.get_x_t()[: len(orig_host_coords)] * 10
                guest_coords = ctxt.get_x_t()[len(orig_host_coords) :] * 10
                report.write_frame(
                    host_coords,
                    host_mol,
                    guest_coords,
                    guest_mol,
                    guest_name,
                    outdir,
                    str(step).zfill(len(str(EQ2_STEPS))),
                    f"{leg_type}-eq2",
                )
            if report.too_much_force(ctxt, MIN_LAMBDA, host_box, combined_bps, u_impls):
                return

            do_deletion(
                ctxt.get_x_t(),
                ctxt.get_v_t(),
                combined_bps,
                combined_masses,
                host_box,
                guest_name,
                leg_type,
            )
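report.too_much_force is used as a guard throughout these legs. Below is a rough, hypothetical sketch of that kind of per-atom force check, reusing the impl.execute call pattern from the minimizer example above; the 20000 kJ/(mol*nm) threshold is the value quoted in the next example's docstring, and the real implementation lives in docking/report.py.

import numpy as np

MAX_NORM_FORCE = 20000.0  # kJ/(mol*nm); threshold quoted in docking/report.py

def too_much_force_sketch(coords, box, lamb, u_impls, max_norm=MAX_NORM_FORCE):
    # sum du/dx over all bound potentials, then flag any atom whose force norm is too large
    total_du_dx = np.zeros_like(coords)
    for impl in u_impls:
        du_dx, _, _ = impl.execute(coords, box, lamb)
        total_du_dx += du_dx
    per_atom_norms = np.linalg.norm(total_du_dx, axis=-1)
    return bool(np.any(per_atom_norms > max_norm))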
Example no. 29
def dock_and_equilibrate(host_pdbfile,
                         guests_sdfile,
                         max_lambda,
                         insertion_steps,
                         eq_steps,
                         outdir,
                         fewer_outfiles=False,
                         constant_atoms=[]):
    """Solvates a host, inserts guest(s) into solvated host, equilibrates

    Parameters
    ----------

    host_pdbfile: path to host pdb file to dock into
    guests_sdfile: path to input sdf with guests to pose/dock
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to the original pose)
        (must be =1 for work calculation to be applicable)
    insertion_steps: how many steps to insert the guest over (recommended: 501)
    eq_steps: how many steps of equilibration to do after insertion (recommended: 15001)
    outdir: where to write output (will be created if it does not already exist)
    fewer_outfiles: if True, will only write frames for the equilibration, not insertion
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files)

    Output
    ------

    A pdb & sdf file every 100 steps of insertion (outdir/<guest_name>/<guest_name>_<step>.[pdb/sdf])
    A pdb & sdf file every 1000 steps of equilibration (outdir/<guest_name>/<guest_name>_<step>.[pdb/sdf])
    stdout every 100 steps of insertion and every 1000 steps of equilibration, noting the step number, lambda value, and energy
    stdout for each guest noting the work of transition
    stdout for each guest noting how long it took to run

    Note
    ----
    If any norm of force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py],
    the simulation for that guest will stop and the work will not be calculated.
    """

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print(f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}
    MAX_LAMBDA = {max_lambda}
    INSERTION_STEPS = {insertion_steps}
    EQ_STEPS = {eq_steps}
    """)

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    # TODO: return topology from builders.build_protein_system
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    # freshly built water boxes can be slightly too small; ideally the system would be minimized first, but padding the box is a workaround
    host_box += np.eye(3) * 0.1
    print("host box", host_box)

    solvated_host_pdb = os.path.join(outdir, "solvated_host.pdb")
    writer = pdb_writer.PDBWriter([solvated_topology], solvated_host_pdb)
    writer.write_frame(solvated_host_coords)
    writer.close()
    solvated_host_mol = Chem.MolFromPDBFile(solvated_host_pdb, removeHs=False)
    os.remove(solvated_host_pdb)
    final_host_potentials = []
    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        solvated_host_system, cutoff=1.2)
    host_nb_bp = None
    for bp in host_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert host_nb_bp is None
            host_nb_bp = bp
        else:
            final_host_potentials.append(bp)

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(),
                                     dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units
        guest_ff_handlers = deserialize_handlers(
            open(
                os.path.join(
                    os.path.dirname(os.path.abspath(__file__)),
                    "..",
                    "ff/params/smirnoff_1_1_0_ccc.py",
                )).read())
        ff = Forcefield(guest_ff_handlers)
        guest_base_top = topology.BaseTopology(guest_mol, ff)

        # combine host & guest
        hgt = topology.HostGuestTopology(host_nb_bp, guest_base_top)
        # setup the parameter handlers for the ligand
        bonded_tuples = [[hgt.parameterize_harmonic_bond, ff.hb_handle],
                         [hgt.parameterize_harmonic_angle, ff.ha_handle],
                         [hgt.parameterize_proper_torsion, ff.pt_handle],
                         [hgt.parameterize_improper_torsion, ff.it_handle]]
        combined_bps = list(final_host_potentials)
        # instantiate the vjps while parameterizing (forward pass)
        for fn, handle in bonded_tuples:
            params, potential = fn(handle.params)
            combined_bps.append(potential.bind(params))
        nb_params, nb_potential = hgt.parameterize_nonbonded(
            ff.q_handle.params, ff.lj_handle.params)
        combined_bps.append(nb_potential.bind(nb_params))
        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]
        combined_masses = np.concatenate([host_masses, guest_masses])

        x0 = np.concatenate([solvated_host_coords, orig_guest_coords])
        v0 = np.zeros_like(x0)
        print(
            f"SYSTEM",
            f"guest_name: {guest_name}",
            f"num_atoms: {len(x0)}",
        )

        for atom_num in constant_atoms:
            combined_masses[atom_num - 1] += 50000

        seed = 2021
        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                  seed).impl()

        u_impls = []
        for bp in combined_bps:
            bp_impl = bp.bound_impl(precision=np.float32)
            u_impls.append(bp_impl)

        ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

        # collect a du_dl calculation once every other step
        subsample_freq = 2
        du_dl_obs = custom_ops.FullPartialUPartialLambda(
            u_impls, subsample_freq)
        ctxt.add_observable(du_dl_obs)

        # insert guest
        insertion_lambda_schedule = np.linspace(max_lambda, 0.0,
                                                insertion_steps)
        calc_work = True
        for step, lamb in enumerate(insertion_lambda_schedule):
            ctxt.step(lamb)
            if step % 100 == 0:
                report.report_step(ctxt, step, lamb, host_box, combined_bps,
                                   u_impls, guest_name, insertion_steps,
                                   "INSERTION")
                if not fewer_outfiles:
                    host_coords = ctxt.get_x_t()[:len(solvated_host_coords
                                                      )] * 10
                    guest_coords = ctxt.get_x_t()[len(solvated_host_coords
                                                      ):] * 10
                    report.write_frame(
                        host_coords,
                        solvated_host_mol,
                        guest_coords,
                        guest_mol,
                        guest_name,
                        outdir,
                        str(step).zfill(len(str(insertion_steps))),
                        f"ins",
                    )
            if step in (0, int(insertion_steps / 2), insertion_steps - 1):
                if report.too_much_force(ctxt, lamb, host_box, combined_bps,
                                         u_impls):
                    calc_work = False
                    break

        # Note: this condition only applies for ABFE, not RBFE
        if (abs(du_dl_obs.full_du_dl()[0]) > 0.001
                or abs(du_dl_obs.full_du_dl()[-1]) > 0.001):
            print("Error: du_dl endpoints are not ~0")
            calc_work = False

        if calc_work:
            work = np.trapz(du_dl_obs.full_du_dl(),
                            insertion_lambda_schedule[::subsample_freq])
            print(f"guest_name: {guest_name}\tinsertion_work: {work:.2f}")

        # equilibrate
        for step in range(eq_steps):
            ctxt.step(0.00)
            if step % 1000 == 0:
                report.report_step(ctxt, step, 0.00, host_box, combined_bps,
                                   u_impls, guest_name, eq_steps,
                                   'EQUILIBRATION')
                host_coords = ctxt.get_x_t()[:len(solvated_host_coords)] * 10
                guest_coords = ctxt.get_x_t()[len(solvated_host_coords):] * 10
                report.write_frame(
                    host_coords,
                    solvated_host_mol,
                    guest_coords,
                    guest_mol,
                    guest_name,
                    outdir,
                    str(step).zfill(len(str(eq_steps))),
                    f"eq",
                )
            if step in (0, int(eq_steps / 2), eq_steps - 1):
                if report.too_much_force(ctxt, 0.00, host_box, combined_bps,
                                         u_impls):
                    break

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
Example no. 30
def test_molecular_ideal_gas():
    """


    References
    ----------
    OpenMM testIdealGas
    https://github.com/openmm/openmm/blob/d8ef57fed6554ec95684e53768188e1f666405c9/tests/TestMonteCarloBarostat.h#L86-L140
    """

    # simulation parameters
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    collision_rate = 1.0 / unit.picosecond
    n_moves = 10000
    barostat_interval = 5
    seed = 2021

    # thermodynamic parameters
    temperatures = np.array([300, 600, 1000]) * unit.kelvin
    pressure = 100.0 * unit.bar  # very high pressure, to keep the expected volume small

    # generate an alchemical system of a waterbox + alchemical ligand:
    # effectively discard ligands by running in AbsoluteFreeEnergy mode at lambda = 1.0
    mol_a = hif2a_ligand_pair.mol_a
    ff = hif2a_ligand_pair.ff
    complex_system, complex_coords, complex_box, complex_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer))

    min_complex_coords = minimize_host_4d([mol_a], complex_system,
                                          complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    _unbound_potentials, _sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords)

    # drop the nonbonded potential
    unbound_potentials = _unbound_potentials[:-1]
    sys_params = _sys_params[:-1]

    # get list of molecules for barostat by looking at bond table
    harmonic_bond_potential = unbound_potentials[0]
    bond_list = get_bond_list(harmonic_bond_potential)
    group_indices = get_group_indices(bond_list)

    volume_trajs = []

    relative_tolerance = 1e-2
    initial_relative_box_perturbation = 2 * relative_tolerance

    n_molecules = complex_top.getNumResidues()

    bound_potentials = []
    for params, unbound_pot in zip(sys_params, unbound_potentials):
        bp = unbound_pot.bind(np.asarray(params))
        bound_potentials.append(bp)

    u_impls = []
    for bp in bound_potentials:
        bp_impl = bp.bound_impl(precision=np.float32)
        u_impls.append(bp_impl)

    # expected volume
    md_pressure_unit = ENERGY_UNIT / DISTANCE_UNIT**3
    pressure_in_md = (
        pressure * unit.AVOGADRO_CONSTANT_NA).value_in_unit(md_pressure_unit)
    expected_volume_in_md = (n_molecules +
                             1) * BOLTZ * temperatures.value_in_unit(
                                 unit.kelvin) / pressure_in_md

    for i, temperature in enumerate(temperatures):

        # define a thermostat
        integrator = LangevinIntegrator(
            temperature.value_in_unit(unit.kelvin),
            timestep.value_in_unit(unit.picosecond),
            collision_rate.value_in_unit(unit.picosecond**-1),
            masses,
            seed,
        )
        integrator_impl = integrator.impl()

        v_0 = sample_velocities(masses * unit.amu, temperature)

        # rescale the box to be approximately the desired box volume already
        rescaler = CentroidRescaler(group_indices)
        initial_volume = compute_box_volume(complex_box)
        initial_center = compute_box_center(complex_box)
        length_scale = ((1 + initial_relative_box_perturbation) *
                        expected_volume_in_md[i] / initial_volume)**(1.0 / 3)
        new_coords = rescaler.scale_centroids(coords, initial_center,
                                              length_scale)
        new_box = complex_box * length_scale

        baro = custom_ops.MonteCarloBarostat(
            new_coords.shape[0],
            pressure.value_in_unit(unit.bar),
            temperature.value_in_unit(unit.kelvin),
            group_indices,
            barostat_interval,
            u_impls,
            seed,
        )

        ctxt = custom_ops.Context(new_coords,
                                  v_0,
                                  new_box,
                                  integrator_impl,
                                  u_impls,
                                  barostat=baro)
        vols = []
        for move in range(n_moves // barostat_interval):
            ctxt.multiple_steps(np.ones(barostat_interval))
            new_box = ctxt.get_box()
            volume = np.linalg.det(new_box)
            vols.append(volume)
        volume_trajs.append(vols)

    equil_time = len(volume_trajs[0]) // 2  # TODO: don't hard-code this?
    actual_volume_in_md = np.array(
        [np.mean(volume_traj[equil_time:]) for volume_traj in volume_trajs])

    np.testing.assert_allclose(actual=actual_volume_in_md,
                               desired=expected_volume_in_md,
                               rtol=relative_tolerance)
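A worked sketch of the expectation checked above: for n freely translating molecules at temperature T and pressure P, the average volume is <V> = (n + 1) kT / P (the +1 follows the OpenMM reference test). The constants and unit conversion below are standard values used for illustration, not the library's own constants.

def ideal_gas_volume_nm3(n_molecules, temperature_in_kelvin, pressure_in_bar):
    kB = 0.008314462618  # Boltzmann constant in kJ/(mol*K)
    # 1 bar = 1e5 J/m^3 = 1e5 * 1e-27 J/nm^3; multiply by Avogadro's number and convert J -> kJ
    pressure_md = pressure_in_bar * 1e5 * 1e-27 * 6.02214076e23 / 1000.0  # kJ/(mol*nm^3)
    return (n_molecules + 1) * kB * temperature_in_kelvin / pressure_md

# e.g. the expected average volume of the 300 K, 100 bar state sampled above:
# ideal_gas_volume_nm3(n_molecules, 300.0, 100.0)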