def vacuum_model(ff_params):
    """Estimate the vacuum-leg free energy for the given forcefield parameters.

    The edge is prepared with ``rfe.prepare_vacuum_edge`` and simulated with a
    Langevin integrator plus a Monte Carlo barostat. ``rfe``, ``seed``,
    ``lambda_schedule``, ``equil_steps`` and ``prod_steps`` are read from the
    enclosing scope.

    Parameters
    ----------
    ff_params:
        Forcefield parameters, forwarded unchanged to
        ``rfe.prepare_vacuum_edge``.

    Returns
    -------
    The first element of the result of ``estimator.deltaG``.
    """
    unbound_potentials, sys_params, masses, coords = rfe.prepare_vacuum_edge(
        ff_params)

    x0 = coords
    v0 = np.zeros_like(coords)
    # Build exactly one pool client. The original assigned x0/v0/client twice,
    # which constructed a second CUDAPoolClient and discarded the first.
    client = CUDAPoolClient(1)
    # Diagonal box with edge length 100 (units are whatever the estimator
    # expects -- not established in this snippet).
    box = np.eye(3, dtype=np.float64) * 100

    # assumes the first potential is the harmonic bond term, per the variable
    # name -- TODO confirm against rfe.prepare_vacuum_edge
    harmonic_bond_potential = unbound_potentials[0]
    group_idxs = get_group_indices(get_bond_list(harmonic_bond_potential))

    temperature = 300.0
    pressure = 1.0

    integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)

    barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature,
                                  group_idxs, 25, seed)
    model = estimator.FreeEnergyModel(unbound_potentials, client, box, x0,
                                      v0, integrator, lambda_schedule,
                                      equil_steps, prod_steps, barostat)

    return estimator.deltaG(model, sys_params)[0]
Exemple #2
0
def test_free_energy_estimator():
    """Smoke-test the ABFE estimator with endpoint correction disabled.

    A small random system with harmonic bond and angle terms is built, then
    the squared dG is evaluated once with no client and once with a
    single-worker ``CUDAPoolClient``. The test only checks that both
    evaluations complete; the returned values are not inspected.
    """
    n_atoms = 5
    n_bonds = 3
    n_angles = 4

    coords = np.random.rand(n_atoms, 3)
    velocities = np.zeros_like(coords)

    bond_pot, bond_params = get_harmonic_bond(n_atoms, n_bonds)
    angle_pot, angle_params = get_harmonic_angle(n_atoms, n_angles)
    all_potentials = [bond_pot, angle_pot]
    all_params = [bond_params, angle_params]

    masses = np.random.rand(n_atoms)
    box = np.eye(3, dtype=np.float64)

    seed = 2021
    temperature, pressure = 300.0, 1.0

    group_idxs = get_group_indices(get_bond_list(bond_pot))
    integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
    barostat = MonteCarloBarostat(coords.shape[0], pressure, temperature,
                                  group_idxs, 25, seed)

    beta = 0.125
    schedule = np.linspace(0, 1.0, 4)

    def loss_fn(sys_params):
        # `client` is resolved at call time from the loop variable below.
        model = estimator_abfe.FreeEnergyModel(
            all_potentials,
            False,  # endpoint correction off
            client,
            box,
            coords,
            velocities,
            integrator,
            barostat,
            schedule,
            100,
            100,
            beta,
            "test",
        )
        dG, _bar_dG_err, _results = estimator_abfe.deltaG(model, sys_params)
        return dG**2

    for client in (None, CUDAPoolClient(1)):
        loss_fn(all_params)
Exemple #3
0
    def _get_integrator(combined_masses):
        """
        Build a LangevinIntegrator for `combined_masses` with a random seed.

        The seed is drawn from numpy's global RNG, bounded above by the
        largest int32. Any impl created from the returned integrator must be
        bound to a python handle that lives as long as the context using it.
        """
        seed_upper_bound = np.iinfo(np.int32).max
        rng_seed = np.random.randint(seed_upper_bound)
        return LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                  rng_seed)
Exemple #4
0
def do_deletion(
    x0,
    v0,
    combined_bps,
    combined_masses,
    box,
    guest_name,
    leg_type,
    u_impls,
    deletion_steps,
):
    """Run one deletion over a linear lambda ramp and return its work.

    The context is advanced through ``deletion_steps`` lambda values from
    MIN_LAMBDA to DELETION_MAX_LAMBDA, then stepped once more at the final
    lambda before reporting and checking forces.

    Returns
    -------
    The trapezoidal integral of du/dl over the schedule, or ``None`` when the
    force check fails or either du/dl endpoint exceeds 0.001 in magnitude.
    """
    integrator_impl = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                         2021).impl()
    context = custom_ops.Context(x0, v0, box, integrator_impl, u_impls)

    schedule = np.linspace(MIN_LAMBDA, DELETION_MAX_LAMBDA, deletion_steps)

    # du/dl is collected at every step of the schedule
    stride = 1
    du_dls, _, _ = context.multiple_steps(schedule, stride)

    final_step = len(schedule) - 1
    final_lambda = schedule[-1]
    context.step(final_lambda)
    report.report_step(
        context,
        final_step,
        final_lambda,
        box,
        combined_bps,
        u_impls,
        guest_name,
        deletion_steps,
        f"{leg_type.upper()}_DELETION",
    )

    if report.too_much_force(context, final_lambda, box, combined_bps,
                             u_impls):
        print("Not calculating work (too much force)")
        return None

    # Note: this condition only applies for ABFE, not RBFE
    start_is_off = abs(du_dls[0]) > 0.001
    if start_is_off or abs(du_dls[-1]) > 0.001:
        print("Not calculating work (du_dl endpoints are not ~0)")
        return None

    work = np.trapz(du_dls, schedule[::stride])
    print(f"guest_name: {guest_name}\t{leg_type}_work: {work:.2f}")
    return work
def do_deletion(x0, v0, combined_bps, combined_masses, box, guest_name, leg_type):
    """Delete the guest over a linear lambda ramp and compute the work.

    The context is stepped through TRANSITION_STEPS lambda values from
    MIN_LAMBDA to DELETION_MAX_LAMBDA while an observable records du/dl every
    other step. A report is emitted every 100 steps, and forces are checked
    at the first, middle and last step.

    Returns
    -------
    The trapezoidal integral of the recorded du/dl over the subsampled
    schedule, or ``None`` when the force check fails or either du/dl endpoint
    exceeds 0.001 in magnitude. (The original computed the work but only
    printed it; returning it matches the other ``do_deletion`` variant and is
    harmless to callers that ignore the result.)
    """
    seed = 2021
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]

    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    # collect a du_dl calculation once every other step
    subsample_freq = 2
    du_dl_obs = custom_ops.FullPartialUPartialLambda(u_impls, subsample_freq)
    ctxt.add_observable(du_dl_obs)

    deletion_lambda_schedule = np.linspace(
        MIN_LAMBDA, DELETION_MAX_LAMBDA, TRANSITION_STEPS
    )

    force_check_steps = (0, int(TRANSITION_STEPS / 2), TRANSITION_STEPS - 1)

    for step, lamb in enumerate(deletion_lambda_schedule):
        ctxt.step(lamb)
        if step % 100 == 0:
            report.report_step(
                ctxt,
                step,
                lamb,
                box,
                combined_bps,
                u_impls,
                guest_name,
                TRANSITION_STEPS,
                f"{leg_type.upper()}_DELETION",
            )
        if step in force_check_steps and report.too_much_force(
            ctxt, lamb, box, combined_bps, u_impls
        ):
            # Abort immediately; the original also set a flag here that was
            # never read because of the return.
            return None

    # Query the observable once instead of up to three times.
    full_du_dls = du_dl_obs.full_du_dl()

    # Note: this condition only applies for ABFE, not RBFE
    if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
        print("Error: du_dl endpoints are not ~0")
        return None

    work = np.trapz(full_du_dls, deletion_lambda_schedule[::subsample_freq])
    print(f"guest_name: {guest_name}\t{leg_type}_work: {work:.2f}")
    return work
    def binding_model(ff_params):
        """Return the complex-leg dG minus the solvent-leg dG for `ff_params`.

        Each leg minimizes the host around `mol_a`, prepares the host edge,
        and runs the free energy estimator with a fresh single-worker client.
        Systems, coordinates, boxes, `seed`, `lambda_schedule`, `equil_steps`
        and `prod_steps` all come from the enclosing scope.
        """
        legs = (
            (complex_system, complex_coords, complex_box),
            (solvent_system, solvent_coords, solvent_box),
        )
        temperature, pressure = 300.0, 1.0

        leg_dGs = []
        for host_system, host_coords, host_box in legs:
            # minimize the host to avoid clashes
            minimized_coords = minimizer.minimize_host_4d(
                [mol_a], host_system, host_coords, ff, host_box)

            unbound_potentials, sys_params, masses, x0 = rfe.prepare_host_edge(
                ff_params, host_system, minimized_coords)
            v0 = np.zeros_like(x0)
            client = CUDAPoolClient(1)

            bond_potential = unbound_potentials[0]
            group_idxs = get_group_indices(get_bond_list(bond_potential))

            integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses,
                                            seed)
            barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature,
                                          group_idxs, 25, seed)

            model = estimator.FreeEnergyModel(
                unbound_potentials,
                client,
                host_box,
                x0,
                v0,
                integrator,
                lambda_schedule,
                equil_steps,
                prod_steps,
                barostat,
            )

            leg_dG, _ = estimator.deltaG(model, sys_params)
            leg_dGs.append(leg_dG)

        complex_dG, solvent_dG = leg_dGs
        return complex_dG - solvent_dG
def do_switch(
    x0,
    v0,
    combined_bps,
    combined_masses,
    box,
    guest_name,
    leg_type,
    u_impls,
    transition_steps,
):
    """Switch lambda linearly from MIN_LAMBDA to MAX_LAMBDA and return the work.

    The context is advanced over ``transition_steps`` lambda values, stepped
    once more at the final lambda, reported, and checked for excessive force.

    Returns
    -------
    The trapezoidal integral of du/dl over the schedule, or ``None`` when the
    force check fails.
    """
    integrator_impl = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                         2021).impl()
    context = custom_ops.Context(x0, v0, box, integrator_impl, u_impls)

    schedule = np.linspace(MIN_LAMBDA, MAX_LAMBDA, transition_steps)

    # du/dl is collected at every step of the schedule
    stride = 1
    du_dls, _, _ = context.multiple_steps(schedule, stride)

    final_step = len(schedule) - 1
    final_lambda = schedule[-1]
    context.step(final_lambda)
    report.report_step(
        context,
        final_step,
        final_lambda,
        box,
        combined_bps,
        u_impls,
        guest_name,
        transition_steps,
        f"{leg_type.upper()}_SWITCH",
    )

    if report.too_much_force(context, final_lambda, box, combined_bps,
                             u_impls):
        return None

    work = np.trapz(du_dls, schedule[::stride])
    print(f"guest_name: {guest_name}\t{leg_type}_work: {work:.2f}")
    return work
Exemple #8
0
def minimize(args):
    """Step a system down a lambda ramp and return the final coordinates.

    Parameters
    ----------
    args:
        A 4-tuple ``(bound_potentials, masses, x0, box)``; packed into one
        argument, presumably so the function can be mapped over a pool --
        confirm against callers.

    Returns
    -------
    The result of ``get_x_t()`` after stepping through 500 lambda values
    spaced linearly from 0.35 down to 0.0, starting from zero velocities.
    """
    bound_potentials, masses, x0, box = args

    u_impls = [
        bp.bound_impl(precision=np.float32) for bp in bound_potentials
    ]

    # a fresh random seed per call, bounded above by the largest int32
    seed = np.random.randint(np.iinfo(np.int32).max)
    integrator_impl = LangevinIntegrator(300.0, 1.5e-3, 1.0, masses,
                                         seed).impl()

    context = custom_ops.Context(x0, np.zeros_like(x0), box, integrator_impl,
                                 u_impls)

    for lamb in np.linspace(0.35, 0.0, 500):
        context.step(lamb)

    return context.get_x_t()
Exemple #9
0
def pose_dock(
    guests_sdfile,
    host_pdbfile,
    transition_type,
    n_steps,
    transition_steps,
    max_lambda,
    outdir,
    random_rotation=False,
    constant_atoms=(),
):
    """Runs short simulations in which the guests phase in or out over time

    Parameters
    ----------

    guests_sdfile: path to input sdf with guests to pose/dock
    host_pdbfile: path to host pdb file to dock into
    transition_type: "insertion" or "deletion"
    n_steps: how many total steps of simulation to do (recommended: <= 1000)
    transition_steps: how many steps to insert/delete the guest over (recommended: <= 500)
        (must be <= n_steps)
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    outdir: where to write output (will be created if it does not already exist)
    random_rotation: whether to apply a random rotation to each guest before inserting
        (only allowed for "insertion")
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files). Implemented by adding 50000 to those atoms' masses.

    Output
    ------

    A pdb & sdf file every 100 steps (outdir/<guest_name>_<step>.pdb)
    stdout every 100 steps noting the step number, lambda value, and energy
    stdout for each guest noting the work of transition
    stdout for each guest noting how long it took to run

    Note
    ----
    If any norm of force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py],
    the simulation for that guest will stop and the work will not be calculated.
    Guest entries that the SDF supplier yields as None are skipped with a message.
    """
    assert transition_steps <= n_steps
    assert transition_type in ("insertion", "deletion")
    if random_rotation:
        assert transition_type == "insertion"

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(outdir, exist_ok=True)

    host_mol = Chem.MolFromPDBFile(host_pdbfile, removeHs=False)
    amber_ff = app.ForceField("amber99sbildn.xml", "tip3p.xml")
    host_file = PDBFile(host_pdbfile)
    host_system = amber_ff.createSystem(
        host_file.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False,
    )
    host_conf = np.array([
        [to_md_units(x), to_md_units(y), to_md_units(z)]
        for x, y, z in host_file.positions
    ])
    num_host_atoms = len(host_conf)

    final_potentials = []
    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        host_system, cutoff=1.2)
    host_nb_bp = None
    for bp in host_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert host_nb_bp is None
            host_nb_bp = bp
        else:
            final_potentials.append(bp)

    # TODO (ytz): we should really fix this later on. This padding was done to
    # address the particles that are too close to the boundary.
    padding = 0.1
    box_lengths = np.amax(host_conf, axis=0) - np.amin(host_conf, axis=0)
    box_lengths = box_lengths + padding
    box = np.eye(3, dtype=np.float64) * box_lengths

    # Read the guest forcefield text once and close the handle; the original
    # re-opened the file for every guest and never closed it.
    ff_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "ff/params/smirnoff_1_1_0_ccc.py",
    )
    with open(ff_path) as ff_file:
        guest_ff_text = ff_file.read()

    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        if guest_mol is None:
            # the supplier yields None for entries it cannot parse
            print("Skipping a guest entry that could not be parsed")
            continue

        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        # handlers are rebuilt per guest so no state is shared between guests
        guest_ff_handlers = deserialize_handlers(guest_ff_text)
        ff = Forcefield(guest_ff_handlers)
        guest_base_topology = topology.BaseTopology(guest_mol, ff)

        # combine
        hgt = topology.HostGuestTopology(host_nb_bp, guest_base_topology)
        # setup the parameter handlers for the ligand
        bonded_tuples = [[hgt.parameterize_harmonic_bond, ff.hb_handle],
                         [hgt.parameterize_harmonic_angle, ff.ha_handle],
                         [hgt.parameterize_proper_torsion, ff.pt_handle],
                         [hgt.parameterize_improper_torsion, ff.it_handle]]
        bps = list(final_potentials)
        # instantiate the vjps while parameterizing (forward pass)
        for fn, handle in bonded_tuples:
            params, potential = fn(handle.params)
            bps.append(potential.bind(params))
        nb_params, nb_potential = hgt.parameterize_nonbonded(
            ff.q_handle.params, ff.lj_handle.params)
        bps.append(nb_potential.bind(nb_params))

        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]
        masses = np.concatenate([host_masses, guest_masses])

        # make the "constant" atoms very heavy so they barely move
        for atom_num in constant_atoms:
            masses[atom_num - 1] += 50000

        conformer = guest_mol.GetConformer(0)
        mol_conf = np.array(conformer.GetPositions(), dtype=np.float64)
        mol_conf = mol_conf / 10  # convert to md_units

        if random_rotation:
            # rotate about the guest's centroid
            center = np.mean(mol_conf, axis=0)
            mol_conf -= center
            from scipy.stats import special_ortho_group

            mol_conf = np.matmul(mol_conf, special_ortho_group.rvs(3))
            mol_conf += center

        x0 = np.concatenate([host_conf, mol_conf])  # combined geometry
        v0 = np.zeros_like(x0)

        seed = 2021
        intg = LangevinIntegrator(300, 1.5e-3, 1.0, masses, seed).impl()

        precision = np.float32
        impls = [b.bound_impl(precision) for b in bps]

        ctxt = custom_ops.Context(x0, v0, box, intg, impls)

        # collect a du_dl calculation once every other step
        subsample_freq = 2
        du_dl_obs = custom_ops.FullPartialUPartialLambda(impls, subsample_freq)
        ctxt.add_observable(du_dl_obs)

        if transition_type == "insertion":
            new_lambda_schedule = np.concatenate([
                np.linspace(max_lambda, 0.0, transition_steps),
                np.zeros(n_steps - transition_steps),
            ])
        elif transition_type == "deletion":
            new_lambda_schedule = np.concatenate([
                np.linspace(0.0, max_lambda, transition_steps),
                np.ones(n_steps - transition_steps) * max_lambda,
            ])
        else:
            # still reachable when asserts are stripped with -O
            raise RuntimeError(
                'invalid `transition_type` (must be one of ["insertion", "deletion"])'
            )

        force_check_steps = (0, int(n_steps / 2), n_steps - 1)

        calc_work = True
        for step, lamb in enumerate(new_lambda_schedule):
            ctxt.step(lamb)
            if step % 100 == 0:
                report.report_step(ctxt, step, lamb, box, bps, impls,
                                   guest_name, n_steps, 'pose_dock')
                # fetch coordinates once, then split into host and guest
                x_t = ctxt.get_x_t()
                host_coords = x_t[:num_host_atoms] * 10
                guest_coords = x_t[num_host_atoms:] * 10
                report.write_frame(host_coords, host_mol, guest_coords,
                                   guest_mol, guest_name, outdir, step, 'pd')
            if step in force_check_steps:
                if report.too_much_force(ctxt, lamb, box, bps, impls):
                    calc_work = False
                    break

        # Query the observable once instead of up to three times.
        full_du_dls = du_dl_obs.full_du_dl()

        # Note: this condition only applies for ABFE, not RBFE
        if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
            print("Error: du_dl endpoints are not ~0")
            calc_work = False

        if calc_work:
            work = np.trapz(full_du_dls,
                            new_lambda_schedule[::subsample_freq])
            print(f"guest_name: {guest_name}\twork: {work:.2f}")
        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
Exemple #10
0
    def predict(self, ff_params: list, mol_a: Chem.Mol, mol_b: Chem.Mol,
                core: np.ndarray):
        """
        Predict the ddG of morphing mol_a into mol_b. This function is differentiable w.r.t. ff_params.

        The complex and solvent stages are run in that order with the same
        protocol, and the prediction is the complex dG minus the solvent dG.

        Parameters
        ----------

        ff_params: list of np.ndarray
            This should match the ordered params returned by the forcefield

        mol_a: Chem.Mol
            Starting molecule corresponding to lambda = 0

        mol_b: Chem.Mol
            Starting molecule corresponding to lambda = 1

        core: np.ndarray
            N x 2 list of ints corresponding to the atom mapping of the core.

        Returns
        -------
        float
            delta delta G in kJ/mol
        aux
            list of (stage name, results) tuples, one per stage, where results
            is the second value returned by estimator.deltaG
        """

        stage_dGs = []
        stage_results = []

        for stage, host_system, host_coords, host_box, lambda_schedule in [
            ("complex", self.complex_system, self.complex_coords,
             self.complex_box, self.complex_schedule),
            ("solvent", self.solvent_system, self.solvent_coords,
             self.solvent_box, self.solvent_schedule),
        ]:
            single_topology = topology.SingleTopology(mol_a, mol_b, core,
                                                      self.ff)
            rfe = free_energy.RelativeFreeEnergy(single_topology)
            edge_hash = self._edge_hash(stage, mol_a, mol_b, core)
            if self.pre_equilibrate and edge_hash in self._equil_cache:
                # Reuse the cached coordinates and box for this edge instead
                # of minimizing the host again.
                cached_state = self._equil_cache[edge_hash]
                x0 = cached_state.coords
                host_box = cached_state.box
                num_host_coords = len(host_coords)
                # Only the potentials, params and masses are taken from this
                # call; the coordinates it returns are discarded.
                unbound_potentials, sys_params, masses, _ = rfe.prepare_host_edge(
                    ff_params, host_system, host_coords)
                mol_a_size = mol_a.GetNumAtoms()
                # Use Dual Topology to pre equilibrate, so have to get the mean of the two sets of mol,
                # normally done within prepare_host_edge, but the whole system has moved by this stage
                x0 = np.concatenate([
                    x0[:num_host_coords],
                    np.mean(
                        single_topology.interpolate_params(
                            x0[num_host_coords:num_host_coords + mol_a_size],
                            x0[num_host_coords + mol_a_size:]),
                        axis=0,
                    ),
                ])
            else:
                # Pre-equilibration was requested but no cached state exists
                # for this edge: warn, then fall back to minimization.
                if self.pre_equilibrate:
                    print(
                        "Edge not correctly pre-equilibrated, ensure equilibrate_edges was called"
                    )
                print(
                    f"Minimizing the {stage} host structure to remove clashes."
                )
                # (ytz): this isn't strictly symmetric, and we should modify minimize later on remove
                # the hysteresis by jointly minimizing against a and b at the same time. We may also want
                # to remove the randomness completely from the minimization.
                min_host_coords = minimizer.minimize_host_4d([mol_a, mol_b],
                                                             host_system,
                                                             host_coords,
                                                             self.ff, host_box)

                unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                    ff_params, host_system, min_host_coords)

                x0 = coords
            # Both branches start from zero velocities.
            v0 = np.zeros_like(x0)

            time_step = 1.5e-3

            # assumes the first potential is the harmonic bond term, per the
            # variable name -- TODO confirm against prepare_host_edge
            harmonic_bond_potential = unbound_potentials[0]
            bond_list = get_bond_list(harmonic_bond_potential)
            if self.hmr:
                # With hmr enabled the masses are replaced by apply_hmr's
                # output and a larger time step is used (presumably hydrogen
                # mass repartitioning -- confirm against apply_hmr).
                masses = apply_hmr(masses, bond_list)
                time_step = 2.5e-3
            group_idxs = get_group_indices(bond_list)

            # The same fixed seed is given to the integrator and the barostat.
            seed = 0

            temperature = 300.0
            pressure = 1.0

            integrator = LangevinIntegrator(temperature, time_step, 1.0,
                                            masses, seed)

            barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature,
                                          group_idxs, self.barostat_interval,
                                          seed)

            model = estimator.FreeEnergyModel(
                unbound_potentials,
                self.client,
                host_box,
                x0,
                v0,
                integrator,
                lambda_schedule,
                self.equil_steps,
                self.prod_steps,
                barostat,
            )

            dG, results = estimator.deltaG(model, sys_params)

            stage_dGs.append(dG)
            stage_results.append((stage, results))

        # complex stage minus solvent stage
        pred = stage_dGs[0] - stage_dGs[1]

        return pred, stage_results
Exemple #11
0
def test_free_energy_estimator_with_endpoint_correction():
    """
    Smoke-test the ABFE estimator with the endpoint correction turned on.

    The stated intent is that f([a,b,c,...]) yields derivatives df/da, df/db,
    df/dc, ... whose shapes match a, b, c, ... respectively. The body below
    only evaluates the loss (once with no client, once with a single-worker
    CUDAPoolClient); it does not itself compute or inspect derivatives.
    """
    n_atoms = 15
    n_bonds = 3
    n_angles = 4
    n_restraints = 5

    coords = np.random.rand(n_atoms, 3)
    velocities = np.zeros_like(coords)

    bond_pot, bond_params = get_harmonic_bond(n_atoms, n_bonds)
    angle_pot, angle_params = get_harmonic_angle(n_atoms, n_angles)
    restr_pot, restr_params = get_harmonic_restraints(n_atoms, n_restraints)
    all_potentials = [bond_pot, angle_pot, restr_pot]
    all_params = [bond_params, angle_params, restr_params]

    masses = np.random.rand(n_atoms)
    box = np.eye(3, dtype=np.float64)

    seed = 2021
    temperature, pressure = 300.0, 1.0

    group_idxs = get_group_indices(get_bond_list(bond_pot))
    integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
    barostat = MonteCarloBarostat(coords.shape[0], pressure, temperature,
                                  group_idxs, 25, seed)

    beta = 0.125
    schedule = np.linspace(0, 1.0, 4)

    def loss_fn(sys_params):
        # `client` is resolved at call time from the loop variable below.
        model = estimator_abfe.FreeEnergyModel(
            all_potentials,
            True,  # endpoint correction on
            client,
            box,
            coords,
            velocities,
            integrator,
            barostat,
            schedule,
            100,
            100,
            beta,
            "test",
        )
        dG, _bar_dG_err, _results = estimator_abfe.deltaG(model, sys_params)
        return dG**2

    for client in (None, CUDAPoolClient(1)):
        loss_fn(all_params)
Exemple #12
0
def dock_and_equilibrate(host_pdbfile,
                         guests_sdfile,
                         max_lambda,
                         insertion_steps,
                         eq_steps,
                         outdir,
                         fewer_outfiles=False,
                         constant_atoms=()):
    """Solvates a host, inserts guest(s) into solvated host, equilibrates

    Parameters
    ----------

    host_pdbfile: path to host pdb file to dock into
    guests_sdfile: path to input sdf with guests to pose/dock
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    insertion_steps: how many steps to insert the guest over (recommended: 501)
    eq_steps: how many steps of equilibration to do after insertion (recommended: 15001)
    outdir: where to write output (will be created if it does not already exist)
    fewer_outfiles: if True, will only write frames for the equilibration, not insertion
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files). Implemented by adding 50000 to those atoms' masses.

    Output
    ------

    A pdb & sdf file every 100 steps of insertion (outdir/<guest_name>/<guest_name>_<step>.[pdb/sdf])
    A pdb & sdf file every 1000 steps of equilibration (outdir/<guest_name>/<guest_name>_<step>.[pdb/sdf])
    stdout every 100(0) steps noting the step number, lambda value, and energy
    stdout for each guest noting the work of transition
    stdout for each guest noting how long it took to run

    Note
    ----
    If any norm of force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py],
    the current phase (insertion or equilibration) for that guest will stop and the
    insertion work will not be calculated.
    Guest entries that the SDF supplier yields as None are skipped with a message.
    """

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(outdir, exist_ok=True)

    print(f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}
    MAX_LAMBDA = {max_lambda}
    INSERTION_STEPS = {insertion_steps}
    EQ_STEPS = {eq_steps}
    """)

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    # TODO: return topology from builders.build_protein_system
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)
    num_host_atoms = len(solvated_host_coords)

    # sometimes water boxes are sad. Should be minimized first; this is a workaround
    host_box += np.eye(3) * 0.1
    print("host box", host_box)

    # Round-trip the solvated host through a temporary PDB file to obtain an
    # RDKit molecule for frame writing, then delete the file.
    solvated_host_pdb = os.path.join(outdir, "solvated_host.pdb")
    writer = pdb_writer.PDBWriter([solvated_topology], solvated_host_pdb)
    writer.write_frame(solvated_host_coords)
    writer.close()
    solvated_host_mol = Chem.MolFromPDBFile(solvated_host_pdb, removeHs=False)
    os.remove(solvated_host_pdb)
    final_host_potentials = []
    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        solvated_host_system, cutoff=1.2)
    host_nb_bp = None
    for bp in host_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert host_nb_bp is None
            host_nb_bp = bp
        else:
            final_host_potentials.append(bp)

    # Read the guest forcefield text once and close the handle; the original
    # re-opened the file for every guest and never closed it.
    ff_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "ff/params/smirnoff_1_1_0_ccc.py",
    )
    with open(ff_path) as ff_file:
        guest_ff_text = ff_file.read()

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        if guest_mol is None:
            # the supplier yields None for entries it cannot parse
            print("Skipping a guest entry that could not be parsed")
            continue

        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(),
                                     dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units
        # handlers are rebuilt per guest so no state is shared between guests
        guest_ff_handlers = deserialize_handlers(guest_ff_text)
        ff = Forcefield(guest_ff_handlers)
        guest_base_top = topology.BaseTopology(guest_mol, ff)

        # combine host & guest
        hgt = topology.HostGuestTopology(host_nb_bp, guest_base_top)
        # setup the parameter handlers for the ligand
        bonded_tuples = [[hgt.parameterize_harmonic_bond, ff.hb_handle],
                         [hgt.parameterize_harmonic_angle, ff.ha_handle],
                         [hgt.parameterize_proper_torsion, ff.pt_handle],
                         [hgt.parameterize_improper_torsion, ff.it_handle]]
        combined_bps = list(final_host_potentials)
        # instantiate the vjps while parameterizing (forward pass)
        for fn, handle in bonded_tuples:
            params, potential = fn(handle.params)
            combined_bps.append(potential.bind(params))
        nb_params, nb_potential = hgt.parameterize_nonbonded(
            ff.q_handle.params, ff.lj_handle.params)
        combined_bps.append(nb_potential.bind(nb_params))
        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]
        combined_masses = np.concatenate([host_masses, guest_masses])

        x0 = np.concatenate([solvated_host_coords, orig_guest_coords])
        v0 = np.zeros_like(x0)
        print(
            "SYSTEM",
            f"guest_name: {guest_name}",
            f"num_atoms: {len(x0)}",
        )

        # make the "constant" atoms very heavy so they barely move
        for atom_num in constant_atoms:
            combined_masses[atom_num - 1] += 50000

        seed = 2021
        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                  seed).impl()

        u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]

        ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

        # collect a du_dl calculation once every other step
        subsample_freq = 2
        du_dl_obs = custom_ops.FullPartialUPartialLambda(
            u_impls, subsample_freq)
        ctxt.add_observable(du_dl_obs)

        # insert guest
        insertion_lambda_schedule = np.linspace(max_lambda, 0.0,
                                                insertion_steps)
        insertion_check_steps = (0, int(insertion_steps / 2),
                                 insertion_steps - 1)
        calc_work = True
        for step, lamb in enumerate(insertion_lambda_schedule):
            ctxt.step(lamb)
            if step % 100 == 0:
                report.report_step(ctxt, step, lamb, host_box, combined_bps,
                                   u_impls, guest_name, insertion_steps,
                                   "INSERTION")
                if not fewer_outfiles:
                    # fetch coordinates once, then split into host and guest
                    x_t = ctxt.get_x_t()
                    host_coords = x_t[:num_host_atoms] * 10
                    guest_coords = x_t[num_host_atoms:] * 10
                    report.write_frame(
                        host_coords,
                        solvated_host_mol,
                        guest_coords,
                        guest_mol,
                        guest_name,
                        outdir,
                        str(step).zfill(len(str(insertion_steps))),
                        "ins",
                    )
            if step in insertion_check_steps:
                if report.too_much_force(ctxt, lamb, host_box, combined_bps,
                                         u_impls):
                    calc_work = False
                    break

        # Query the observable once instead of up to three times.
        full_du_dls = du_dl_obs.full_du_dl()

        # Note: this condition only applies for ABFE, not RBFE
        if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
            print("Error: du_dl endpoints are not ~0")
            calc_work = False

        if calc_work:
            work = np.trapz(full_du_dls,
                            insertion_lambda_schedule[::subsample_freq])
            print(f"guest_name: {guest_name}\tinsertion_work: {work:.2f}")

        # equilibrate
        # NOTE(review): equilibration still starts after an aborted insertion;
        # this matches the original behavior -- confirm it is intended.
        eq_check_steps = (0, int(eq_steps / 2), eq_steps - 1)
        for step in range(eq_steps):
            ctxt.step(0.00)
            if step % 1000 == 0:
                report.report_step(ctxt, step, 0.00, host_box, combined_bps,
                                   u_impls, guest_name, eq_steps,
                                   'EQUILIBRATION')
                x_t = ctxt.get_x_t()
                host_coords = x_t[:num_host_atoms] * 10
                guest_coords = x_t[num_host_atoms:] * 10
                report.write_frame(
                    host_coords,
                    solvated_host_mol,
                    guest_coords,
                    guest_mol,
                    guest_name,
                    outdir,
                    str(step).zfill(len(str(eq_steps))),
                    "eq",
                )
            if step in eq_check_steps:
                if report.too_much_force(ctxt, 0.00, host_box, combined_bps,
                                         u_impls):
                    break

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
Exemple #13
0
def run_leg(
    orig_host_coords,
    orig_guest_coords,
    combined_bps,
    combined_masses,
    host_box,
    guest_name,
    leg_type,
    host_mol,
    guest_mol,
    outdir,
    num_deletions,
    deletion_steps,
    insertion_max_lambda,
    insertion_steps,
    eq1_steps,
    fewer_outfiles=False,
    no_outfiles=False,
):
    """Insert the guest, equilibrate, then launch a series of deletion jobs.

    The leg runs in three phases on a single simulation context:

    1. lambda is ramped from ``insertion_max_lambda`` to ``MIN_LAMBDA`` over
       ``insertion_steps`` steps,
    2. ``eq1_steps`` steps are run at ``MIN_LAMBDA``,
    3. ``num_deletions`` times: 1001 further steps at ``MIN_LAMBDA`` followed
       by a call to ``do_deletion`` that starts from the current coordinates
       and velocities.

    After each phase the state is reported and checked with
    ``report.too_much_force``; a failed check ends the leg early.

    Parameters
    ----------
    orig_host_coords, orig_guest_coords:
        Starting coordinates; concatenated host-first to form the system.
    combined_bps:
        Bound potentials of the combined system.
    combined_masses:
        Masses handed to the Langevin integrator.
    host_box:
        Box passed to the context and to the reporting helpers.
    guest_name, leg_type:
        Labels used in the printed reports and in output names.
    host_mol, guest_mol, outdir:
        Forwarded to ``report.write_frame`` whenever a frame is written.
    num_deletions:
        Number of equilibrate-then-delete rounds in phase 3.
    deletion_steps:
        Forwarded to ``do_deletion``.
    insertion_max_lambda, insertion_steps, eq1_steps:
        Schedule controls for phases 1 and 2 (see above).
    fewer_outfiles:
        If True, the frames after phases 1 and 2 are not written.
    no_outfiles:
        If True, no frames are written at all.

    Returns
    -------
    list
        One entry per completed ``do_deletion`` call. Empty when the force
        check fails after phase 1 or 2; partial when it fails in phase 3.
    """
    x0 = np.concatenate([orig_host_coords, orig_guest_coords])
    v0 = np.zeros_like(x0)
    print(
        f"{leg_type.upper()}_SYSTEM",
        f"guest_name: {guest_name}",
        f"num_atoms: {len(x0)}",
    )

    integrator_impl = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                         2021).impl()
    u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]
    ctxt = custom_ops.Context(x0, v0, host_box, integrator_impl, u_impls)

    def _save_frame(frame_label, suffix):
        # Coordinates are scaled by 10 before being written
        # (presumably nm -> angstrom; confirm against report.write_frame).
        host_part = ctxt.get_x_t()[:len(orig_host_coords)] * 10
        guest_part = ctxt.get_x_t()[len(orig_host_coords):] * 10
        report.write_frame(
            host_part,
            host_mol,
            guest_part,
            guest_mol,
            guest_name,
            outdir,
            frame_label,
            suffix,
        )

    def _force_too_high(at_lambda):
        return report.too_much_force(ctxt, at_lambda, host_box, combined_bps,
                                     u_impls)

    write_early_frames = not (fewer_outfiles or no_outfiles)

    # --- phase 1: insert guest -------------------------------------------
    ramp = np.linspace(insertion_max_lambda, MIN_LAMBDA, insertion_steps)
    ctxt.multiple_steps(ramp, 0)  # do not collect du_dls

    final_lambda = ramp[-1]
    last_step = len(ramp) - 1
    report.report_step(
        ctxt,
        last_step,
        final_lambda,
        host_box,
        combined_bps,
        u_impls,
        guest_name,
        insertion_steps,
        f"{leg_type.upper()}_INSERTION",
    )
    if write_early_frames:
        _save_frame(
            str(last_step).zfill(len(str(insertion_steps))),
            f"{leg_type}-ins",
        )
    if _force_too_high(final_lambda):
        return []

    # --- phase 2: first equilibration ------------------------------------
    eq1_schedule = np.ones(eq1_steps) * MIN_LAMBDA
    # Indexing here (before stepping) also rejects an empty schedule early.
    final_lambda = eq1_schedule[-1]
    last_step = len(eq1_schedule) - 1
    ctxt.multiple_steps(eq1_schedule, 0)
    report.report_step(
        ctxt,
        last_step,
        MIN_LAMBDA,
        host_box,
        combined_bps,
        u_impls,
        guest_name,
        eq1_steps,
        f"{leg_type.upper()}_EQUILIBRATION_1",
    )
    if write_early_frames:
        _save_frame(
            str(last_step).zfill(len(str(eq1_steps))),
            f"{leg_type}-eq1",
        )
    if _force_too_high(MIN_LAMBDA):
        print("Too much force")
        return []

    # --- phase 3: equilibrate more & shoot off deletion jobs -------------
    steps_per_batch = 1001
    total_eq2_steps = num_deletions * steps_per_batch
    works = []
    for batch in range(num_deletions):
        batch_schedule = np.ones(steps_per_batch) * MIN_LAMBDA
        ctxt.multiple_steps(batch_schedule, 0)

        cumulative_step = (batch + 1) * (len(batch_schedule) - 1)
        report.report_step(
            ctxt,
            cumulative_step,
            MIN_LAMBDA,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            total_eq2_steps,
            f"{leg_type.upper()}_EQUILIBRATION_2",
        )

        # TODO: if guest has undocked, stop simulation
        if not no_outfiles:
            _save_frame(
                str(cumulative_step).zfill(len(str(total_eq2_steps))),
                f"{leg_type}-eq2",
            )
        if _force_too_high(MIN_LAMBDA):
            print("Too much force")
            return works

        works.append(
            do_deletion(
                ctxt.get_x_t(),
                ctxt.get_v_t(),
                combined_bps,
                combined_masses,
                host_box,
                guest_name,
                leg_type,
                u_impls,
                deletion_steps,
            ))

    return works
Exemple #14
0
    def _futures_a_to_b(self, ff_params, mol_a, mol_b, combined_core_idxs, x0,
                        box0, prefix, seed):
        """Build the host-edge model for mol_a/mol_b and launch one simulation
        per lambda window (plus one endpoint-correction run).

        Parameters
        ----------
        ff_params:
            Forwarded to ``rfe.prepare_host_edge``.
        mol_a, mol_b:
            The two molecules; only ``GetNumAtoms()`` is used directly here,
            the rest is delegated to ``self.setup_topology``.
        combined_core_idxs: np.ndarray
            Two-column array of core atom pairs, indexed in the combined
            (host + mol_a + mol_b) system. Used as-is for the restraint.
        x0:
            Starting coordinates of the combined system.
        box0:
            Starting box (the equilibrated box is expected here).
        prefix:
            Forwarded to ``estimator_abfe.FreeEnergyModel``.
        seed:
            Seed for both the Langevin integrator and the barostat.

        Returns
        -------
        tuple
            ``(sys_params, model, futures)`` where ``futures`` holds one
            future per entry of ``model.lambda_schedule`` followed by one
            extra future for the endpoint correction. When ``self.client`` is
            None the simulations are run inline and wrapped in
            ``_MockFuture``.
        """
        num_host_atoms = (x0.shape[0] - mol_a.GetNumAtoms() -
                          mol_b.GetNumAtoms())

        # (ytz): super ugly, undo combined_core_idxs to get back original idxs
        # The subtraction allocates a new array, so the caller's
        # combined_core_idxs is not modified by the in-place update below.
        core_idxs = combined_core_idxs - num_host_atoms
        core_idxs[:, 1] -= mol_a.GetNumAtoms()

        dual_topology = self.setup_topology(mol_a, mol_b)
        rfe = free_energy_rabfe.RelativeFreeEnergy(dual_topology)

        unbound_potentials, sys_params, masses = rfe.prepare_host_edge(
            ff_params, self.host_system)

        # Core restraint: column 0 of the parameters is set to k_core, column
        # 1 stays 0 (presumably force constant and rest length -- confirm
        # against potentials.HarmonicBond).
        k_core = 30.0
        core_params = np.zeros_like(combined_core_idxs).astype(np.float64)
        core_params[:, 0] = k_core

        restraint_potential = potentials.HarmonicBond(combined_core_idxs, )

        # The restraint must stay LAST: the endpoint correction below strips
        # it with bound_potentials[:-1].
        unbound_potentials.append(restraint_potential)
        sys_params.append(core_params)

        # tbd sample from boltzmann distribution later
        v0 = np.zeros_like(x0)

        beta = 1 / (constants.BOLTZ * self.temperature)

        # NOTE(review): core_idxs here are the un-offset (ligand-local)
        # indices while the bond indices come from the combined system --
        # confirm this mix is intended for HMR / barostat grouping.
        bond_list = np.concatenate(
            [unbound_potentials[0].get_idxs(), core_idxs])
        masses = model_utils.apply_hmr(masses, bond_list)

        friction = 1.0
        integrator = LangevinIntegrator(self.temperature, self.dt, friction,
                                        masses, seed)
        bond_list = list(map(tuple, bond_list))
        group_indices = get_group_indices(bond_list)
        barostat_interval = 5

        barostat = MonteCarloBarostat(x0.shape[0], self.pressure,
                                      self.temperature, group_indices,
                                      barostat_interval, seed)

        endpoint_correct = True
        model = estimator_abfe.FreeEnergyModel(
            unbound_potentials,
            endpoint_correct,
            self.client,
            box0,  # important, use equilibrated box.
            x0,
            v0,
            integrator,
            barostat,
            self.host_schedule,
            self.equil_steps,
            self.prod_steps,
            beta,
            prefix,
        )

        bound_potentials = [
            unbound_pot.bind(np.asarray(params))
            for params, unbound_pot in zip(sys_params,
                                           model.unbound_potentials)
        ]

        # Defined once up front: it is needed by the endpoint-correction
        # arguments even when the lambda schedule is empty.
        subsample_interval = 1000

        all_args = [(
            lamb,
            model.box,
            model.x0,
            model.v0,
            bound_potentials,
            model.integrator,
            model.barostat,
            model.equil_steps,
            model.prod_steps,
            subsample_interval,
            subsample_interval,
            model.lambda_schedule,
        ) for lamb in model.lambda_schedule]

        if endpoint_correct:

            assert isinstance(bound_potentials[-1], potentials.HarmonicBond)

            all_args.append((
                1.0,
                model.box,
                model.x0,
                model.v0,
                bound_potentials[:-1],  # strip out the restraints
                model.integrator,
                model.barostat,
                model.equil_steps,
                model.prod_steps,
                subsample_interval,
                subsample_interval,
                [],  # no need to evaluate Us for the endpoint correction
            ))

        if self.client is None:
            # No client: run each simulation inline, keep the future API.
            futures = [
                _MockFuture(estimator_abfe.simulate(*args))
                for args in all_args
            ]
        else:
            futures = [
                self.client.submit(estimator_abfe.simulate, *args)
                for args in all_args
            ]

        return sys_params, model, futures
Exemple #15
0
def minimize_host_4d(romol, host_system, host_coords, ff, box):
    """
    Insert romol into a host system via 4D decoupling under a Langevin thermostat.

    The system is stepped 1000 times with a 300 K Langevin integrator while
    lambda goes linearly from 1.0 to 0. The ligand masses are multiplied by
    100000 so that the ligand stays (nearly) fixed and effectively only the
    host coordinates relax.

    Parameters
    ----------
    romol: ROMol
        Ligand to be inserted. It must be embedded.

    host_system: openmm.System
        OpenMM System representing the host

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    ff: ff.Forcefield
        Wrapper class around a list of handlers

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    Returns
    -------
    np.ndarray
        The relaxed host coordinates (the first N rows of the final state).

    Raises
    ------
    ValueError
        If the deserialized host system contains no Nonbonded potential.

    """

    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    # keep the ligand rigid
    ligand_masses = [a.GetMass()*100000 for a in romol.GetAtoms()]
    combined_masses = np.concatenate([host_masses, ligand_masses])
    ligand_coords = get_romol_conf(romol)
    combined_coords = np.concatenate([host_coords, ligand_coords])
    num_host_atoms = host_coords.shape[0]

    # The host nonbonded term is merged with the ligand through the
    # HostGuestTopology; every other host potential is used unchanged.
    host_p = None
    final_potentials = []
    for bp in host_bps:
        if isinstance(bp, potentials.Nonbonded):
            host_p = bp
        else:
            final_potentials.append(bp)

    if host_p is None:
        raise ValueError("host_system has no Nonbonded potential")

    gbt = topology.BaseTopology(romol, ff)
    hgt = topology.HostGuestTopology(host_p, gbt)

    # setup the parameter handlers for the ligand
    tuples = [
        [hgt.parameterize_harmonic_bond, [ff.hb_handle]],
        [hgt.parameterize_harmonic_angle, [ff.ha_handle]],
        [hgt.parameterize_proper_torsion, [ff.pt_handle]],
        [hgt.parameterize_improper_torsion, [ff.it_handle]],
        [hgt.parameterize_nonbonded, [ff.q_handle, ff.lj_handle]],
    ]

    for fn, handles in tuples:
        params, potential = fn(*[h.params for h in handles])
        final_potentials.append(potential.bind(params))

    seed = 2020

    intg = LangevinIntegrator(
        300.0,
        1.5e-3,
        1.0,
        combined_masses,
        seed
    ).impl()

    x0 = combined_coords
    v0 = np.zeros_like(x0)

    u_impls = [bp.bound_impl(precision=np.float32) for bp in final_potentials]

    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        u_impls
    )

    # Gradually couple the ligand in: lambda 1.0 -> 0 over 1000 steps.
    for lamb in np.linspace(1.0, 0, 1000):
        ctxt.step(lamb)

    return ctxt.get_x_t()[:num_host_atoms]
Exemple #16
0
def minimize_host_4d(mols, host_system, host_coords, ff, box, mol_coords=None) -> np.ndarray:
    """
    Relax a host around one or two inserted ligands using 4D decoupling.

    Three stages are run back to back:

    1. ``fire_minimize`` with a lambda schedule of fifty 1.0 values,
    2. 1000 steps of 0 Kelvin Langevin integration with lambda going linearly
       from 1.0 to 0,
    3. ``fire_minimize`` again with a lambda schedule of fifty 0.0 values.

    Ligand masses are scaled by 100000 so the ligands stay put and only the
    host coordinates move appreciably.

    Parameters
    ----------
    mols: list of Chem.Mol
        Ligands to be inserted. This must be of length 1 or 2 for now.

    host_system: openmm.System
        OpenMM System representing the host

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    ff: ff.Forcefield
        Wrapper class around a list of handlers

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    mol_coords: list of np.ndarray
        Optional ligand coordinates, one array per ligand. When omitted the
        coordinates are taken from ``get_romol_conf(mol)``.

    Returns
    -------
    np.ndarray
        The minimized host coordinates (first N rows of the final state).

    Raises
    ------
    ValueError
        If ``mols`` does not have length 1 or 2.
    AssertionError
        If ``box`` is not 3x3, or if any per-atom force norm of any potential
        at lambda=0 is not below 25000 after the final minimization.

    """

    assert box.shape == (3, 3)

    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    num_host_atoms = host_coords.shape[0]

    n_mols = len(mols)
    if n_mols == 1:
        top = topology.BaseTopology(mols[0], ff)
    elif n_mols == 2:
        top = topology.DualTopologyMinimization(mols[0], mols[1], ff)
    else:
        raise ValueError("mols must be length 1 or 2")

    # Host entries come first, ligands are appended in order.
    all_masses = [np.array(host_masses)]
    all_confs = [np.array(host_coords)]

    # mass increase is to keep the ligand fixed
    all_masses.extend(
        np.array([atom.GetMass() * 100000 for atom in mol.GetAtoms()]) for mol in mols
    )

    if mol_coords is None:
        all_confs.extend(get_romol_conf(mol) for mol in mols)
    else:
        all_confs.extend(mol_coords)

    combined_masses = np.concatenate(all_masses)
    combined_coords = np.concatenate(all_confs)

    hgt = topology.HostGuestTopology(host_bps, top)
    u_impls = bind_potentials(hgt, ff)

    # The seed is irrelevant: at 0 Kelvin the integrator adds no noise.
    intg = LangevinIntegrator(0.0, 1.5e-3, 1.0, combined_masses, 0).impl()

    v0 = np.zeros_like(combined_coords)

    # Stage 1: minimize at lambda = 1.0.
    x0 = fire_minimize(combined_coords, u_impls, box, np.ones(50))

    # Stage 2: couple the ligand(s) in.
    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)
    ctxt.multiple_steps(np.linspace(1.0, 0, 1000))

    # Stage 3: minimize at lambda = 0.0, then sanity-check the forces.
    final_coords = fire_minimize(ctxt.get_x_t(), u_impls, box, np.zeros(50))
    for impl in u_impls:
        du_dx, _, _ = impl.execute(final_coords, box, 0.0)
        per_atom_norm = np.linalg.norm(du_dx, axis=-1)
        assert np.all(per_atom_norm < 25000)

    return final_coords[:num_host_atoms]
Exemple #17
0
def equilibrate_host(
    mol: Chem.Mol,
    host_system: openmm.System,
    host_coords: NDArray,
    temperature: float,
    pressure: float,
    ff: Forcefield,
    box: NDArray,
    n_steps: int,
    seed: Optional[int] = None,
) -> Tuple[NDArray, NDArray]:
    """
    Equilibrate a host around a reference molecule with a MonteCarloBarostat.

    Handy when one host is reused for many FEP calculations that share the
    same reference molecule (e.g. a star map).

    Steps performed:
    - relax the host with the molecule held rigid (``minimize_host_4d``)
    - minimize host and molecule together (``fire_minimize``)
    - run ``n_steps`` of dynamics at lambda = 0 with HMR masses, a 2.5e-3
      timestep and a barostat move every 5 steps

    Parameters
    ----------
    mol: Chem.Mol
        Ligand for the host to equilibrate with.

    host_system: openmm.System
        OpenMM System representing the host.

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    temperature: float
        Temperature at which to run the simulation. Units of kelvins.

    pressure: float
        Pressure at which to run the simulation. Units of bars.

    ff: ff.Forcefield
        Wrapper class around a list of handlers.

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    n_steps: int
        Number of steps to run the simulation for.

    seed: int or None
        Seed for the integrator and the barostat. A random one is drawn when
        None is given.

    Returns
    -------
    tuple (coords, box)
        Coordinates of the whole equilibrated system (host followed by the
        molecule) and the final box.

    """
    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    # insert mol into the binding pocket.
    relaxed_host_coords = minimize_host_4d([mol], host_system, host_coords, ff, box)

    mol_masses = [atom.GetMass() for atom in mol.GetAtoms()]
    mol_conf = get_romol_conf(mol)

    combined_masses = np.concatenate([host_masses, mol_masses])
    combined_coords = np.concatenate([relaxed_host_coords, mol_conf])

    hgt = topology.HostGuestTopology(host_bps, topology.BaseTopology(mol, ff))

    # (parameterize function, force-field handles feeding it); the harmonic
    # bond entry must stay first because the bond list is read from it below.
    parameterizers = [
        (hgt.parameterize_harmonic_bond, [ff.hb_handle]),
        (hgt.parameterize_harmonic_angle, [ff.ha_handle]),
        (hgt.parameterize_periodic_torsion, [ff.pt_handle, ff.it_handle]),
        (hgt.parameterize_nonbonded, [ff.q_handle, ff.lj_handle]),
    ]

    bound_potentials = []
    u_impls = []
    for parameterize, handles in parameterizers:
        params, potential = parameterize(*[handle.params for handle in handles])
        bound = potential.bind(params)
        bound_potentials.append(bound)
        u_impls.append(bound.bound_impl(precision=np.float32))

    bond_list = get_bond_list(bound_potentials[0])
    combined_masses = model_utils.apply_hmr(combined_masses, bond_list)

    if seed is None:
        seed = np.random.randint(np.iinfo(np.int32).max)

    dt = 2.5e-3
    friction = 1.0
    integrator = LangevinIntegrator(temperature, dt, friction, combined_masses, seed).impl()

    v0 = np.zeros_like(combined_coords)

    barostat_interval = 5
    barostat = MonteCarloBarostat(
        combined_coords.shape[0],
        pressure,
        temperature,
        get_group_indices(bond_list),
        barostat_interval,
        seed,
    ).impl(u_impls)

    # Re-minimize with the mol being flexible
    x0 = fire_minimize(combined_coords, u_impls, box, np.ones(50))

    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, integrator, u_impls, barostat)

    # n_steps of dynamics, every one at lambda = 0.0
    ctxt.multiple_steps(np.linspace(0.0, 0.0, n_steps))

    return ctxt.get_x_t(), ctxt.get_box()
Exemple #18
0
def dock_and_equilibrate(
    host_pdbfile,
    guests_sdfile,
    max_lambda,
    insertion_steps,
    eq_steps,
    outdir,
    fewer_outfiles=False,
    constant_atoms=None,
):
    """Solvates a host, inserts guest(s) into solvated host, equilibrates

    Parameters
    ----------

    host_pdbfile: path to host pdb file to dock into
    guests_sdfile: path to input sdf with guests to pose/dock
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    insertion_steps: how many steps to insert the guest over (recommended: 501)
    eq_steps: how many steps of equilibration to do after insertion (recommended: 15001)
    outdir: where to write output (will be created if it does not already exist)
    fewer_outfiles: if True, the insertion frame is not written and, of the
        equilibration frames, only the one at step eq_steps - 1 is written
        (and only if that step is a multiple of 1000)
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files). Their masses are increased by 50000.
        None (the default) means no atoms are held fixed.

    Output
    ------

    A pdb & sdf file for the last step of insertion
       (outdir/<guest_name>/<guest_name>_ins_<step>_[host.pdb/guest.sdf])
    A pdb & sdf file every 1000 steps of equilibration
       (outdir/<guest_name>/<guest_name>_eq_<step>_[host.pdb/guest.sdf])
    stdout corresponding to the files written noting the lambda value and energy
    stdout for each guest noting the work of transition, if applicable
    stdout for each guest noting how long it took to run

    Note
    ----
    The work will not be calculated if the du_dl endpoints are not close to 0 or if any norm of
    force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py]

    If the force check fails right after insertion, the guest is abandoned:
    no equilibration is run and no timing line is printed for it.
    """

    if constant_atoms is None:
        constant_atoms = []

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outdir, exist_ok=True)

    print(f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}
    MAX_LAMBDA = {max_lambda}
    INSERTION_STEPS = {insertion_steps}
    EQ_STEPS = {eq_steps}
    """)

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    # Round-trip the solvated host through a temporary PDB file to obtain an
    # RDKit molecule for it.
    tmp_fd, solvated_host_pdb = tempfile.mkstemp(suffix=".pdb", text=True)
    # mkstemp hands back an open descriptor that the caller owns; only the
    # path is used below, so release the descriptor right away.
    os.close(tmp_fd)
    try:
        writer = pdb_writer.PDBWriter([solvated_topology], solvated_host_pdb)
        writer.write_frame(solvated_host_coords)
        writer.close()
        solvated_host_mol = Chem.MolFromPDBFile(solvated_host_pdb,
                                                removeHs=False)
    finally:
        # Remove the temp file even if writing or parsing failed.
        os.remove(solvated_host_pdb)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    num_host_atoms = len(solvated_host_coords)

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(),
                                     dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        minimized_coords = minimizer.minimize_host_4d([guest_mol],
                                                      solvated_host_system,
                                                      solvated_host_coords, ff,
                                                      host_box)

        afe = free_energy.AbsoluteFreeEnergy(guest_mol, ff)

        ups, sys_params, combined_masses, _ = afe.prepare_host_edge(
            ff.get_ordered_params(), solvated_host_system, minimized_coords)

        combined_bps = [up.bind(sp) for up, sp in zip(ups, sys_params)]

        x0 = np.concatenate([minimized_coords, orig_guest_coords])
        v0 = np.zeros_like(x0)
        print("SYSTEM", f"guest_name: {guest_name}", f"num_atoms: {len(x0)}")

        # Make the requested atoms very heavy so they barely move
        # (constant_atoms is 1-indexed).
        for atom_num in constant_atoms:
            combined_masses[atom_num - 1] += 50000

        seed = 2021
        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                  seed).impl()

        u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]

        ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

        # insert guest
        insertion_lambda_schedule = np.linspace(max_lambda, 0.0,
                                                insertion_steps)
        calc_work = True

        # an interval of 1 collects a du_dl value at every step
        subsample_interval = 1

        full_du_dls, _, _ = ctxt.multiple_steps(insertion_lambda_schedule,
                                                subsample_interval)
        step = len(insertion_lambda_schedule) - 1
        lamb = insertion_lambda_schedule[-1]
        ctxt.step(lamb)

        report.report_step(
            ctxt,
            step,
            lamb,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            insertion_steps,
            "INSERTION",
        )
        if not fewer_outfiles:
            host_coords = ctxt.get_x_t()[:num_host_atoms] * 10
            guest_coords = ctxt.get_x_t()[num_host_atoms:] * 10
            report.write_frame(
                host_coords,
                solvated_host_mol,
                guest_coords,
                guest_mol,
                guest_name,
                outdir,
                str(step).zfill(len(str(insertion_steps))),
                "ins",
            )

        if report.too_much_force(ctxt, lamb, host_box, combined_bps, u_impls):
            # Give up on this guest entirely and move on to the next one.
            print("Not calculating work (too much force)")
            continue

        # Note: this condition only applies for ABFE, not RBFE
        if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
            print("Not calculating work (du_dl endpoints are not ~0)")
            calc_work = False

        if calc_work:
            work = np.trapz(full_du_dls,
                            insertion_lambda_schedule[::subsample_interval])
            print(f"guest_name: {guest_name}\tinsertion_work: {work:.2f}")

        # equilibrate
        for step in range(eq_steps):
            ctxt.step(0.00)
            if step % 1000 == 0:
                report.report_step(
                    ctxt,
                    step,
                    0.00,
                    host_box,
                    combined_bps,
                    u_impls,
                    guest_name,
                    eq_steps,
                    "EQUILIBRATION",
                )
                if (not fewer_outfiles) or (step == eq_steps - 1):
                    host_coords = ctxt.get_x_t()[:num_host_atoms] * 10
                    guest_coords = ctxt.get_x_t()[num_host_atoms:] * 10
                    report.write_frame(
                        host_coords,
                        solvated_host_mol,
                        guest_coords,
                        guest_mol,
                        guest_name,
                        outdir,
                        str(step).zfill(len(str(eq_steps))),
                        "eq",
                    )
            # Force sanity check at the start, midpoint and end only.
            if step in (0, int(eq_steps / 2), eq_steps - 1):
                if report.too_much_force(ctxt, 0.00, host_box, combined_bps,
                                         u_impls):
                    break

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
Exemple #19
0
    def equilibrate_edges(
        self,
        edges: List[Tuple[Chem.Mol, Chem.Mol, np.ndarray]],
        lamb: float = 0.0,
        barostat_interval: int = 10,
        equilibration_steps: int = 100000,
        cache_path: str = "equilibration_cache.pkl",
    ):
        """
        Pre equilibrate edges and cache them for later use in predictions.

        Does nothing when ``self.pre_equilibrate`` is falsy. If ``cache_path``
        points to an existing file, the cache is loaded from it and no
        simulation is run. Otherwise one equilibration job per (stage, edge)
        is submitted through ``self.client`` for both the complex and the
        solvent stage, the results are stored in ``self._equil_cache`` and,
        if ``cache_path`` is set, written back to that path.

        Parameters
        ----------
        edges: List of tuples with mol_a, mol_b, core
            Edges to equilibrate

        lamb: float
            Lambda value to equilibrate at. Uses Dual Topology to equilibrate

        barostat_interval: int
            Interval on which to run barostat during equilibration

        equilibration_steps: int
            Number of steps to equilibrate the edge for

        cache_path: string
            Path to look for existing cache or path to where to save cache. By default
            it will write out a pickle file in the local directory. A falsy
            value (None or "") disables both loading and saving.
        """
        if not self.pre_equilibrate:
            return
        # Guard on cache_path first: os.path.isfile(None) raises TypeError,
        # and a falsy path is treated as "no cache" when saving below.
        if cache_path and os.path.isfile(cache_path):
            # NOTE(review): `load` looks like pickle.load (the cache is
            # described as a pickle file). Unpickling can execute arbitrary
            # code, so only point cache_path at files you trust.
            with open(cache_path, "rb") as ifs:
                self._equil_cache = load(ifs)
            print("Loaded Pre-equilibrated structures from cache")
            return
        futures = []
        ordered_params = self.ff.get_ordered_params()

        temperature = 300.0
        pressure = 1.0

        for stage, host_system, host_coords, host_box in [
            ("complex", self.complex_system, self.complex_coords,
             self.complex_box),
            ("solvent", self.solvent_system, self.solvent_coords,
             self.solvent_box),
        ]:
            # Run all complex legs first then solvent, as they will likely take longer than the solvent leg
            for mol_a, mol_b, core in edges:
                # Use DualTopology to ensure mols exist in the same space.
                topo = topology.DualTopologyMinimization(mol_a, mol_b, self.ff)
                rfe = free_energy.RelativeFreeEnergy(topo)
                min_coords = minimizer.minimize_host_4d([mol_a, mol_b],
                                                        host_system,
                                                        host_coords, self.ff,
                                                        host_box)
                unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                    ordered_params, host_system, min_coords)
                # The first potential is taken to be the harmonic bond term;
                # its bond list defines the barostat groups and drives HMR.
                harmonic_bond_potential = unbound_potentials[0]
                bond_list = get_bond_list(harmonic_bond_potential)
                group_idxs = get_group_indices(bond_list)
                time_step = 1.5e-3
                if self.hmr:
                    # With repartitioned masses a larger timestep is used.
                    masses = apply_hmr(masses, bond_list)
                    time_step = 2.5e-3
                # Integrator and barostat both use a fixed seed of 0.
                integrator = LangevinIntegrator(temperature, time_step, 1.0,
                                                masses, 0)
                barostat = MonteCarloBarostat(coords.shape[0], pressure,
                                              temperature, group_idxs,
                                              barostat_interval, 0)
                pots = [
                    bp.bind(np.asarray(params))
                    for bp, params in zip(unbound_potentials, sys_params)
                ]
                future = self.client.submit(
                    estimator.equilibrate,
                    integrator,
                    barostat,
                    pots,
                    coords,
                    host_box,
                    lamb,
                    equilibration_steps,
                )
                futures.append((stage, (mol_a, mol_b, core), future))
        num_equil = len(futures)
        # Collect results in submission order; result() blocks until ready.
        for i, (stage, edge, future) in enumerate(futures):
            edge_hash = self._edge_hash(stage, *edge)
            self._equil_cache[edge_hash] = future.result()
            if (i + 1) % 5 == 0:
                print(f"Pre-equilibrated {i+1} of {num_equil} edges")
        print(f"Pre-equilibrated {num_equil} edges")
        if cache_path:
            with open(cache_path, "wb") as ofs:
                dump(self._equil_cache, ofs)
            print(f"Saved equilibration_cache to {cache_path}")
Exemple #20
0
def benchmark_dhfr():
    """Benchmark simulation throughput on the solvated DHFR test system.

    Builds the system from ``tests/data/5dfr_solv_equil.pdb``, runs 50000
    Langevin steps at lambda = 0 and prints the running ns/day estimate every
    1000 steps, the total wall time, and finally the averaged du/dp
    observable of every potential.
    """
    host_pdb = app.PDBFile('tests/data/5dfr_solv_equil.pdb')
    protein_ff = app.ForceField('amber99sbildn.xml', 'tip3p.xml')
    host_system = protein_ff.createSystem(
        host_pdb.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False,
    )
    host_coords = host_pdb.positions

    box_vectors = host_pdb.topology.getPeriodicBoxVectors()
    box = np.asarray(box_vectors / box_vectors.unit)

    host_fns, host_masses = openmm_deserializer.deserialize_system(
        host_system, cutoff=1.0)

    host_conf = np.array([
        [to_md_units(x), to_md_units(y), to_md_units(z)]
        for x, y, z in host_coords
    ])

    seed = 1234
    dt = 1.5e-3
    intg = LangevinIntegrator(300, dt, 1.0, np.array(host_masses),
                              seed).impl()

    # get the bound implementation of every potential
    bps = [fn.bound_impl(precision=np.float32) for fn in host_fns]

    ctxt = custom_ops.Context(host_conf, np.zeros_like(host_conf), box, intg,
                              bps)

    # initialize observables
    obs = []
    for bp in bps:
        observable = custom_ops.AvgPartialUPartialParam(bp, 100)
        ctxt.add_observable(observable)
        obs.append(observable)

    lamb = 0.0
    num_steps = 50000
    seconds_per_day = 86400

    start = time.time()

    # The writer is opened and closed, but frame output is currently disabled.
    writer = PDBWriter([host_pdb.topology], "dhfr.pdb")

    for step in range(num_steps):
        ctxt.step(lamb)
        if step % 1000 != 0:
            continue

        elapsed = time.time() - start
        steps_per_day = (step / elapsed) * seconds_per_day
        ns_per_day = (dt * steps_per_day) * 1e-3
        print(step, "ns/day", ns_per_day)

    print("total time", time.time() - start)

    writer.close()

    # bond angle torsions nonbonded
    for potential, observable in zip(host_fns, obs):
        dp = observable.avg_du_dp()
        print(potential, dp.shape)
        print(dp)
def run_leg(
    combined_coords,
    combined_bps,
    combined_masses,
    host_box,
    guest_name,
    leg_type,
    num_switches,
    transition_steps,
):
    """Equilibrate at MIN_LAMBDA in batches and run one switch after each batch.

    A single context is built from ``combined_coords`` (zero initial
    velocities), a Langevin integrator with a fixed seed, and the float32
    implementations of ``combined_bps``. For each of ``num_switches`` batches
    the context is advanced at constant ``MIN_LAMBDA``, progress is reported,
    and ``do_switch`` is started from the context's current positions and
    velocities.

    Args:
        combined_coords: starting coordinates of the combined system.
        combined_bps: bound potentials; each must provide ``bound_impl``.
        combined_masses: masses handed to the integrator and to ``do_switch``.
        host_box: box passed to the context, the reporter and ``do_switch``.
        guest_name: label used in the printed/reported output.
        leg_type: label for this leg; upper-cased in the report headers.
        num_switches: number of equilibrate-then-switch rounds.
        transition_steps: forwarded unchanged to ``do_switch``.

    Returns:
        The list of values returned by ``do_switch`` (one per round), or
        ``None`` as soon as ``report.too_much_force`` is truthy.
    """
    positions = combined_coords
    velocities = np.zeros_like(positions)
    print(
        f"{leg_type.upper()}_SYSTEM",
        f"guest_name: {guest_name}",
        f"num_atoms: {len(positions)}",
    )

    integrator_impl = LangevinIntegrator(
        300.0, 1.5e-3, 1.0, combined_masses, 2021
    ).impl()
    u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]
    ctxt = custom_ops.Context(
        positions, velocities, host_box, integrator_impl, u_impls
    )

    # TODO: pre-equilibrate?

    # equilibrate & shoot off switching jobs
    steps_per_batch = 1001

    works = []
    for round_number in range(1, num_switches + 1):
        schedule = np.ones(steps_per_batch) * MIN_LAMBDA
        ctxt.multiple_steps(schedule, 0)

        # the reported step counter advances by len(schedule) - 1 per round
        last_step_in_batch = len(schedule) - 1
        report.report_step(
            ctxt,
            round_number * last_step_in_batch,
            schedule[-1],
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            num_switches * steps_per_batch,
            f"{leg_type.upper()}_EQUILIBRATION_2",
        )

        unstable = report.too_much_force(
            ctxt, MIN_LAMBDA, host_box, combined_bps, u_impls
        )
        if unstable:
            return

        works.append(
            do_switch(
                ctxt.get_x_t(),
                ctxt.get_v_t(),
                combined_bps,
                combined_masses,
                host_box,
                guest_name,
                leg_type,
                u_impls,
                transition_steps,
            )
        )

    return works
Exemple #22
0
    def simulate_futures(
            self,
            ff_params,
            mol,
            x0,
            box0,
            prefix,
            core_idxs=None,
            seed=0
    ) -> Tuple[List[Any], estimator_abfe.FreeEnergyModel, List[Any]]:
        """Build the host-edge model for ``mol`` and launch one simulation per lambda.

        Args:
            ff_params: force-field parameters passed to ``prepare_host_edge``.
            mol: molecule used to build the topology and the free-energy object.
            x0: initial coordinates; velocities start at zero.
            box0: initial box.
            prefix: forwarded to the ``FreeEnergyModel`` constructor.
            core_idxs: accepted but not used by this method.
            seed: RNG seed for the integrator and barostat; ``0`` means
                "draw a random int32 seed".

        Returns:
            ``(sys_params, model, futures)`` where ``futures`` holds one entry
            per lambda window. When ``self.client`` is ``None`` the simulations
            run inline and are wrapped in ``_MockFuture``; otherwise they are
            submitted to the client.
        """
        topology = self.setup_topology(mol)
        afe = free_energy_rabfe.AbsoluteFreeEnergy(mol, topology)
        unbound_potentials, sys_params, masses = afe.prepare_host_edge(
            ff_params, self.host_system)

        if seed == 0:
            seed = np.random.randint(np.iinfo(np.int32).max)

        beta = 1 / (constants.BOLTZ * self.temperature)

        # the first potential supplies the bond list used both for the mass
        # adjustment (apply_hmr) and for the barostat's group indices
        bond_list = get_bond_list(unbound_potentials[0])
        masses = model_utils.apply_hmr(masses, bond_list)
        integrator = LangevinIntegrator(self.temperature, self.dt, 1.0,
                                        masses, seed)

        barostat = MonteCarloBarostat(x0.shape[0], self.pressure,
                                      self.temperature,
                                      get_group_indices(bond_list), 5, seed)

        endpoint_correct = False
        model = estimator_abfe.FreeEnergyModel(
            unbound_potentials,
            endpoint_correct,
            self.client,
            box0,
            x0,
            np.zeros_like(x0),
            integrator,
            barostat,
            self.host_schedule,
            self.equil_steps,
            self.prod_steps,
            beta,
            prefix,
        )

        bound_potentials = [
            unbound_pot.bind(np.asarray(params))
            for params, unbound_pot in zip(sys_params,
                                           model.unbound_potentials)
        ]

        subsample_interval = 1000

        # one argument tuple per lambda window, in schedule order
        all_args = [
            (
                lamb,
                model.box,
                model.x0,
                model.v0,
                bound_potentials,
                model.integrator,
                model.barostat,
                model.equil_steps,
                model.prod_steps,
                subsample_interval,
                subsample_interval,
                model.lambda_schedule,
            )
            for lamb in model.lambda_schedule
        ]

        if endpoint_correct:

            assert isinstance(bound_potentials[-1], potentials.HarmonicBond)

            all_args.append((
                1.0,
                model.box,
                model.x0,
                model.v0,
                bound_potentials[:-1],  # strip out the restraints
                model.integrator,
                model.barostat,
                model.equil_steps,
                model.prod_steps,
                subsample_interval,
                subsample_interval,
                [],  # no need to evaluate Us for the endpoint correction
            ))

        if self.client is None:
            futures = [
                _MockFuture(estimator_abfe.simulate(*args))
                for args in all_args
            ]
        else:
            futures = [
                self.client.submit(estimator_abfe.simulate, *args)
                for args in all_args
            ]
        return sys_params, model, futures
Exemple #23
0
def main(args, stage):
    """Run one stage of a two-stage free-energy calculation in a water box.

    Two ligands are embedded, combined with a water system, the host plus the
    first ligand is minimized in a worker process, and then ten epochs of
    per-lambda simulations are mapped over a process pool. After each epoch
    the trapezoidal integral of the returned values over the lambda schedule
    is printed as ``dG``.

    Args:
        args: parsed options; only ``args.num_gpus`` is read here (pool size
            and the modulus used to assign a device index to each window).
        stage: ``0`` or ``1``, selecting ``rbfe.stage_0`` / ``rbfe.stage_1``
            and the matching lambda schedule.

    Raises:
        AssertionError: if ``stage`` is neither 0 nor 1.
    """
    # NOTE: despite the variable names, both molecules are currently built
    # from the same SMILES string.
    benzene = Chem.AddHs(Chem.MolFromSmiles("C1=CC=C2C=CC=CC2=C1"))  # a
    phenol = Chem.AddHs(Chem.MolFromSmiles("C1=CC=C2C=CC=CC2=C1"))  # b

    AllChem.EmbedMolecule(benzene)
    AllChem.EmbedMolecule(phenol)

    # close the force-field file deterministically instead of leaking the handle
    with open('ff/params/smirnoff_1_1_0_ccc.py') as ff_file:
        ff_handlers = deserialize_handlers(ff_file.read())
    r_benzene = Recipe.from_rdkit(benzene, ff_handlers)
    r_phenol = Recipe.from_rdkit(phenol, ff_handlers)

    r_combined = r_benzene.combine(r_phenol)

    # atoms 0..9 of ligand a are paired with atoms 0..9 of ligand b; column 1
    # is shifted by the size of ligand a to index into the combined system
    core_pairs = np.repeat(np.arange(10, dtype=np.int32)[:, None], 2, axis=1)
    core_pairs[:, 1] += benzene.GetNumAtoms()

    a_idxs = np.arange(benzene.GetNumAtoms())
    b_idxs = np.arange(phenol.GetNumAtoms()) + benzene.GetNumAtoms()

    core_k = 20.0

    if stage == 0:
        centroid_k = 200.0
        rbfe.stage_0(r_combined, b_idxs, core_pairs, centroid_k, core_k)
        lambda_schedule = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
    elif stage == 1:
        rbfe.stage_1(r_combined, a_idxs, b_idxs, core_pairs, core_k)
        lambda_schedule = np.linspace(0.0, 1.2, 60)
    else:
        # raised explicitly so the check survives `python -O`
        raise AssertionError(f"unsupported stage: {stage!r}")

    system, host_coords, box, topology = builders.build_water_system(4.0)

    r_host = Recipe.from_openmm(system)
    r_final = r_host.combine(r_combined)

    # minimize coordinates of host + ligand A
    ha_coords = np.concatenate([host_coords, get_romol_conf(benzene)])

    # we need to run this in a subprocess since the cuda runtime
    # must not be initialized in the master thread due to lack of
    # fork safety
    pool = Pool(args.num_gpus)
    try:
        r_minimize = minimize_setup(r_host, r_benzene)
        minimized = pool.map(
            minimize,
            [(r_minimize.bound_potentials, r_minimize.masses, ha_coords, box)],
            chunksize=1)
        # pool.map returns a list; there is exactly one job
        ha_coords = minimized[0]
    finally:
        pool.close()

    pool = Pool(args.num_gpus)
    try:
        x0 = np.concatenate([ha_coords, get_romol_conf(phenol)])

        masses = np.concatenate(
            [r_host.masses, r_benzene.masses, r_phenol.masses])

        seed = np.random.randint(np.iinfo(np.int32).max)

        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, masses, seed)

        # the per-window arguments do not change between epochs
        run_args = [
            (lamb, intg, r_final.bound_potentials, r_final.masses, x0, box,
             lamb_idx % args.num_gpus, stage)
            for lamb_idx, lamb in enumerate(lambda_schedule)
        ]

        # production run at various values of lambda
        for epoch in range(10):
            avg_du_dls = pool.map(run, run_args, chunksize=1)

            print("stage", stage, "epoch", epoch, "dG",
                  np.trapz(avg_du_dls, lambda_schedule))
    finally:
        # the original never released the second pool
        pool.close()
Exemple #24
0
def run_leg(
    orig_host_coords,
    orig_guest_coords,
    combined_bps,
    combined_masses,
    host_box,
    guest_name,
    leg_type,
    host_mol,
    guest_mol,
    outdir,
    fewer_outfiles=False,
    no_outfiles=False,
):
    """Insert the guest, equilibrate twice, and launch deletion jobs.

    Three phases run on one context built from the concatenated host and
    guest coordinates:

    1. insertion: lambda is ramped from ``INSERTION_MAX_LAMBDA`` to
       ``MIN_LAMBDA`` over ``TRANSITION_STEPS`` steps;
    2. first equilibration: ``EQ1_STEPS`` steps at ``MIN_LAMBDA``;
    3. second equilibration: ``EQ2_STEPS`` steps at ``MIN_LAMBDA``, calling
       ``do_deletion`` from the current state every 1000 steps.

    Args:
        orig_host_coords: host coordinates; their length splits host from
            guest atoms when frames are written.
        orig_guest_coords: guest coordinates, appended after the host.
        combined_bps: bound potentials; each must provide ``bound_impl``.
        combined_masses: masses for the integrator and ``do_deletion``.
        host_box: box passed to the context, reporter and ``do_deletion``.
        guest_name: label used in reports and frame output.
        leg_type: label for this leg (upper-cased in report headers).
        host_mol: host molecule handed to ``report.write_frame``.
        guest_mol: guest molecule handed to ``report.write_frame``.
        outdir: output directory handed to ``report.write_frame``.
        fewer_outfiles: if true, skip frames for insertion and the first
            equilibration.
        no_outfiles: if true, skip frames in every phase.

    Returns:
        Always ``None``. The function returns early when
        ``report.too_much_force`` is truthy at one of the checked steps.
    """
    x0 = np.concatenate([orig_host_coords, orig_guest_coords])
    v0 = np.zeros_like(x0)
    leg_label = leg_type.upper()
    print(
        f"{leg_label}_SYSTEM",
        f"guest_name: {guest_name}",
        f"num_atoms: {len(x0)}",
    )

    integrator_impl = LangevinIntegrator(
        300.0, 1.5e-3, 1.0, combined_masses, 2021
    ).impl()
    u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]
    ctxt = custom_ops.Context(x0, v0, host_box, integrator_impl, u_impls)

    num_host_atoms = len(orig_host_coords)
    # frames for the first two phases are written only when neither flag is set
    write_early_frames = not (fewer_outfiles or no_outfiles)

    def save_frame(step, total_steps, file_tag):
        # coordinates are scaled by 10 before writing, as in every phase
        host_frame = ctxt.get_x_t()[:num_host_atoms] * 10
        guest_frame = ctxt.get_x_t()[num_host_atoms:] * 10
        report.write_frame(
            host_frame,
            host_mol,
            guest_frame,
            guest_mol,
            guest_name,
            outdir,
            str(step).zfill(len(str(total_steps))),
            file_tag,
        )

    def force_too_large(lamb):
        return report.too_much_force(
            ctxt, lamb, host_box, combined_bps, u_impls
        )

    # insert guest
    insertion_lambda_schedule = np.linspace(
        INSERTION_MAX_LAMBDA, MIN_LAMBDA, TRANSITION_STEPS
    )
    insertion_checks = (0, int(TRANSITION_STEPS / 2), TRANSITION_STEPS - 1)
    for step, lamb in enumerate(insertion_lambda_schedule):
        ctxt.step(lamb)
        if step % 100 == 0:
            report.report_step(
                ctxt,
                step,
                lamb,
                host_box,
                combined_bps,
                u_impls,
                guest_name,
                TRANSITION_STEPS,
                f"{leg_label}_INSERTION",
            )
            if write_early_frames:
                save_frame(step, TRANSITION_STEPS, f"{leg_type}-ins")
        # forces are checked only at the first, middle and last step
        if step in insertion_checks and force_too_large(lamb):
            return

    # equilibrate
    eq1_checks = (0, int(EQ1_STEPS / 2), EQ1_STEPS - 1)
    for step in range(EQ1_STEPS):
        ctxt.step(MIN_LAMBDA)
        if step % 1000 == 0:
            report.report_step(
                ctxt,
                step,
                MIN_LAMBDA,
                host_box,
                combined_bps,
                u_impls,
                guest_name,
                EQ1_STEPS,
                f"{leg_label}_EQUILIBRATION_1",
            )
            if write_early_frames:
                save_frame(step, EQ1_STEPS, f"{leg_type}-eq1")
        if step in eq1_checks and force_too_large(MIN_LAMBDA):
            return

    # equilibrate more & shoot off deletion jobs
    for step in range(EQ2_STEPS):
        ctxt.step(MIN_LAMBDA)
        if step % 1000 != 0:
            continue

        report.report_step(
            ctxt,
            step,
            MIN_LAMBDA,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            EQ2_STEPS,
            f"{leg_label}_EQUILIBRATION_2",
        )

        # TODO: if guest has undocked, stop simulation
        if not no_outfiles:
            save_frame(step, EQ2_STEPS, f"{leg_type}-eq2")
        if force_too_large(MIN_LAMBDA):
            return

        do_deletion(
            ctxt.get_x_t(),
            ctxt.get_v_t(),
            combined_bps,
            combined_masses,
            host_box,
            guest_name,
            leg_type,
        )
Exemple #25
0
def benchmark(
    label,
    masses,
    lamb,
    x0,
    v0,
    box,
    bound_potentials,
    hmr=False,
    verbose=True,
    num_batches=100,
    steps_per_batch=1000,
    compute_du_dl_interval=0,
    barostat_interval=0,
):
    """Time batched MD steps at a fixed lambda and print the throughput.

    One untimed warm-up batch is run first; then ``num_batches`` batches of
    ``steps_per_batch`` steps are timed individually and the running mean
    batch time is converted to ns/day.

    TODO: configuration blob containing num_batches, steps_per_batch, and any other options

    Args:
        label: name printed in the summary line.
        masses: per-atom masses for the integrator.
        lamb: lambda value used for every step.
        x0: initial coordinates; ``x0.shape[0]`` is used as the atom count.
        v0: initial velocities.
        box: simulation box.
        bound_potentials: bound potentials; the first one supplies the bond
            list, and each must provide ``bound_impl``.
        hmr: if true, use ``dt = 2.5e-3`` and pass the masses through
            ``apply_hmr``; otherwise ``dt = 1.5e-3``.
        verbose: print the running throughput after every batch.
        num_batches: number of timed batches (``0`` is allowed; the reported
            speed is then ``nan``).
        steps_per_batch: steps in each batch.
        compute_du_dl_interval: forwarded to ``ctxt.multiple_steps``.
        barostat_interval: if positive, attach a Monte Carlo barostat with
            this interval.

    Raises:
        AssertionError: if any final coordinate has absolute value >= 1000.
    """

    seed = 1234
    dt = 1.5e-3
    temperature = 300
    pressure = 1.0
    seconds_per_day = 86400

    harmonic_bond_potential = bound_potentials[0]
    bond_list = get_bond_list(harmonic_bond_potential)
    if hmr:
        dt = 2.5e-3
        masses = apply_hmr(masses, bond_list)
    intg = LangevinIntegrator(temperature, dt, 1.0, np.array(masses), seed).impl()

    # get the bound implementations
    bps = [potential.bound_impl(precision=np.float32) for potential in bound_potentials]

    baro_impl = None
    if barostat_interval > 0:
        group_idxs = get_group_indices(bond_list)
        baro = MonteCarloBarostat(
            x0.shape[0],
            pressure,
            temperature,
            group_idxs,
            barostat_interval,
            seed,
        )
        baro_impl = baro.impl(bps)

    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        bps,
        barostat=baro_impl,
    )

    batch_times = []

    lambda_schedule = np.ones(steps_per_batch) * lamb

    # run once before timer starts
    ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)

    # defined up front so the summary line still works when num_batches == 0
    ns_per_day = float("nan")

    start = time.time()

    for _ in range(num_batches):

        # time the current batch
        batch_start = time.time()
        ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)
        batch_end = time.time()

        batch_times.append(batch_end - batch_start)

        # throughput is based on the mean of all batches timed so far
        steps_per_second = steps_per_batch / np.mean(batch_times)
        steps_per_day = steps_per_second * seconds_per_day

        ps_per_day = dt * steps_per_day
        ns_per_day = ps_per_day * 1e-3

        if verbose:
            print(f"steps per second: {steps_per_second:.3f}")
            print(f"ns per day: {ns_per_day:.3f}")

    # sanity check that the coordinates did not blow up
    assert np.all(np.abs(ctxt.get_x_t()) < 1000)

    print(
        f"{label}: N={x0.shape[0]} speed: {ns_per_day:.2f}ns/day dt: {dt*1e3}fs (ran {steps_per_batch * num_batches} steps in {(time.time() - start):.2f}s)"
    )
                                          complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords)

    # define NPT ensemble
    potential_energy_model = PotentialEnergyModel(sys_params,
                                                  unbound_potentials)
    ensemble = NPTEnsemble(potential_energy_model, temperature, pressure)

    # define a thermostat
    integrator = LangevinIntegrator(
        temperature.value_in_unit(unit.kelvin),
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    )
    integrator_impl = integrator.impl()

    def reduced_potential_fxn(x, box, lam):
        """Return only the first value of the ensemble's (u, du_dx) pair."""
        reduced_u, _unused_gradient = ensemble.reduced_potential_and_gradient(
            x, box, lam)
        return reduced_u

    # get list of molecules for barostat by looking at bond table
    harmonic_bond_potential = unbound_potentials[0]
    bond_list = get_bond_list(harmonic_bond_potential)
    group_indices = get_group_indices(bond_list)

    trajs = []
Exemple #27
0
            # out_dir = os.path.join(epoch_dir, "mol_"+mol.GetProp("_Name"))\
            # if not os.path.exists(out_dir):
            # os.makedirs(out_dir)

            # safety guard
            try:

                potentials, masses, vjp_fns = hydration_setup.combine_potentials(
                    ff_handlers, mol, host_system, precision=np.float32)

                coords = hydration_setup.combine_coordinates(host_coords, mol)

                seed = np.random.randint(0, np.iinfo(np.int32).max)

                intg = LangevinIntegrator(float(intg_cfg["temperature"]),
                                          float(intg_cfg["dt"]),
                                          float(intg_cfg["friction"]), masses,
                                          seed)

                sim = simulation.Simulation(coords, np.zeros_like(coords), box,
                                            potentials, intg)

                (pred_dG,
                 pred_err), grad_dG, du_dls = hydration_model.simulate(
                     sim, num_steps, lambda_schedule, stubs)

                plt.plot(lambda_schedule, du_dls)
                plt.ylabel("du_dlambda")
                plt.xlabel("lambda")
                plt.savefig(
                    os.path.join(epoch_dir, "ti_mol_" + mol.GetProp("_Name")))
                plt.clf()
Exemple #28
0
# note: lambda goes from 0 to 1, 0 being fully interacting and 1.0 being fully non-interacting.
# For each of 8 end-point lambdas (from 1 down to 0), build a fresh context,
# ramp lambda from 1.0 to that end point over 1000 steps, and dump two frames
# (steps 0 and 500) to a per-window PDB file.
for lamb_idx, final_lamb in enumerate(np.linspace(1, 0, 8)):

    # write some conformations into this PDB file
    writer = pdb_writer.PDBWriter([omm_topology, romol_a, romol_b],
                                  "debug_" + str(lamb_idx) + ".pdb")

    try:
        seed = 2020

        # note: the .impl() call at the end returns a pickle-able version of the
        #   wrapper function -- since contexts are not pickle-able -- which will
        #   be useful later in timemachine's multi-device parallelization strategy)
        # note: OpenMM unit system used throughout
        #   (temperature: kelvin, timestep: picosecond, collision_rate: picosecond^-1)
        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                  seed).impl()

        x0 = combined_coords
        v0 = np.zeros_like(x0)

        u_impls = [bp.bound_impl(np.float32) for bp in final_potentials]

        # context components: positions, velocities, box, integrator, energy fxns
        ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

        for step, lamb in enumerate(np.linspace(1.0, final_lamb, 1000)):
            if step % 500 == 0:
                writer.write_frame(ctxt.get_x_t() * 10)
            # advance the simulation; without this every frame written above
            # is just the initial state and `lamb` is never used
            ctxt.step(lamb)
    finally:
        # release the PDB file even if a step fails
        writer.close()
Exemple #29
0
def test_barostat_partial_group_idxs():
    """Verify that the barostat can handle a subset of the molecules
    rather than all of them. This test only verifies that it runs, not the behavior."""
    temperature = 300.0 * unit.kelvin
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    barostat_interval = 3
    collision_rate = 1.0 / unit.picosecond
    pressure = 1.0 * unit.atmosphere

    seed = 2021
    np.random.seed(seed)

    ligand = hif2a_ligand_pair.mol_a
    forcefield = hif2a_ligand_pair.ff
    complex_system, complex_coords, complex_box, complex_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer))

    minimized_coords = minimize_host_4d([ligand], complex_system,
                                        complex_coords, forcefield,
                                        complex_box)
    afe = AbsoluteFreeEnergy(ligand, forcefield)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        forcefield.get_ordered_params(), complex_system, minimized_coords)

    # get list of molecules for barostat by looking at bond table
    # (the first potential is taken to be the harmonic bond term)
    all_groups = get_group_indices(get_bond_list(unbound_potentials[0]))

    # Cut the number of groups in half: keep only the second half
    partial_groups = all_groups[len(all_groups) // 2:]

    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, unbound_potentials)
    ]
    u_impls = [bp.bound_impl(precision=np.float32) for bp in bound_potentials]

    temperature_kelvin = temperature.value_in_unit(unit.kelvin)

    intg_impl = LangevinIntegrator(
        temperature_kelvin,
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    ).impl()

    initial_velocities = sample_velocities(masses * unit.amu, temperature)

    barostat_impl = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        temperature_kelvin,
        partial_groups,
        barostat_interval,
        u_impls,
        seed,
    )

    ctxt = custom_ops.Context(coords,
                              initial_velocities,
                              complex_box,
                              intg_impl,
                              u_impls,
                              barostat=barostat_impl)

    # 1000 steps at lambda = 1.0; success means nothing raised
    lam = 1.0
    ctxt.multiple_steps(np.ones(1000) * lam)
Exemple #30
0
                    mol,
                    host_system,
                    precision=np.float32
                )

                coords = hydration_setup.combine_coordinates(
                    host_coords,
                    mol
                )

                seed = np.random.randint(0, np.iinfo(np.int32).max)

                intg = LangevinIntegrator(
                    float(intg_cfg['temperature']),
                    float(intg_cfg['dt']),
                    float(intg_cfg['friction']),
                    masses,
                    seed
                )

                sim = simulation.Simulation(
                    coords,
                    np.zeros_like(coords),
                    box,
                    potentials,
                    intg
                )

                (pred_dG, pred_err), grad_dG, du_dls = hydration_model.simulate(
                    sim,
                    num_steps,