Example #1
0
    def simulate_futures(
            self,
            ff_params,
            mol,
            x0,
            box0,
            prefix,
            core_idxs=None,
            seed=0
    ) -> Tuple[List[Any], estimator_abfe.FreeEnergyModel, List[Any]]:
        top = self.setup_topology(mol)

        afe = free_energy_rabfe.AbsoluteFreeEnergy(mol, top)

        unbound_potentials, sys_params, masses = afe.prepare_host_edge(
            ff_params, self.host_system)

        if seed == 0:
            seed = np.random.randint(np.iinfo(np.int32).max)

        beta = 1 / (constants.BOLTZ * self.temperature)

        bond_list = get_bond_list(unbound_potentials[0])
        masses = model_utils.apply_hmr(masses, bond_list)
        friction = 1.0
        integrator = LangevinIntegrator(self.temperature, self.dt, friction,
                                        masses, seed)

        group_indices = get_group_indices(bond_list)
        barostat_interval = 5
        barostat = MonteCarloBarostat(x0.shape[0], self.pressure,
                                      self.temperature, group_indices,
                                      barostat_interval, seed)

        v0 = np.zeros_like(x0)

        endpoint_correct = False
        model = estimator_abfe.FreeEnergyModel(
            unbound_potentials,
            endpoint_correct,
            self.client,
            box0,
            x0,
            v0,
            integrator,
            barostat,
            self.host_schedule,
            self.equil_steps,
            self.prod_steps,
            beta,
            prefix,
        )
        bound_potentials = []
        for params, unbound_pot in zip(sys_params, model.unbound_potentials):
            bp = unbound_pot.bind(np.asarray(params))
            bound_potentials.append(bp)

        all_args = []
        for lamb_idx, lamb in enumerate(model.lambda_schedule):

            subsample_interval = 1000

            all_args.append((
                lamb,
                model.box,
                model.x0,
                model.v0,
                bound_potentials,
                model.integrator,
                model.barostat,
                model.equil_steps,
                model.prod_steps,
                subsample_interval,
                subsample_interval,
                model.lambda_schedule,
            ))

        if endpoint_correct:

            assert isinstance(bound_potentials[-1], potentials.HarmonicBond)

            all_args.append((
                1.0,
                model.box,
                model.x0,
                model.v0,
                bound_potentials[:-1],  # strip out the restraints
                model.integrator,
                model.barostat,
                model.equil_steps,
                model.prod_steps,
                subsample_interval,
                subsample_interval,
                [],  # no need to evaluate Us for the endpoint correction
            ))

        futures = []
        if self.client is None:
            for args in all_args:
                futures.append(_MockFuture(estimator_abfe.simulate(*args)))
        else:
            for args in all_args:
                futures.append(
                    self.client.submit(estimator_abfe.simulate, *args))
        return sys_params, model, futures
Example #2
0
    def _futures_a_to_b(self, ff_params, mol_a, mol_b, combined_core_idxs, x0,
                        box0, prefix, seed):

        num_host_atoms = x0.shape[0] - mol_a.GetNumAtoms() - mol_b.GetNumAtoms(
        )

        # (ytz): super ugly, undo combined_core_idxs to get back original idxs
        core_idxs = combined_core_idxs - num_host_atoms
        core_idxs[:, 1] -= mol_a.GetNumAtoms()

        dual_topology = self.setup_topology(mol_a, mol_b)
        rfe = free_energy_rabfe.RelativeFreeEnergy(dual_topology)

        unbound_potentials, sys_params, masses = rfe.prepare_host_edge(
            ff_params, self.host_system)

        k_core = 30.0

        core_params = np.zeros_like(combined_core_idxs).astype(np.float64)
        core_params[:, 0] = k_core

        restraint_potential = potentials.HarmonicBond(combined_core_idxs, )

        unbound_potentials.append(restraint_potential)
        sys_params.append(core_params)

        # tbd sample from boltzmann distribution later
        v0 = np.zeros_like(x0)

        beta = 1 / (constants.BOLTZ * self.temperature)

        bond_list = np.concatenate(
            [unbound_potentials[0].get_idxs(), core_idxs])
        masses = model_utils.apply_hmr(masses, bond_list)

        friction = 1.0
        integrator = LangevinIntegrator(self.temperature, self.dt, friction,
                                        masses, seed)
        bond_list = list(map(tuple, bond_list))
        group_indices = get_group_indices(bond_list)
        barostat_interval = 5

        barostat = MonteCarloBarostat(x0.shape[0], self.pressure,
                                      self.temperature, group_indices,
                                      barostat_interval, seed)

        endpoint_correct = True
        model = estimator_abfe.FreeEnergyModel(
            unbound_potentials,
            endpoint_correct,
            self.client,
            box0,  # important, use equilibrated box.
            x0,
            v0,
            integrator,
            barostat,
            self.host_schedule,
            self.equil_steps,
            self.prod_steps,
            beta,
            prefix,
        )

        bound_potentials = []
        for params, unbound_pot in zip(sys_params, model.unbound_potentials):
            bp = unbound_pot.bind(np.asarray(params))
            bound_potentials.append(bp)

        all_args = []
        for lamb_idx, lamb in enumerate(model.lambda_schedule):

            subsample_interval = 1000

            all_args.append((
                lamb,
                model.box,
                model.x0,
                model.v0,
                bound_potentials,
                model.integrator,
                model.barostat,
                model.equil_steps,
                model.prod_steps,
                subsample_interval,
                subsample_interval,
                model.lambda_schedule,
            ))

        if endpoint_correct:

            assert isinstance(bound_potentials[-1], potentials.HarmonicBond)

            all_args.append((
                1.0,
                model.box,
                model.x0,
                model.v0,
                bound_potentials[:-1],  # strip out the restraints
                model.integrator,
                model.barostat,
                model.equil_steps,
                model.prod_steps,
                subsample_interval,
                subsample_interval,
                [],  # no need to evaluate Us for the endpoint correction
            ))

        futures = []
        if self.client is None:
            for args in all_args:
                futures.append(_MockFuture(estimator_abfe.simulate(*args)))
        else:
            for args in all_args:
                futures.append(
                    self.client.submit(estimator_abfe.simulate, *args))

        return sys_params, model, futures
Example #3
0
def equilibrate_host(
    mol: Chem.Mol,
    host_system: openmm.System,
    host_coords: NDArray,
    temperature: float,
    pressure: float,
    ff: Forcefield,
    box: NDArray,
    n_steps: int,
    seed: Optional[int] = None,
) -> Tuple[NDArray, NDArray]:
    """
    Equilibrate a host system given a reference molecule using the MonteCarloBarostat.

    Useful for preparing a host that will be used for multiple FEP calculations using the same reference, IE a starmap.

    Performs the following:
    - Minimize host with rigid mol
    - Minimize host and mol
    - Run n_steps with HMR enabled and MonteCarloBarostat every 5 steps

    Parameters
    ----------
    mol: Chem.Mol
        Ligand for the host to equilibrate with.

    host_system: openmm.System
        OpenMM System representing the host.

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    temperature: float
        Temperature at which to run the simulation. Units of kelvins.

    pressure: float
        Pressure at which to run the simulation. Units of bars.

    ff: ff.Forcefield
        Wrapper class around a list of handlers.

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    n_steps: int
        Number of steps to run the simulation for.

    seed: int or None
        Value to seed simulation with

    Returns
    -------
    tuple (coords, box)
        Returns equilibrated system coords as well as the box.

    """
    # insert mol into the binding pocket.
    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    min_host_coords = minimize_host_4d([mol], host_system, host_coords, ff, box)

    ligand_masses = [a.GetMass() for a in mol.GetAtoms()]
    ligand_coords = get_romol_conf(mol)

    combined_masses = np.concatenate([host_masses, ligand_masses])
    combined_coords = np.concatenate([min_host_coords, ligand_coords])

    top = topology.BaseTopology(mol, ff)
    hgt = topology.HostGuestTopology(host_bps, top)

    # setup the parameter handlers for the ligand
    tuples = [
        [hgt.parameterize_harmonic_bond, [ff.hb_handle]],
        [hgt.parameterize_harmonic_angle, [ff.ha_handle]],
        [hgt.parameterize_periodic_torsion, [ff.pt_handle, ff.it_handle]],
        [hgt.parameterize_nonbonded, [ff.q_handle, ff.lj_handle]],
    ]

    u_impls = []
    bound_potentials = []

    for fn, handles in tuples:
        params, potential = fn(*[h.params for h in handles])
        bp = potential.bind(params)
        bound_potentials.append(bp)
        u_impls.append(bp.bound_impl(precision=np.float32))

    bond_list = get_bond_list(bound_potentials[0])
    combined_masses = model_utils.apply_hmr(combined_masses, bond_list)

    dt = 2.5e-3
    friction = 1.0

    if seed is None:
        seed = np.random.randint(np.iinfo(np.int32).max)

    integrator = LangevinIntegrator(temperature, dt, friction, combined_masses, seed).impl()

    x0 = combined_coords
    v0 = np.zeros_like(x0)

    group_indices = get_group_indices(bond_list)
    barostat_interval = 5
    barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature, group_indices, barostat_interval, seed).impl(
        u_impls
    )

    # Re-minimize with the mol being flexible
    x0 = fire_minimize(x0, u_impls, box, np.ones(50))
    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, integrator, u_impls, barostat)

    ctxt.multiple_steps(np.linspace(0.0, 0.0, n_steps))

    return ctxt.get_x_t(), ctxt.get_box()
Example #4
0
    def equilibrate_edges(
        self,
        edges: List[Tuple[Chem.Mol, Chem.Mol, np.ndarray]],
        lamb: float = 0.0,
        barostat_interval: int = 10,
        equilibration_steps: int = 100000,
        cache_path: str = "equilibration_cache.pkl",
    ):
        """
        edges: List of tuples with mol_a, mol_b, core
            Edges to equilibrate

        lamb: float
            Lambda value to equilibrate at. Uses Dual Topology to equilibrate

        barostat_interval: int
            Interval on which to run barostat during equilibration

        equilibration_steps: int
            Number of steps to equilibrate the edge for

        cache_path: string
            Path to look for existing cache or path to where to save cache. By default
            it will write out a pickle file in the local directory.

        Pre equilibrate edges and cache them for later use in predictions.

        Parallelized via the model client if possible
        """
        if not self.pre_equilibrate:
            return
        if os.path.isfile(cache_path):
            with open(cache_path, "rb") as ifs:
                self._equil_cache = load(ifs)
            print("Loaded Pre-equilibrated structures from cache")
            return
        futures = []
        ordered_params = self.ff.get_ordered_params()

        temperature = 300.0
        pressure = 1.0

        for stage, host_system, host_coords, host_box in [
            ("complex", self.complex_system, self.complex_coords,
             self.complex_box),
            ("solvent", self.solvent_system, self.solvent_coords,
             self.solvent_box),
        ]:
            # Run all complex legs first then solvent, as they will likely take longer than then solvent leg
            for mol_a, mol_b, core in edges:
                # Use DualTopology to ensure mols exist in the same space.
                topo = topology.DualTopologyMinimization(mol_a, mol_b, self.ff)
                rfe = free_energy.RelativeFreeEnergy(topo)
                min_coords = minimizer.minimize_host_4d([mol_a, mol_b],
                                                        host_system,
                                                        host_coords, self.ff,
                                                        host_box)
                unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                    ordered_params, host_system, min_coords)
                # num_host_coords = len(host_coords)
                # masses[num_host_coords:] *= 1000000 # Lets see if masses are the difference
                harmonic_bond_potential = unbound_potentials[0]
                bond_list = get_bond_list(harmonic_bond_potential)
                group_idxs = get_group_indices(bond_list)
                time_step = 1.5e-3
                if self.hmr:
                    masses = apply_hmr(masses, bond_list)
                    time_step = 2.5e-3
                integrator = LangevinIntegrator(temperature, time_step, 1.0,
                                                masses, 0)
                barostat = MonteCarloBarostat(coords.shape[0], pressure,
                                              temperature, group_idxs,
                                              barostat_interval, 0)
                pots = []
                for bp, params in zip(unbound_potentials, sys_params):
                    pots.append(bp.bind(np.asarray(params)))
                future = self.client.submit(
                    estimator.equilibrate, *[
                        integrator, barostat, pots, coords, host_box, lamb,
                        equilibration_steps
                    ])
                futures.append((stage, (mol_a, mol_b, core), future))
        num_equil = len(futures)
        for i, (stage, edge, future) in enumerate(futures):
            edge_hash = self._edge_hash(stage, *edge)
            self._equil_cache[edge_hash] = future.result()
            if (i + 1) % 5 == 0:
                print(f"Pre-equilibrated {i+1} of {num_equil} edges")
        print(f"Pre-equilibrated {num_equil} edges")
        if cache_path:
            with open(cache_path, "wb") as ofs:
                dump(self._equil_cache, ofs)
            print(f"Saved equilibration_cache to {cache_path}")
Example #5
0
    def predict(self, ff_params: list, mol_a: Chem.Mol, mol_b: Chem.Mol,
                core: np.ndarray):
        """
        Predict the ddG of morphing mol_a into mol_b. This function is differentiable w.r.t. ff_params.

        Parameters
        ----------

        ff_params: list of np.ndarray
            This should match the ordered params returned by the forcefield

        mol_a: Chem.Mol
            Starting molecule corresponding to lambda = 0

        mol_b: Chem.Mol
            Starting molecule corresponding to lambda = 1

        core: np.ndarray
            N x 2 list of ints corresponding to the atom mapping of the core.

        Returns
        -------
        float
            delta delta G in kJ/mol
        aux
            list of TI results
        """

        stage_dGs = []
        stage_results = []

        for stage, host_system, host_coords, host_box, lambda_schedule in [
            ("complex", self.complex_system, self.complex_coords,
             self.complex_box, self.complex_schedule),
            ("solvent", self.solvent_system, self.solvent_coords,
             self.solvent_box, self.solvent_schedule),
        ]:
            single_topology = topology.SingleTopology(mol_a, mol_b, core,
                                                      self.ff)
            rfe = free_energy.RelativeFreeEnergy(single_topology)
            edge_hash = self._edge_hash(stage, mol_a, mol_b, core)
            if self.pre_equilibrate and edge_hash in self._equil_cache:
                cached_state = self._equil_cache[edge_hash]
                x0 = cached_state.coords
                host_box = cached_state.box
                num_host_coords = len(host_coords)
                unbound_potentials, sys_params, masses, _ = rfe.prepare_host_edge(
                    ff_params, host_system, host_coords)
                mol_a_size = mol_a.GetNumAtoms()
                # Use Dual Topology to pre equilibrate, so have to get the mean of the two sets of mol,
                # normally done within prepare_host_edge, but the whole system has moved by this stage
                x0 = np.concatenate([
                    x0[:num_host_coords],
                    np.mean(
                        single_topology.interpolate_params(
                            x0[num_host_coords:num_host_coords + mol_a_size],
                            x0[num_host_coords + mol_a_size:]),
                        axis=0,
                    ),
                ])
            else:
                if self.pre_equilibrate:
                    print(
                        "Edge not correctly pre-equilibrated, ensure equilibrate_edges was called"
                    )
                print(
                    f"Minimizing the {stage} host structure to remove clashes."
                )
                # (ytz): this isn't strictly symmetric, and we should modify minimize later on remove
                # the hysteresis by jointly minimizing against a and b at the same time. We may also want
                # to remove the randomness completely from the minimization.
                min_host_coords = minimizer.minimize_host_4d([mol_a, mol_b],
                                                             host_system,
                                                             host_coords,
                                                             self.ff, host_box)

                unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                    ff_params, host_system, min_host_coords)

                x0 = coords
            v0 = np.zeros_like(x0)

            time_step = 1.5e-3

            harmonic_bond_potential = unbound_potentials[0]
            bond_list = get_bond_list(harmonic_bond_potential)
            if self.hmr:
                masses = apply_hmr(masses, bond_list)
                time_step = 2.5e-3
            group_idxs = get_group_indices(bond_list)

            seed = 0

            temperature = 300.0
            pressure = 1.0

            integrator = LangevinIntegrator(temperature, time_step, 1.0,
                                            masses, seed)

            barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature,
                                          group_idxs, self.barostat_interval,
                                          seed)

            model = estimator.FreeEnergyModel(
                unbound_potentials,
                self.client,
                host_box,
                x0,
                v0,
                integrator,
                lambda_schedule,
                self.equil_steps,
                self.prod_steps,
                barostat,
            )

            dG, results = estimator.deltaG(model, sys_params)

            stage_dGs.append(dG)
            stage_results.append((stage, results))

        pred = stage_dGs[0] - stage_dGs[1]

        return pred, stage_results
Example #6
0
def benchmark(
    label,
    masses,
    lamb,
    x0,
    v0,
    box,
    bound_potentials,
    hmr=False,
    verbose=True,
    num_batches=100,
    steps_per_batch=1000,
    compute_du_dl_interval=0,
    barostat_interval=0,
):
    """
    TODO: configuration blob containing num_batches, steps_per_batch, and any other options
    """

    seed = 1234
    dt = 1.5e-3
    temperature = 300
    pressure = 1.0
    seconds_per_day = 86400

    harmonic_bond_potential = bound_potentials[0]
    bond_list = get_bond_list(harmonic_bond_potential)
    if hmr:
        dt = 2.5e-3
        masses = apply_hmr(masses, bond_list)
    intg = LangevinIntegrator(temperature, dt, 1.0, np.array(masses), seed).impl()

    bps = []

    for potential in bound_potentials:
        bps.append(potential.bound_impl(precision=np.float32))  # get the bound implementation

    baro_impl = None
    if barostat_interval > 0:
        group_idxs = get_group_indices(bond_list)
        baro = MonteCarloBarostat(
            x0.shape[0],
            pressure,
            temperature,
            group_idxs,
            barostat_interval,
            seed,
        )
        baro_impl = baro.impl(bps)

    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        bps,
        barostat=baro_impl,
    )

    batch_times = []

    lambda_schedule = np.ones(steps_per_batch) * lamb

    # run once before timer starts
    ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)

    start = time.time()

    for batch in range(num_batches):

        # time the current batch
        batch_start = time.time()
        du_dls, _, _ = ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)
        batch_end = time.time()

        delta = batch_end - batch_start

        batch_times.append(delta)

        steps_per_second = steps_per_batch / np.mean(batch_times)
        steps_per_day = steps_per_second * seconds_per_day

        ps_per_day = dt * steps_per_day
        ns_per_day = ps_per_day * 1e-3

        if verbose:
            print(f"steps per second: {steps_per_second:.3f}")
            print(f"ns per day: {ns_per_day:.3f}")

    assert np.all(np.abs(ctxt.get_x_t()) < 1000)

    print(
        f"{label}: N={x0.shape[0]} speed: {ns_per_day:.2f}ns/day dt: {dt*1e3}fs (ran {steps_per_batch * num_batches} steps in {(time.time() - start):.2f}s)"
    )