Esempio n. 1
0
def main(args, stage):
    """Run one stage of a two-ligand lambda-schedule simulation in a water box.

    Two ligands ("a" and "b") are built from SMILES, embedded, parameterized
    with the force field and combined into a single recipe.  The stage
    selects which ``rbfe`` setup routine is applied to the combined recipe
    and which lambda schedule is used.  Host + ligand a are then minimized in
    a worker process and, for each of 10 epochs, ``run`` is mapped over every
    lambda window; the trapezoidal integral of the returned values over the
    schedule is printed as ``dG``.

    Args:
        args: Namespace-like object. Only ``args.num_gpus`` is read; it sets
            the size of both worker pools and the round-robin index passed
            to each ``run`` call.
        stage: ``0`` (``rbfe.stage_0`` with a five-window schedule fixed at
            lambda = 0) or ``1`` (``rbfe.stage_1`` with 60 windows spanning
            [0.0, 1.2]).

    Raises:
        ValueError: If ``stage`` is neither 0 nor 1.
    """
    # NOTE(review): the variable names are historical -- both SMILES strings
    # below are the same ten-carbon bicyclic ring system, not benzene/phenol.
    benzene = Chem.AddHs(Chem.MolFromSmiles("C1=CC=C2C=CC=CC2=C1"))  # ligand a
    phenol = Chem.AddHs(Chem.MolFromSmiles("C1=CC=C2C=CC=CC2=C1"))  # ligand b

    AllChem.EmbedMolecule(benzene)
    AllChem.EmbedMolecule(phenol)

    # Read the force field through a context manager so the file handle is
    # released deterministically.
    with open('ff/params/smirnoff_1_1_0_ccc.py') as ff_file:
        ff_handlers = deserialize_handlers(ff_file.read())
    r_benzene = Recipe.from_rdkit(benzene, ff_handlers)
    r_phenol = Recipe.from_rdkit(phenol, ff_handlers)

    r_combined = r_benzene.combine(r_phenol)

    num_a_atoms = benzene.GetNumAtoms()
    num_b_atoms = phenol.GetNumAtoms()

    # Atom i of ligand a is paired with atom i of ligand b for i in 0..9.
    # The second column is shifted because ligand b's atoms follow ligand
    # a's in the combined recipe.
    core_idxs = np.arange(10, dtype=np.int32)
    core_pairs = np.stack([core_idxs, core_idxs], axis=1)
    core_pairs[:, 1] += num_a_atoms

    a_idxs = np.arange(num_a_atoms)
    b_idxs = np.arange(num_b_atoms) + num_a_atoms

    core_k = 20.0

    # The return values of the stage setup calls are unused; presumably they
    # modify r_combined in place -- confirm against rbfe.
    if stage == 0:
        centroid_k = 200.0
        rbfe.stage_0(r_combined, b_idxs, core_pairs, centroid_k, core_k)
        lambda_schedule = np.zeros(5)
    elif stage == 1:
        rbfe.stage_1(r_combined, a_idxs, b_idxs, core_pairs, core_k)
        lambda_schedule = np.linspace(0.0, 1.2, 60)
    else:
        # An explicit raise (rather than `assert 0`) still fires under -O.
        raise ValueError("stage must be 0 or 1, got %r" % (stage,))

    system, host_coords, box, _ = builders.build_water_system(4.0)

    r_host = Recipe.from_openmm(system)
    r_final = r_host.combine(r_combined)

    # minimize coordinates of host + ligand A
    ha_coords = np.concatenate([host_coords, get_romol_conf(benzene)])

    # we need to run this in a subprocess since the cuda runtime
    # must not be initialized in the master thread due to lack of
    # fork safety
    pool = Pool(args.num_gpus)
    try:
        r_minimize = minimize_setup(r_host, r_benzene)
        minimized = pool.map(
            minimize,
            [(r_minimize.bound_potentials, r_minimize.masses, ha_coords, box)],
            chunksize=1)
    finally:
        # Always reap the minimization workers, even if the map failed.
        pool.close()
        pool.join()
    # pool.map returns a list with one entry per submitted job
    ha_coords = minimized[0]

    x0 = np.concatenate([ha_coords, get_romol_conf(phenol)])

    masses = np.concatenate([r_host.masses, r_benzene.masses, r_phenol.masses])

    seed = np.random.randint(np.iinfo(np.int32).max)

    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, masses, seed)

    # A fresh pool is used for production, as in the original flow.
    pool = Pool(args.num_gpus)
    try:
        # production run at various values of lambda
        for epoch in range(10):
            run_args = [
                (lamb, intg, r_final.bound_potentials, r_final.masses, x0, box,
                 lamb_idx % args.num_gpus, stage)
                for lamb_idx, lamb in enumerate(lambda_schedule)
            ]

            avg_du_dls = pool.map(run, run_args, chunksize=1)

            print("stage", stage, "epoch", epoch, "dG",
                  np.trapz(avg_du_dls, lambda_schedule))
    finally:
        pool.close()
        pool.join()
Esempio n. 2
0
    # Script setup for a solvent-only calculation on two molecules.
    # NOTE(review): this is a fragment -- `parser` is defined above the
    # visible code and the loop at the bottom continues past it.
    cmd_args = parser.parse_args()

    multiprocessing.set_start_method('spawn') # CUDA runtime is not forkable
    # one worker process per GPU requested on the command line
    pool = multiprocessing.Pool(cmd_args.num_gpus)

    # Load every record from the SD file (hydrogens kept) and use the first
    # two as mol_a and mol_b.
    suppl = Chem.SDMolSupplier('tests/data/benzene_fluorinated.sdf', removeHs=False)
    all_mols = [x for x in suppl]
    mol_a = all_mols[0]
    mol_b = all_mols[1]

    # NOTE(review): the file handle opened here is never explicitly closed.
    ff_handlers = deserialize_handlers(open('ff/params/smirnoff_1_1_0_ccc.py').read())
    ff = Forcefield(ff_handlers)

    # the water system first.
    solvent_system, solvent_coords, solvent_box, omm_topology = builders.build_water_system(4.0)
    # enlarge the box by 0.1 along each diagonal entry (in place)
    solvent_box += np.eye(3)*0.1 # BFGS this later

    print("Minimizing the host structure to remove clashes.")
    # only mol_a is passed to the minimizer here
    minimized_solvent_coords = minimizer.minimize_host_4d(mol_a, solvent_system, solvent_coords, ff, solvent_box)

    # Two-segment lambda schedule with num_absolute_windows points in total:
    # the first segment covers [0.0, 0.333) with N - N//3 points (endpoint
    # excluded), the second covers [0.333, 1.0] with N//3 points.
    absolute_lambda_schedule = np.concatenate([
        np.linspace(0.0, 0.333, cmd_args.num_absolute_windows - cmd_args.num_absolute_windows//3, endpoint=False),
        np.linspace(0.333, 1.0, cmd_args.num_absolute_windows//3),
    ])

    # presumably collects one result per molecule -- the code that fills it
    # is outside the visible fragment
    abs_dGs = []

    for idx, mol in enumerate([mol_a, mol_b]):

        # per-molecule free energy object built from the shared force field
        afe = free_energy.AbsoluteFreeEnergy(mol, ff)
Esempio n. 3
0
def run_epoch(ff, mol_a, mol_b, core):
    """Run one training epoch: simulate both legs, then update ``ff`` in place.

    For the "complex" leg (protein system built from
    ``tests/data/hif2a_nowater_min.pdb``) and the "solvent" leg (water
    system), the host is minimized, ``rfe.host_edge`` is mapped over a lambda
    schedule on the worker pool, and the summed bonded + nonbonded du/dl
    values are integrated with the trapezoid rule.  The prediction is
    ``complex - solvent``; the loss is ``|pred - label|``.  Endpoint gradient
    differences are combined per handler type, clipped, and subtracted from
    the charge and Lennard-Jones handler parameters of ``ff``.

    Besides its arguments, the function reads the module-level names
    ``cmd_args``, ``pool`` and ``label``.

    Args:
        ff: Force field whose ``q_handle.params`` and ``lj_handle.params``
            are updated in place.
        mol_a: First molecule of the edge (also used for host minimization).
        mol_b: Second molecule of the edge.
        core: Core mapping forwarded to ``free_energy.RelativeFreeEnergy``.

    Returns:
        None.
    """
    box_padding = np.eye(3)*0.1  # BFGS this later

    # Protein system for the complex leg, box padded in place.
    complex_system, complex_coords, _, _, complex_box = builders.build_protein_system('tests/data/hif2a_nowater_min.pdb')
    complex_box += box_padding

    # Water system for the solvent leg, box padded in place.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)
    solvent_box += box_padding

    legs = (
        ("complex", complex_system, complex_coords, complex_box, cmd_args.num_complex_windows),
        ("solvent", solvent_system, solvent_coords, solvent_box, cmd_args.num_solvent_windows),
    )

    combined_handle_and_grads = {}
    stage_dGs = []

    for stage, host_system, host_coords, host_box, num_host_windows in legs:

        # Window budget: 35% / 30% / remainder.  The outer segments are
        # narrower in lambda, so windows are denser near both endpoints,
        # where the nonbonded <du/dl> variance was observed to be largest.
        n_head = int(.35*num_host_windows)
        n_mid = int(.30*num_host_windows)
        n_tail = num_host_windows - n_head - n_mid

        segments = [
            np.linspace(0.0,  0.25, n_head, endpoint=False),
            np.linspace(0.25, 0.75, n_mid, endpoint=False),
            np.linspace(0.75, 1.0,  n_tail, endpoint=True),
        ]
        lambda_schedule = np.concatenate(segments)

        assert len(lambda_schedule) == num_host_windows

        print("Minimizing the host structure to remove clashes.")
        minimized_host_coords = minimizer.minimize_host_4d(mol_a, host_system, host_coords, ff, host_box)

        rfe = free_energy.RelativeFreeEnergy(mol_a, mol_b, core, ff)

        # One job per lambda window for this leg, assigned round-robin
        # across the available GPUs.
        host_args = [
            (window_idx % cmd_args.num_gpus, lamb, host_system, minimized_host_coords, host_box, cmd_args.num_equil_steps, cmd_args.num_prod_steps)
            for window_idx, lamb in enumerate(lambda_schedule)
        ]

        edge_fn = functools.partial(wrap_method, fn=rfe.host_edge)
        results = pool.map(edge_fn, host_args, chunksize=1)

        ghs = []
        total_du_dls = []

        for lamb, window_result in zip(lambda_schedule, results):
            bonded_du_dl, nonbonded_du_dl, grads_and_handles = window_result
            ghs.append(grads_and_handles)
            print("final", stage, "lambda", lamb, "bonded:", bonded_du_dl[0], bonded_du_dl[1], "nonbonded:", nonbonded_du_dl[0], nonbonded_du_dl[1])
            # element 0 of each pair is the value that gets integrated
            total_du_dls.append(bonded_du_dl[0]+nonbonded_du_dl[0])

        dG_host = np.trapz(total_du_dls, lambda_schedule)
        stage_dGs.append(dG_host)

        # Gradient signal comes from the two endpoint windows only.
        first_window, last_window = ghs[0], ghs[-1]
        for (grad_lhs, handle_type_lhs), (grad_rhs, handle_type_rhs) in zip(first_window, last_window):
            # ffs are forked so the returned handler isn't the same object as that of ff
            assert handle_type_lhs == handle_type_rhs
            endpoint_delta = grad_rhs - grad_lhs
            # First occurrence is stored, later ones subtracted: complex - solvent.
            if handle_type_lhs in combined_handle_and_grads:
                combined_handle_and_grads[handle_type_lhs] -= endpoint_delta
            else:
                combined_handle_and_grads[handle_type_lhs] = endpoint_delta

        print(stage, "pred_dG:", dG_host)

    pred = stage_dGs[0] - stage_dGs[1]
    residual = pred - label

    loss = np.abs(residual)

    print("loss", loss, "pred", pred, "label", label)

    # derivative of |pred - label| with respect to pred
    dl_dpred = np.sign(residual)

    # (ytz): these should be made configurable later on.
    gradient_clip_thresholds = {
        nonbonded.AM1CCCHandler: 0.05,
        nonbonded.LennardJonesHandler: np.array([0.001,0])
    }

    # Apply the clipped gradients to the force field parameters in place.
    for handle_type, grad in combined_handle_and_grads.items():
        bounds = gradient_clip_thresholds.get(handle_type)
        if bounds is None:
            # handler types without a clip threshold are left untouched
            continue

        # chain rule, then clip so the step stays well behaved
        dl_dp = np.clip(dl_dpred*grad, -bounds, bounds)

        if handle_type == nonbonded.AM1CCCHandler:
            # sanity check as other charge methods exist
            assert handle_type == type(ff.q_handle)
            ff.q_handle.params -= dl_dp
        elif handle_type == nonbonded.LennardJonesHandler:
            # sanity check again, even though there are no other lj methods currently
            assert handle_type == type(ff.lj_handle)
            ff.lj_handle.params -= dl_dp
Esempio n. 4
0
# Second ligand, built from SMILES with explicit hydrogens added.
# NOTE(review): romol_a is defined above the visible part of this script.
romol_b = Chem.AddHs(Chem.MolFromSmiles("CC(=O)OC1=CC=CC=C1C(=O)OC"))

# per-atom masses of each ligand, in RDKit atom order
ligand_masses_a = [a.GetMass() for a in romol_a.GetAtoms()]
ligand_masses_b = [a.GetMass() for a in romol_b.GetAtoms()]

# generate conformers
AllChem.EmbedMolecule(romol_a)
AllChem.EmbedMolecule(romol_b)

# extract the 0th conformer
ligand_coords_a = get_romol_conf(romol_a)
ligand_coords_b = get_romol_conf(romol_b)

# construct a 4-nanometer water box (from openmmtools approach: selecting out
#   of a large pre-equilibrated water box snapshot)
system, host_coords, box, omm_topology = builders.build_water_system(4.0)

# padding to avoid jank
# (adds 0.1 to each diagonal entry; builds a new array instead of modifying
# the one returned by the builder)
box = box + np.eye(3) * 0.1

# convert the water system into bound potentials plus per-particle masses
host_bps, host_masses = openmm_deserializer.deserialize_system(system,
                                                               cutoff=1.2)

# mass vector for the full system, ordered host, ligand a, ligand b
combined_masses = np.concatenate(
    [host_masses, ligand_masses_a, ligand_masses_b])

# minimize coordinates

# note: .py file rather than .offxml file
# note: _ccc suffix means "correctable charge corrections"
ff_handlers = deserialize_handlers(
Esempio n. 5
0
def _write_pdb_and_load_mol(omm_topology, coords, pdb_path, remove_after):
    """Write one frame to ``pdb_path``, read it back as an RDKit mol, and
    delete the file afterwards when ``remove_after`` is true."""
    writer = pdb_writer.PDBWriter([omm_topology], pdb_path)
    writer.write_frame(coords)
    writer.close()
    mol = Chem.MolFromPDBFile(pdb_path, removeHs=False)
    if remove_after:
        os.remove(pdb_path)
    return mol


def _split_nonbonded(bound_potentials):
    """Return ``(nonbonded_term, other_terms)`` for a list of potentials."""
    nb_bp = None
    other_bps = []
    for bp in bound_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert nb_bp is None
            nb_bp = bp
        else:
            other_bps.append(bp)
    return nb_bp, other_bps


def _combine_with_guest(env_nb_bp, env_potentials, guest_base_top, ff):
    """Return the environment's potentials plus the guest's parameterized,
    bound bonded and nonbonded terms."""
    hgt = topology.HostGuestTopology(env_nb_bp, guest_base_top)
    # setup the parameter handlers for the ligand
    bonded_tuples = [
        (hgt.parameterize_harmonic_bond, ff.hb_handle),
        (hgt.parameterize_harmonic_angle, ff.ha_handle),
        (hgt.parameterize_proper_torsion, ff.pt_handle),
        (hgt.parameterize_improper_torsion, ff.it_handle),
    ]
    # copy so the shared environment list is never mutated
    combined_bps = list(env_potentials)
    # instantiate the vjps while parameterizing (forward pass)
    for parameterize, handle in bonded_tuples:
        params, potential = parameterize(handle.params)
        combined_bps.append(potential.bind(params))
    nb_params, nb_potential = hgt.parameterize_nonbonded(
        ff.q_handle.params, ff.lj_handle.params
    )
    combined_bps.append(nb_potential.bind(nb_params))
    return combined_bps


def calculate_rigorous_work(
    host_pdbfile, guests_sdfile, outdir, fewer_outfiles=False, no_outfiles=False
):
    """Run a "host" leg and a "water" leg for every guest in an SD file.

    The host PDB is solvated with ``builders.build_protein_system`` and a
    separate water box is built whose width is the smallest diagonal entry
    of the padded host box.  For each guest, the guest is parameterized with
    the ``smirnoff_1_1_0_ccc`` force field, combined first with the solvated
    host and then with the water box, and ``run_leg`` is called for each
    combination.  Wall-clock timings are printed per leg.

    Args:
        host_pdbfile: Path to the host PDB file.
        guests_sdfile: Path to the SD file holding the guest molecules; each
            record must carry a ``_Name`` property and a conformer 0.
        outdir: Output directory; created if missing.  The intermediate
            ``solvated_host.pdb`` and ``water_box.pdb`` files are written
            here.
        fewer_outfiles: Forwarded unchanged to ``run_leg``.
        no_outfiles: Forwarded to ``run_leg``; when true the two
            intermediate PDB files are also deleted right after being read
            back.

    Returns:
        None.
    """
    # exist_ok avoids the race between an exists() check and makedirs()
    os.makedirs(outdir, exist_ok=True)

    print(
        f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}

    INSERTION_MAX_LAMBDA = {INSERTION_MAX_LAMBDA}
    DELETION_MAX_LAMBDA = {DELETION_MAX_LAMBDA}
    MIN_LAMBDA = {MIN_LAMBDA}
    TRANSITION_STEPS = {TRANSITION_STEPS}
    EQ1_STEPS = {EQ1_STEPS}
    EQ2_STEPS = {EQ2_STEPS}
    """
    )

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    # sometimes water boxes are sad. Should be minimized first; this is a workaround
    host_box += np.eye(3) * 0.1
    print("host box", host_box)

    solvated_host_mol = _write_pdb_and_load_mol(
        solvated_topology,
        solvated_host_coords,
        os.path.join(outdir, "solvated_host.pdb"),
        no_outfiles,
    )
    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        solvated_host_system, cutoff=1.2
    )
    host_nb_bp, final_host_potentials = _split_nonbonded(host_potentials)

    # Prepare water box
    print("Generating water box...")
    # TODO: water box probably doesn't need to be this big
    box_lengths = host_box[np.diag_indices(3)]
    water_box_width = min(box_lengths)
    (
        water_system,
        orig_water_coords,
        water_box,
        water_topology,
    ) = builders.build_water_system(water_box_width)

    # sometimes water boxes are sad. should be minimized first; this is a workaround
    water_box += np.eye(3) * 0.1
    print("water box", water_box)

    # it's okay if the water box here and the solvated protein box don't align -- they have PBCs
    water_mol = _write_pdb_and_load_mol(
        water_topology,
        orig_water_coords,
        os.path.join(outdir, "water_box.pdb"),
        no_outfiles,
    )
    water_potentials, water_masses = openmm_deserializer.deserialize_system(
        water_system, cutoff=1.2
    )
    water_nb_bp, final_water_potentials = _split_nonbonded(water_potentials)

    # The force field path does not depend on the guest; resolve it once.
    ff_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "ff/params/smirnoff_1_1_0_ccc.py",
    )

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        # The host-leg timer starts here, so it includes guest setup.
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(), dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        # Fresh handlers per guest; the context manager closes the file
        # instead of leaking one handle per iteration.
        with open(ff_path) as ff_file:
            guest_ff_handlers = deserialize_handlers(ff_file.read())
        ff = Forcefield(guest_ff_handlers)
        guest_base_top = topology.BaseTopology(guest_mol, ff)
        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]

        # combine host & guest
        combined_bps = _combine_with_guest(
            host_nb_bp, final_host_potentials, guest_base_top, ff
        )
        combined_masses = np.concatenate([host_masses, guest_masses])

        run_leg(
            solvated_host_coords,
            orig_guest_coords,
            combined_bps,
            combined_masses,
            host_box,
            guest_name,
            "host",
            solvated_host_mol,
            guest_mol,
            outdir,
            fewer_outfiles,
            no_outfiles,
        )
        end_time = time.time()
        print(
            f"{guest_name} host leg time:", "%.2f" % (end_time - start_time), "seconds"
        )

        # combine water & guest
        combined_bps = _combine_with_guest(
            water_nb_bp, final_water_potentials, guest_base_top, ff
        )
        combined_masses = np.concatenate([water_masses, guest_masses])
        # The water-leg timer covers only the run_leg call itself.
        start_time = time.time()
        run_leg(
            orig_water_coords,
            orig_guest_coords,
            combined_bps,
            combined_masses,
            water_box,
            guest_name,
            "water",
            water_mol,
            guest_mol,
            outdir,
            fewer_outfiles,
            no_outfiles,
        )
        end_time = time.time()
        print(
            f"{guest_name} water leg time:", "%.2f" % (end_time - start_time), "seconds"
        )