def freesolv_demo(n_configuration_samples=10, n_parameter_samples=10000):
    """Run toy 2D parameterization demo with all of freesolv.

    Every entry of ``smiles_list`` is turned into a ``Molecule`` whose
    packaged vacuum trajectory is thinned to roughly
    ``n_configuration_samples`` snapshots. ``random_walk_mh`` is then run on
    the summed per-molecule log-probability, where one radius is assigned to
    all hydrogens and another to every other atom.

    Parameters
    ----------
    n_configuration_samples : int
        Approximate number of vacuum snapshots kept per molecule.
    n_parameter_samples : int
        Number of steps passed to ``random_walk_mh``.

    Notes
    -----
    The sampled trajectory, log-probabilities, acceptance fraction, step size
    and step count are written to ``H_vs_not_freesolv.npz`` in ``data_path``.
    """

    np.random.seed(0)

    mols = []
    hydrogens = []

    for smiles in smiles_list:
        mol = Molecule(smiles, vacuum_samples=[])
        path_to_vacuum_samples = resource_filename(
            'bayes_implicit_solvent',
            'vacuum_samples/vacuum_samples_{}.h5'.format(
                mol.mol_index_in_smiles_list))
        vacuum_traj = md.load(path_to_vacuum_samples)

        # A trajectory shorter than n_configuration_samples would give a
        # stride of 0, which is not a valid slice step; keep every frame then.
        thinning = max(1, int(len(vacuum_traj) / n_configuration_samples))
        mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(
            vacuum_traj[::thinning])
        print('thinned vacuum_traj from {} to {}'.format(
            len(vacuum_traj), len(mol.vacuum_traj)))

        # dtype=bool keeps this a mask even if the molecule has no atoms
        hydrogens.append(
            np.array([a.element.symbol == 'H' for a in mol.top.atoms()],
                     dtype=bool))
        mols.append(mol)

    def log_prob(radii):
        # NOTE(review): the sampler below starts from pack(radii0, scales0),
        # but only entries 0 and 1 of the argument are read here and the
        # scaling factors are fixed at 1 -- confirm this is intended.
        logp = 0
        for mol, is_hydrogen in zip(mols, hydrogens):
            n_atoms = len(mol.pos)
            atomic_radii = np.ones(n_atoms) * radii[0]
            atomic_radii[is_hydrogen] = radii[1]

            # TODO: update this example to allow the scaling_factors to be variable also
            default_scaling_factors = np.ones(n_atoms)
            logp += mol.log_prob(atomic_radii, default_scaling_factors)

        return logp

    radii0 = np.array([0.1, 0.1])
    scales0 = np.array([0.8, 0.8])
    theta0 = pack(radii0, scales0)

    stepsize = 0.002

    traj, log_probs, acceptance_fraction = random_walk_mh(
        theta0, log_prob, n_steps=n_parameter_samples, stepsize=stepsize)

    np.savez(os.path.join(data_path, 'H_vs_not_freesolv.npz'),
             traj=traj,
             log_probs=log_probs,
             acceptance_fraction=acceptance_fraction,
             stepsize=stepsize,
             n_steps=n_parameter_samples)

    print("acceptance fraction: {:.4f}".format(acceptance_fraction))
def load_dataset(path_to_vacuum_samples,
                 allowed_DeltaG_range=(-15, 5),
                 n_configuration_samples=5):
    """Build a list of Molecules from the sample files matching a glob pattern.

    Parameters
    ----------
    path_to_vacuum_samples : str
        Glob pattern for the sample files. Each matching path is expected to
        contain a ``mobley_...`` key followed by ``.npy``; that key is looked
        up in ``cid_to_smiles``.
    allowed_DeltaG_range : tuple
        ``(min, max)`` bounds, inclusive, compared against
        ``unreduce(mol.experimental_value)``. Molecules outside are discarded.
    n_configuration_samples : int
        Approximate number of vacuum samples kept per molecule.

    Returns
    -------
    list
        The Molecules whose experimental value falls inside the allowed range.
    """
    paths_to_samples = glob(path_to_vacuum_samples)
    min_DeltaG, max_DeltaG = allowed_DeltaG_range

    def extract_cid_key(path):
        # the key is everything from 'mobley_' up to (not including) '.npy'
        i = path.find('mobley_')
        j = path.find('.npy')
        return path[i:j]

    molecules = []
    for path in paths_to_samples:
        cid = extract_cid_key(path)
        smiles = cid_to_smiles[cid]
        vacuum_samples = npy_sample_path_to_unitted_snapshots(path)

        # Fewer samples than requested would give a stride of 0, which is not
        # a valid slice step; keep every sample in that case.
        thinning = max(1, int(len(vacuum_samples) / n_configuration_samples))
        mol = Molecule(smiles, vacuum_samples=vacuum_samples[::thinning])

        experimental_DeltaG = unreduce(mol.experimental_value)
        if min_DeltaG <= experimental_DeltaG <= max_DeltaG:
            molecules.append(mol)
        else:
            # report the range actually in effect, not the default one
            print(
                'discarding {} ({}) because its free energy was outside of the range [{}, {}] kcal/mol'
                .format(smiles, cid, min_DeltaG, max_DeltaG))
    return molecules
def methane_demo(n_configuration_samples=10, n_parameter_samples=100000):
    """Run toy 2D parameterization demo with methane only.

    A single ``Molecule`` is built from the SMILES string ``'C'``, its packaged
    vacuum trajectory is thinned to roughly ``n_configuration_samples``
    snapshots, and ``random_walk_mh`` is run over two radii: one for atom 0
    and one shared by all remaining atoms.

    Parameters
    ----------
    n_configuration_samples : int
        Approximate number of vacuum snapshots kept.
    n_parameter_samples : int
        Number of steps passed to ``random_walk_mh``.

    Notes
    -----
    The sampled trajectory is written to ``H_vs_not_radii_samples_C.npy`` in
    ``data_path``.
    """

    np.random.seed(0)

    smiles = 'C'
    mol = Molecule(smiles, vacuum_samples=[])
    path_to_vacuum_samples = resource_filename(
        'bayes_implicit_solvent', 'vacuum_samples/vacuum_samples_{}.h5'.format(
            mol.mol_index_in_smiles_list))
    vacuum_traj = md.load(path_to_vacuum_samples)

    # A trajectory shorter than n_configuration_samples would give a stride
    # of 0, which is not a valid slice step; keep every frame in that case.
    thinning = max(1, int(len(vacuum_traj) / n_configuration_samples))
    mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(
        vacuum_traj[::thinning])
    print('thinned vacuum_traj from {} to {}'.format(len(vacuum_traj),
                                                     len(mol.vacuum_traj)))

    def log_prob(radii):
        n_atoms = len(mol.pos)

        # atom 0 (presumably the carbon -- confirm atom ordering) gets
        # radii[0]; every other atom gets radii[1]
        atomic_radii = np.zeros(n_atoms)
        atomic_radii[0] = radii[0]
        atomic_radii[1:] = radii[1]

        # TODO: update this example to allow the scaling_factors to be variable also
        # One scaling factor per atom, matching atomic_radii (sizing this by
        # len(radii) gave 2 entries regardless of the number of atoms).
        default_scaling_factors = np.ones(n_atoms)

        return mol.log_prob(atomic_radii, default_scaling_factors)

    radii0 = np.array([0.1, 0.1])

    traj, log_probs, acceptance_fraction = random_walk_mh(
        radii0, log_prob, n_steps=n_parameter_samples, stepsize=0.1)

    np.save(
        os.path.join(data_path,
                     'H_vs_not_radii_samples_{}.npy'.format(smiles)), traj)

    print("acceptance fraction: {:.4f}".format(acceptance_fraction))
    # NOTE(review): the definition enclosing this fragment is not visible here.
    # i, gaussian_ll, smiles_subset, data_path and the lists appended to below
    # (vacuum_trajs, expt_means, expt_uncs, elements, charges,
    # distance_matrices) must be provided by that outer scope.
    n_configuration_samples = 100  # TODO: Since this is cheaper, can probably modify this a bit...

    # Tag used in output file names: the sample count and the value of i,
    # with '_gaussian_ll' appended when gaussian_ll is truthy.
    name = 'n_config={}_smiles_ind={}'.format(n_configuration_samples, i)
    if gaussian_ll:
        name = name + '_gaussian_ll'
    smiles_subset_fname = os.path.join(data_path,
                                       'smiles_subset_{}.txt'.format(name))
    # Record the entries of smiles_subset, one per line.
    with open(smiles_subset_fname, 'w') as f:
        f.writelines(['{}\n'.format(s) for s in smiles_subset])

    from bayes_implicit_solvent.utils import get_charges
    from scipy.spatial.distance import pdist, squareform

    # For each molecule: load its packaged vacuum trajectory, thin it, and
    # append the per-molecule quantities to the accumulator lists.
    for smiles in smiles_subset:
        mol = Molecule(smiles, vacuum_samples=[])
        path_to_vacuum_samples = resource_filename('bayes_implicit_solvent',
                                                   'vacuum_samples/vacuum_samples_{}.h5'.format(
                                                       mol.mol_index_in_smiles_list))
        vacuum_traj = md.load(path_to_vacuum_samples)
        # Keep every thinning-th frame, i.e. roughly n_configuration_samples frames.
        # NOTE(review): thinning is 0 (an invalid slice step) if the trajectory
        # has fewer frames than n_configuration_samples.
        thinning = int(len(vacuum_traj) / n_configuration_samples)
        mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(vacuum_traj[::thinning])
        vacuum_trajs.append(mol.vacuum_traj)
        print('thinned vacuum_traj from {} to {}'.format(len(vacuum_traj), len(mol.vacuum_traj)))

        expt_means.append(mol.experimental_value)
        expt_uncs.append(mol.experimental_uncertainty)

        # atomic numbers, in the order returned by mol.top.atoms()
        elements.append(np.array([a.element.atomic_number for a in mol.top.atoms()]))
        charges.append(get_charges(mol.sys))
        # one square pairwise-distance matrix per retained snapshot, computed
        # on the snapshot divided by unit.nanometer
        distance_matrices.append([squareform(pdist(snapshot / unit.nanometer)) for snapshot in mol.vacuum_traj])
def quarter_freesolv_demo(n_configuration_samples=10,
                          n_parameter_samples=10000,
                          good_initialization=False):
    """Run toy 2D parameterization demo with one randomly-selected quarter of freesolv.

    The random seed is fixed at 0 before the indices of ``smiles_list`` are
    shuffled, and the first quarter of the shuffled indices is kept. For each
    selected molecule the packaged vacuum trajectory is thinned to roughly
    ``n_configuration_samples`` snapshots. ``random_walk_mh`` is then run over
    a packed vector of two radii and two scales, each pair holding one value
    for non-hydrogen atoms and one for hydrogens.

    Parameters
    ----------
    n_configuration_samples : int
        Approximate number of vacuum snapshots kept per molecule.
    n_parameter_samples : int
        Number of steps passed to ``random_walk_mh``.
    good_initialization : bool
        If true, start from the hard-coded radii/scales below instead of the
        generic starting point (radii 0.1, scales 0.8).

    Notes
    -----
    Results are written to ``H_vs_not_freesolv_{n_molecules}_dt={stepsize}.npz``
    in ``data_path``.
    """

    np.random.seed(0)

    inds = np.arange(len(smiles_list))
    np.random.shuffle(inds)
    inds = inds[:int(len(smiles_list) / 4)]

    quarter_smiles = [smiles_list[i] for i in inds]

    mols = []
    hydrogens = []

    for smiles in quarter_smiles:
        mol = Molecule(smiles, vacuum_samples=[])
        path_to_vacuum_samples = resource_filename(
            'bayes_implicit_solvent',
            'vacuum_samples/vacuum_samples_{}.h5'.format(
                mol.mol_index_in_smiles_list))
        vacuum_traj = md.load(path_to_vacuum_samples)

        # A trajectory shorter than n_configuration_samples would give a
        # stride of 0, which is not a valid slice step; keep every frame then.
        thinning = max(1, int(len(vacuum_traj) / n_configuration_samples))
        mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(
            vacuum_traj[::thinning])
        print('thinned vacuum_traj from {} to {}'.format(
            len(vacuum_traj), len(mol.vacuum_traj)))

        # dtype=bool keeps this a mask even if the molecule has no atoms
        hydrogens.append(
            np.array([a.element.symbol == 'H' for a in mol.top.atoms()],
                     dtype=bool))
        mols.append(mol)

    def log_prob(theta):
        radii, scales = unpack(theta)
        logp = 0
        for mol, is_hydrogen in zip(mols, hydrogens):
            n_atoms = len(mol.pos)

            # index 0 applies to non-hydrogen atoms, index 1 to hydrogens
            atomic_radii = np.ones(n_atoms) * radii[0]
            atomic_radii[is_hydrogen] = radii[1]

            atomic_scales = np.ones(n_atoms) * scales[0]
            atomic_scales[is_hydrogen] = scales[1]

            logp += mol.log_prob(atomic_radii, atomic_scales)

        return logp

    if good_initialization:
        radii0 = np.array([0.28319081, 0.20943347])
        scales0 = np.array([0.89298609, 0.67449963])
    else:
        radii0 = np.array([0.1, 0.1])
        scales0 = np.array([0.8, 0.8])

    theta0 = pack(radii0, scales0)

    stepsize = 0.0005

    traj, log_probs, acceptance_fraction = random_walk_mh(
        theta0, log_prob, n_steps=n_parameter_samples, stepsize=stepsize)

    np.savez(os.path.join(
        data_path,
        'H_vs_not_freesolv_{}_dt={}.npz'.format(len(quarter_smiles),
                                                stepsize)),
             traj=traj,
             log_probs=log_probs,
             acceptance_fraction=acceptance_fraction,
             stepsize=stepsize,
             n_steps=n_parameter_samples,
             smiles_subset=quarter_smiles,
             n_configuration_samples=n_configuration_samples)

    print("acceptance fraction: {:.4f}".format(acceptance_fraction))
# Example no. 6
0
from bayes_implicit_solvent.freesolv import cid_to_smiles

from bayes_implicit_solvent.constants import beta


def unreduce(value):
    """Convert a value given in units of kB T into units of kilocalorie_per_mole."""
    # one kilocalorie_per_mole expressed in units of kB T
    kcal_per_mole_in_kT = beta * unit.kilocalorie_per_mole
    return value / kcal_per_mole_in_kT


# Build one Molecule per sample file from its thinned vacuum samples, and keep
# only those whose experimental value, converted by unreduce(), lies strictly
# between -15 and 5.
for path in paths_to_samples:
    cid = extract_cid_key(path)
    smiles = cid_to_smiles[cid]
    vacuum_samples = sample_path_to_unitted_snapshots(path)

    # Fewer samples than requested would give a stride of 0, which is not a
    # valid slice step; keep every sample in that case.
    thinning = max(1, int(len(vacuum_samples) / n_configuration_samples))
    mol = Molecule(smiles, vacuum_samples=vacuum_samples[::thinning], ll=ll)

    experimental_kcal_per_mol = unreduce(mol.experimental_value)
    if -15 < experimental_kcal_per_mol < 5:
        mols.append(mol)
    else:
        print(
            'discarding {} ({}) because its free energy was outside of the range [-15, +5] kcal/mol'
            .format(smiles, cid))

# Element symbols handled here; element_dict maps each symbol to its position
# in all_elements, and N is the number of symbols.
all_elements = ['S', 'Cl', 'F', 'C', 'I', 'N', 'Br', 'H', 'P', 'O']
N = len(all_elements)
element_dict = {symbol: index for index, symbol in enumerate(all_elements)}

# starts empty; presumably filled further down -- not visible from here
element_inds = []

initial_radius_dict = dict(H=0.12,