def freesolv_demo(n_configuration_samples=10, n_parameter_samples=10000):
    """Run toy 2D parameterization demo with all of freesolv.

    Two radii are sampled with ``random_walk_mh``: ``radii[0]`` is assigned to
    every non-hydrogen atom and ``radii[1]`` to every hydrogen atom. Scaling
    factors are held fixed at 1 for every atom.

    Parameters
    ----------
    n_configuration_samples : int
        Target number of vacuum snapshots kept per molecule; the stored
        trajectory is thinned with a stride of
        ``len(trajectory) / n_configuration_samples`` (at least 1).
    n_parameter_samples : int
        Number of steps passed to ``random_walk_mh``.

    Side effects
    ------------
    Seeds NumPy's global RNG with 0, prints thinning / acceptance information,
    and writes ``H_vs_not_freesolv.npz`` into ``data_path``.
    """
    np.random.seed(0)

    mols = []
    hydrogens = []  # one boolean mask per molecule: True where the atom is H

    for smiles in smiles_list:
        mol = Molecule(smiles, vacuum_samples=[])
        path_to_vacuum_samples = resource_filename(
            'bayes_implicit_solvent',
            'vacuum_samples/vacuum_samples_{}.h5'.format(
                mol.mol_index_in_smiles_list))
        vacuum_traj = md.load(path_to_vacuum_samples)

        # Clamp to 1: a trajectory shorter than n_configuration_samples would
        # otherwise give a stride of 0, and slicing with step 0 raises.
        thinning = max(1, int(len(vacuum_traj) / n_configuration_samples))
        mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(
            vacuum_traj[::thinning])
        print('thinned vacuum_traj from {} to {}'.format(
            len(vacuum_traj), len(mol.vacuum_traj)))

        hydrogens.append(
            np.array([a.element.symbol == 'H' for a in mol.top.atoms()]))
        mols.append(mol)

    def log_prob(radii):
        """Sum ``mol.log_prob`` over all molecules for the given two radii."""
        logp = 0
        for mol, is_hydrogen in zip(mols, hydrogens):
            n_atoms = len(mol.pos)
            atomic_radii = np.ones(n_atoms) * radii[0]
            atomic_radii[is_hydrogen] = radii[1]

            # TODO: update this example to allow the scaling_factors to be variable also
            default_scaling_factors = np.ones(n_atoms)
            logp += mol.log_prob(atomic_radii, default_scaling_factors)
        return logp

    # Only the two radii are sampled: log_prob does not consume scaling
    # factors (see TODO above), so adding them to the sampled vector would
    # only introduce parameters the likelihood never reads.
    radii0 = np.array([0.1, 0.1])

    stepsize = 0.002
    traj, log_probs, acceptance_fraction = random_walk_mh(
        radii0, log_prob, n_steps=n_parameter_samples, stepsize=stepsize)

    np.savez(os.path.join(data_path, 'H_vs_not_freesolv.npz'),
             traj=traj,
             log_probs=log_probs,
             acceptance_fraction=acceptance_fraction,
             stepsize=stepsize,
             n_steps=n_parameter_samples)

    print("acceptance fraction: {:.4f}".format(acceptance_fraction))
def methane_demo(n_configuration_samples=10, n_parameter_samples=100000):
    """Run toy 2D parameterization demo with methane only.

    Two radii are sampled with ``random_walk_mh``: ``radii[0]`` is assigned to
    atom 0 and ``radii[1]`` to all remaining atoms (presumably the carbon and
    the hydrogens respectively -- this relies on the atom ordering produced
    by ``Molecule('C')``). Scaling factors are held fixed at 1.

    Parameters
    ----------
    n_configuration_samples : int
        Target number of vacuum snapshots kept; the stored trajectory is
        thinned with a stride of ``len(trajectory) / n_configuration_samples``
        (at least 1).
    n_parameter_samples : int
        Number of steps passed to ``random_walk_mh``.

    Side effects
    ------------
    Seeds NumPy's global RNG with 0, prints thinning / acceptance information,
    and writes ``H_vs_not_radii_samples_C.npy`` into ``data_path``.
    """
    np.random.seed(0)

    smiles = 'C'
    mol = Molecule(smiles, vacuum_samples=[])
    path_to_vacuum_samples = resource_filename(
        'bayes_implicit_solvent',
        'vacuum_samples/vacuum_samples_{}.h5'.format(
            mol.mol_index_in_smiles_list))
    vacuum_traj = md.load(path_to_vacuum_samples)

    # Clamp to 1: a trajectory shorter than n_configuration_samples would
    # otherwise give a stride of 0, and slicing with step 0 raises.
    thinning = max(1, int(len(vacuum_traj) / n_configuration_samples))
    mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(
        vacuum_traj[::thinning])
    print('thinned vacuum_traj from {} to {}'.format(len(vacuum_traj),
                                                     len(mol.vacuum_traj)))

    def log_prob(radii):
        """Evaluate ``mol.log_prob`` for the given two radii."""
        n_atoms = len(mol.pos)
        atomic_radii = np.zeros(n_atoms)
        atomic_radii[0] = radii[0]
        atomic_radii[1:] = radii[1]

        # TODO: update this example to allow the scaling_factors to be variable also
        # One scaling factor per atom (same length as atomic_radii), not one
        # per sampled parameter.
        default_scaling_factors = np.ones(n_atoms)
        return mol.log_prob(atomic_radii, default_scaling_factors)

    radii0 = np.array([0.1, 0.1])

    traj, log_probs, acceptance_fraction = random_walk_mh(
        radii0, log_prob, n_steps=n_parameter_samples, stepsize=0.1)

    np.save(
        os.path.join(data_path,
                     'H_vs_not_radii_samples_{}.npy'.format(smiles)),
        traj)

    print("acceptance fraction: {:.4f}".format(acceptance_fraction))
# Record which molecules were used, one SMILES string per line.
smiles_subset_fname = os.path.join(data_path, 'smiles_subset_{}.txt'.format(name))
with open(smiles_subset_fname, 'w') as f:
    f.writelines(['{}\n'.format(s) for s in smiles_subset])

from bayes_implicit_solvent.utils import get_charges
from scipy.spatial.distance import pdist, squareform

# Load and thin the stored vacuum samples for each molecule, collecting the
# per-molecule quantities into the accumulator lists.
for smiles in smiles_subset:
    mol = Molecule(smiles, vacuum_samples=[])
    path_to_vacuum_samples = resource_filename(
        'bayes_implicit_solvent',
        'vacuum_samples/vacuum_samples_{}.h5'.format(
            mol.mol_index_in_smiles_list))
    vacuum_traj = md.load(path_to_vacuum_samples)

    # Clamp to 1: a trajectory shorter than n_configuration_samples would
    # otherwise give a stride of 0, and slicing with step 0 raises.
    thinning = max(1, int(len(vacuum_traj) / n_configuration_samples))
    mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(vacuum_traj[::thinning])
    vacuum_trajs.append(mol.vacuum_traj)
    print('thinned vacuum_traj from {} to {}'.format(len(vacuum_traj), len(mol.vacuum_traj)))

    expt_means.append(mol.experimental_value)
    expt_uncs.append(mol.experimental_uncertainty)
    elements.append(np.array([a.element.atomic_number for a in mol.top.atoms()]))
    charges.append(get_charges(mol.sys))

    # One square pairwise-distance matrix per retained snapshot; dividing by
    # unit.nanometer presumably strips the units so pdist sees plain numbers.
    distance_matrices.append([squareform(pdist(snapshot / unit.nanometer))
                              for snapshot in mol.vacuum_traj])
    mols.append(mol)

# 2. Define a likelihood function, including "type-assignment"
# NOTE: from here on `np` refers to autograd's numpy wrapper.
from autograd import numpy as np
from autograd.scipy.stats import norm
from autograd.scipy.stats import t as student_t
def quarter_freesolv_demo(n_configuration_samples=10,
                          n_parameter_samples=10000,
                          good_initialization=False):
    """Run toy parameterization demo with one randomly-selected quarter of freesolv.

    Four parameters are sampled with ``random_walk_mh``: two radii and two
    scaling factors. Index 0 of each pair is assigned to non-hydrogen atoms
    and index 1 to hydrogen atoms.

    Parameters
    ----------
    n_configuration_samples : int
        Target number of vacuum snapshots kept per molecule; the stored
        trajectory is thinned with a stride of
        ``len(trajectory) / n_configuration_samples`` (at least 1).
    n_parameter_samples : int
        Number of steps passed to ``random_walk_mh``.
    good_initialization : bool
        If True, start from the hard-coded radii / scales below instead of
        the default ``[0.1, 0.1]`` / ``[0.8, 0.8]``.

    Side effects
    ------------
    Seeds NumPy's global RNG with 0 (so the selected quarter is reproducible),
    prints thinning / acceptance information, and writes
    ``H_vs_not_freesolv_{n_molecules}_dt={stepsize}.npz`` into ``data_path``.
    """
    np.random.seed(0)

    # Pick a random quarter of the molecules.
    inds = np.arange(len(smiles_list))
    np.random.shuffle(inds)
    inds = inds[:int(len(smiles_list) / 4)]
    quarter_smiles = [smiles_list[i] for i in inds]

    mols = []
    hydrogens = []  # one boolean mask per molecule: True where the atom is H

    for smiles in quarter_smiles:
        mol = Molecule(smiles, vacuum_samples=[])
        path_to_vacuum_samples = resource_filename(
            'bayes_implicit_solvent',
            'vacuum_samples/vacuum_samples_{}.h5'.format(
                mol.mol_index_in_smiles_list))
        vacuum_traj = md.load(path_to_vacuum_samples)

        # Clamp to 1: a trajectory shorter than n_configuration_samples would
        # otherwise give a stride of 0, and slicing with step 0 raises.
        thinning = max(1, int(len(vacuum_traj) / n_configuration_samples))
        mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(
            vacuum_traj[::thinning])
        print('thinned vacuum_traj from {} to {}'.format(
            len(vacuum_traj), len(mol.vacuum_traj)))

        hydrogens.append(
            np.array([a.element.symbol == 'H' for a in mol.top.atoms()]))
        mols.append(mol)

    def log_prob(theta):
        """Sum ``mol.log_prob`` over all molecules for packed (radii, scales)."""
        radii, scales = unpack(theta)
        logp = 0
        for mol, is_hydrogen in zip(mols, hydrogens):
            n_atoms = len(mol.pos)

            atomic_radii = np.ones(n_atoms) * radii[0]
            atomic_radii[is_hydrogen] = radii[1]

            atomic_scales = np.ones(n_atoms) * scales[0]
            atomic_scales[is_hydrogen] = scales[1]

            logp += mol.log_prob(atomic_radii, atomic_scales)
        return logp

    if good_initialization:
        radii0 = np.array([0.28319081, 0.20943347])
        scales0 = np.array([0.89298609, 0.67449963])
    else:
        radii0 = np.array([0.1, 0.1])
        scales0 = np.array([0.8, 0.8])
    theta0 = pack(radii0, scales0)

    stepsize = 0.0005
    traj, log_probs, acceptance_fraction = random_walk_mh(
        theta0, log_prob, n_steps=n_parameter_samples, stepsize=stepsize)

    np.savez(os.path.join(
        data_path,
        'H_vs_not_freesolv_{}_dt={}.npz'.format(len(quarter_smiles),
                                                stepsize)),
             traj=traj,
             log_probs=log_probs,
             acceptance_fraction=acceptance_fraction,
             stepsize=stepsize,
             n_steps=n_parameter_samples,
             smiles_subset=quarter_smiles,
             n_configuration_samples=n_configuration_samples)

    print("acceptance fraction: {:.4f}".format(acceptance_fraction))