def freesolv_demo(n_configuration_samples=10, n_parameter_samples=10000): """Run toy 2D parameterization demo with all of freesolv""" np.random.seed(0) mols = [] hydrogens = [] for smiles in smiles_list: mol = Molecule(smiles, vacuum_samples=[]) path_to_vacuum_samples = resource_filename( 'bayes_implicit_solvent', 'vacuum_samples/vacuum_samples_{}.h5'.format( mol.mol_index_in_smiles_list)) vacuum_traj = md.load(path_to_vacuum_samples) thinning = int(len(vacuum_traj) / n_configuration_samples) mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots( vacuum_traj[::thinning]) print('thinned vacuum_traj from {} to {}'.format( len(vacuum_traj), len(mol.vacuum_traj))) hydrogens.append( np.array([a.element.symbol == 'H' for a in mol.top.atoms()])) mols.append(mol) def log_prob(radii): logp = 0 for i in range(len(mols)): mol = mols[i] atomic_radii = np.ones(len(mol.pos)) * radii[0] atomic_radii[hydrogens[i]] = radii[1] # TODO: update this example to allow the scaling_factors to be variable also default_scaling_factors = np.ones(len(mol.pos)) logp += mol.log_prob(atomic_radii, default_scaling_factors) return logp radii0 = np.array([0.1, 0.1]) scales0 = np.array([0.8, 0.8]) theta0 = pack(radii0, scales0) stepsize = 0.002 traj, log_probs, acceptance_fraction = random_walk_mh( theta0, log_prob, n_steps=n_parameter_samples, stepsize=stepsize) np.savez(os.path.join(data_path, 'H_vs_not_freesolv.npz'), traj=traj, log_probs=log_probs, acceptance_fraction=acceptance_fraction, stepsize=stepsize, n_steps=n_parameter_samples) print("acceptance fraction: {:.4f}".format(acceptance_fraction))
def load_dataset(path_to_vacuum_samples, allowed_DeltaG_range=(-15, 5), n_configuration_samples=5): paths_to_samples = glob(path_to_vacuum_samples) def extract_cid_key(path): i = path.find('mobley_') j = path.find('.npy') return path[i:j] molecules = [] for path in paths_to_samples: cid = extract_cid_key(path) smiles = cid_to_smiles[cid] vacuum_samples = npy_sample_path_to_unitted_snapshots(path) thinning = int(len(vacuum_samples) / n_configuration_samples) mol = Molecule(smiles, vacuum_samples=vacuum_samples[::thinning]) min_DeltaG, max_DeltaG = allowed_DeltaG_range if (unreduce(mol.experimental_value) >= min_DeltaG) and (unreduce( mol.experimental_value) <= max_DeltaG): molecules.append(mol) else: print( 'discarding {} ({}) because its free energy was outside of the range [-15, +5] kcal/mol' .format(smiles, cid)) return molecules
def methane_demo(n_configuration_samples=10, n_parameter_samples=100000): """Run toy 2D parameterization demo with methane only""" np.random.seed(0) smiles = 'C' mol = Molecule(smiles, vacuum_samples=[]) path_to_vacuum_samples = resource_filename( 'bayes_implicit_solvent', 'vacuum_samples/vacuum_samples_{}.h5'.format( mol.mol_index_in_smiles_list)) vacuum_traj = md.load(path_to_vacuum_samples) thinning = int(len(vacuum_traj) / n_configuration_samples) mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots( vacuum_traj[::thinning]) print('thinned vacuum_traj from {} to {}'.format(len(vacuum_traj), len(mol.vacuum_traj))) def log_prob(radii): atomic_radii = np.zeros(len(mol.pos)) atomic_radii[0] = radii[0] atomic_radii[1:] = radii[1] # TODO: update this example to allow the scaling_factors to be variable also default_scaling_factors = np.ones(len(radii)) return mol.log_prob(atomic_radii, default_scaling_factors) radii0 = np.array([0.1, 0.1]) traj, log_probs, acceptance_fraction = random_walk_mh( radii0, log_prob, n_steps=n_parameter_samples, stepsize=0.1) np.save( os.path.join(data_path, 'H_vs_not_radii_samples_{}.npy'.format(smiles)), traj) print("acceptance fraction: {:.4f}".format(acceptance_fraction))
n_configuration_samples = 100 # TODO: Since this is cheaper, can probably modify this a bit... name = 'n_config={}_smiles_ind={}'.format(n_configuration_samples, i) if gaussian_ll: name = name + '_gaussian_ll' smiles_subset_fname = os.path.join(data_path, 'smiles_subset_{}.txt'.format(name)) with open(smiles_subset_fname, 'w') as f: f.writelines(['{}\n'.format(s) for s in smiles_subset]) from bayes_implicit_solvent.utils import get_charges from scipy.spatial.distance import pdist, squareform for smiles in smiles_subset: mol = Molecule(smiles, vacuum_samples=[]) path_to_vacuum_samples = resource_filename('bayes_implicit_solvent', 'vacuum_samples/vacuum_samples_{}.h5'.format( mol.mol_index_in_smiles_list)) vacuum_traj = md.load(path_to_vacuum_samples) thinning = int(len(vacuum_traj) / n_configuration_samples) mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(vacuum_traj[::thinning]) vacuum_trajs.append(mol.vacuum_traj) print('thinned vacuum_traj from {} to {}'.format(len(vacuum_traj), len(mol.vacuum_traj))) expt_means.append(mol.experimental_value) expt_uncs.append(mol.experimental_uncertainty) elements.append(np.array([a.element.atomic_number for a in mol.top.atoms()])) charges.append(get_charges(mol.sys)) distance_matrices.append([squareform(pdist(snapshot / unit.nanometer)) for snapshot in mol.vacuum_traj])
def quarter_freesolv_demo(n_configuration_samples=10, n_parameter_samples=10000, good_initialization=False): """Run toy 2D parameterization demo with one randomly-selected quarter of freesolv""" np.random.seed(0) inds = np.arange(len(smiles_list)) np.random.shuffle(inds) inds = inds[:int(len(smiles_list) / 4)] quarter_smiles = [smiles_list[i] for i in inds] mols = [] hydrogens = [] for smiles in quarter_smiles: mol = Molecule(smiles, vacuum_samples=[]) path_to_vacuum_samples = resource_filename( 'bayes_implicit_solvent', 'vacuum_samples/vacuum_samples_{}.h5'.format( mol.mol_index_in_smiles_list)) vacuum_traj = md.load(path_to_vacuum_samples) thinning = int(len(vacuum_traj) / n_configuration_samples) mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots( vacuum_traj[::thinning]) print('thinned vacuum_traj from {} to {}'.format( len(vacuum_traj), len(mol.vacuum_traj))) hydrogens.append( np.array([a.element.symbol == 'H' for a in mol.top.atoms()])) mols.append(mol) def log_prob(theta): radii, scales = unpack(theta) logp = 0 for i in range(len(mols)): mol = mols[i] atomic_radii = np.ones(len(mol.pos)) * radii[0] atomic_radii[hydrogens[i]] = radii[1] atomic_scales = np.ones(len(mol.pos)) * scales[0] atomic_scales[hydrogens[i]] = scales[1] logp += mol.log_prob(atomic_radii, atomic_scales) return logp radii0 = np.array([0.1, 0.1]) scales0 = np.array([0.8, 0.8]) if good_initialization: radii0 = np.array([0.28319081, 0.20943347]) scales0 = np.array([0.89298609, 0.67449963]) theta0 = pack(radii0, scales0) stepsize = 0.0005 traj, log_probs, acceptance_fraction = random_walk_mh( theta0, log_prob, n_steps=n_parameter_samples, stepsize=stepsize) np.savez(os.path.join( data_path, 'H_vs_not_freesolv_{}_dt={}.npz'.format(len(quarter_smiles), stepsize)), traj=traj, log_probs=log_probs, acceptance_fraction=acceptance_fraction, stepsize=stepsize, n_steps=n_parameter_samples, smiles_subset=quarter_smiles, n_configuration_samples=n_configuration_samples) print("acceptance fraction: {:.4f}".format(acceptance_fraction))
from bayes_implicit_solvent.freesolv import cid_to_smiles from bayes_implicit_solvent.constants import beta def unreduce(value): """Input value is in units of kB T, turn it into units of kilocalorie_per_mole""" return value / (beta * unit.kilocalorie_per_mole) for path in paths_to_samples: cid = extract_cid_key(path) smiles = cid_to_smiles[cid] vacuum_samples = sample_path_to_unitted_snapshots(path) thinning = int(len(vacuum_samples) / n_configuration_samples) mol = Molecule(smiles, vacuum_samples=vacuum_samples[::thinning], ll=ll) if (unreduce(mol.experimental_value) > -15) and (unreduce( mol.experimental_value) < 5): mols.append(mol) else: print( 'discarding {} ({}) because its free energy was outside of the range [-15, +5] kcal/mol' .format(smiles, cid)) element_inds = [] all_elements = ['S', 'Cl', 'F', 'C', 'I', 'N', 'Br', 'H', 'P', 'O'] N = len(all_elements) element_dict = dict(zip(all_elements, range(len(all_elements)))) initial_radius_dict = dict(H=0.12,