def test_align(self):
    """Align the phenylpropane conformers and prune redundant structures.

    The last geometry of every ``phenylpropane*.out`` file is collected
    into one conformational ensemble, aligned on atoms 1-6, and then
    de-duplicated twice: once comparing heavy atoms (3 survivors
    expected) and once comparing only the alignment atoms (1 survivor
    expected).  Each resulting ensemble is written to ``test/static``.
    """
    conformers = cctk.ConformationalEnsemble()
    for output_path in sorted(glob.glob("test/static/phenylpropane*.out")):
        parsed = cctk.GaussianFile.read_file(output_path).ensemble
        final_geometry = parsed.molecules[-1]
        conformers.add_molecule(
            final_geometry, parsed.get_properties_dict(final_geometry)
        )

    reference_atoms = [1, 2, 3, 4, 5, 6]

    # np.int64 (not a plain int) is passed on purpose, to check that the
    # to_geometry parameter accepts any integer type.
    aligned, rmsd_before, rmsd_after = conformers.align(
        to_geometry=np.int64(0),
        comparison_atoms=reference_atoms,
        compute_RMSD=True,
    )
    for _, after in zip(rmsd_before, rmsd_after):
        self.assertLess(after, 0.0001)
    cctk.GaussianFile.write_ensemble_to_file(
        "test/static/phenylpropane_aligned.gjf", aligned, "#p"
    )

    # Pruning on heavy atoms.
    heavy_pruned, _rmsds = aligned.eliminate_redundant(
        RMSD_cutoff=0.5, comparison_atoms="heavy", return_RMSD=True
    )
    self.assertEqual(len(heavy_pruned), 3)
    cctk.GaussianFile.write_ensemble_to_file(
        "test/static/phenylpropane_aligned2.gjf", heavy_pruned, "#p"
    )

    # Pruning on the same atoms that were used for the alignment.
    subset_pruned = aligned.eliminate_redundant(
        RMSD_cutoff=0.5, comparison_atoms=reference_atoms
    )
    self.assertEqual(len(subset_pruned), 1)
    cctk.GaussianFile.write_ensemble_to_file(
        "test/static/phenylpropane_aligned3.gjf", subset_pruned, "#p"
    )

    cctk.MOL2File.write_ensemble_to_file(
        "test/static/phenylpropane_aligned.mol2", aligned
    )
def as_ensemble(self, idxs=None):
    """Collect every frame of this object into a conformational ensemble.

    Args:
        idxs: forwarded unchanged to ``frame.molecule`` for each frame.
            Defaults to ``None``.
            NOTE(review): presumably ``None`` selects that method's
            default atom set -- confirm against ``frame.molecule``.

    Returns:
        A new ``cctk.ConformationalEnsemble`` with one molecule per entry
        of ``self.frames``, each stored with the frame's
        ``bath_temperature`` and ``energy`` as properties.
    """
    ensemble = cctk.ConformationalEnsemble()
    # All frames are included, the final one too; an earlier revision
    # iterated over self.frames[:-1] for reasons that were never recorded.
    for frame in self.frames:
        ensemble.add_molecule(
            frame.molecule(idxs),
            {
                "bath_temperature": frame.bath_temperature,
                "energy": frame.energy,
            },
        )
    return ensemble
def build_test_ensemble(self):
    """Build the phenylpropane fixture ensemble used by the tests.

    Returns:
        A ``cctk.ConformationalEnsemble`` holding the last molecule (with
        its properties) from every file matching
        ``test/static/phenylpropane*.out``, added in sorted filename order.
    """
    fixture = cctk.ConformationalEnsemble()
    for output_path in sorted(glob.glob("test/static/phenylpropane*.out")):
        parsed = cctk.GaussianFile.read_file(output_path).ensemble
        final_geometry = parsed.molecules[-1]
        fixture.add_molecule(
            final_geometry, parsed.get_properties_dict(final_geometry)
        )
    return fixture
def read_ensemble(cls, filename, conformational=False):
    """Read ``filename`` with ``read_trajectory`` and return an ensemble.

    Args:
        filename: passed straight through to ``cls.read_trajectory``.
        conformational: when true the molecules are collected into a
            ``cctk.ConformationalEnsemble``; otherwise into a plain
            ``cctk.Ensemble``.

    Returns:
        The ensemble holding the ``molecule`` attribute of every object
        returned by ``cls.read_trajectory``.
    """
    parsed_files = cls.read_trajectory(filename)

    ensemble_type = cctk.ConformationalEnsemble if conformational else cctk.Ensemble
    collected = ensemble_type()
    for parsed in parsed_files:
        collected.add_molecule(parsed.molecule)
    return collected
def test_final_structure(self):
    """Aligning two perturbed methane inputs should give a tiny RMSD.

    The second structure is renumbered to match the first before both are
    aligned on all atoms; the post-alignment RMSD of the second structure
    must be below 0.005.
    """
    pair = cctk.ConformationalEnsemble()

    first = cctk.GaussianFile.read_file("test/static/methane_perturbed.gjf")
    pair.add_molecule(first.get_molecule().assign_connectivity())

    second = cctk.GaussianFile.read_file("test/static/methane_perturbed_key.gjf")
    second_molecule = second.get_molecule().assign_connectivity()
    pair.add_molecule(second_molecule.renumber_to_match(pair.molecules[0]))

    _aligned, _rmsd_before, rmsd_after = pair.align(
        comparison_atoms="all", compute_RMSD=True
    )
    self.assertTrue(rmsd_after[1] < 0.005)
def test_boltzmann_weighting(self):
    """Check Boltzmann-averaged properties against reference values.

    Part one averages the ``energy`` property of the phenylpropane
    fixture using explicitly supplied energies (kcal/mol) and checks both
    the ratio of the first two weights and the averaged value.  Part two
    averages the ``enthalpy`` property of the pentane outputs.

    Each comparison is a two-sided tolerance check.  The earlier form,
    ``x - reference < tol``, had no absolute value, so any result smaller
    than the reference (including zero or negative numbers) passed.
    """
    conformational_ensemble = self.build_test_ensemble()
    values, weights = conformational_ensemble.boltzmann_average(
        "energy",
        energies=[1.36, 0, 1000, 1000, 1000, 1000],
        energy_unit="kcal_mol",
        return_weights=True,
    )
    self.assertAlmostEqual(weights[0] / weights[1], 0.1, delta=0.01)
    self.assertAlmostEqual(values, 0.016152, delta=0.0001)

    ce2 = cctk.ConformationalEnsemble()
    for filename in glob.glob("test/static/pentane*.out"):
        ensemble = cctk.GaussianFile.read_file(filename).ensemble
        molecule = ensemble.molecules[-1]
        ce2.add_molecule(molecule, ensemble.get_properties_dict(molecule))

    enthalpy = ce2.boltzmann_average("enthalpy")
    self.assertAlmostEqual(enthalpy, 0.10722, delta=0.0001)
def test_ensemble_indexing2(self):
    """Per-molecule property assignment works for both ensemble classes.

    The molecules parsed from ``gaussian_file.out`` are copied first into
    a ``ConformationalEnsemble`` and then into a plain ``Ensemble``; in
    each case every molecule is tagged with its position and the tags are
    read back through column indexing.
    """
    parsed = cctk.GaussianFile.read_file("test/static/gaussian_file.out")
    source = parsed.ensemble
    self.assertTrue(isinstance(source, cctk.ConformationalEnsemble))

    for ensemble_type in (cctk.ConformationalEnsemble, cctk.Ensemble):
        target = ensemble_type()
        for position, molecule in enumerate(source.molecules):
            target.add_molecule(molecule)
            target[molecule, "test_property"] = position

        self.assertTrue(len(target) == 3)
        self.assertListEqual(list(target[:, "test_property"]), [0, 1, 2])
def test_write(self):
    """Both ORCA writing paths must reproduce the reference input file.

    The peptide geometry is read from XYZ and written once through
    ``OrcaFile.write_molecule_to_file`` and once through an ``OrcaFile``
    instance's ``write_file``.  After each write the output is compared
    line by line with ``test_peptide.inp`` and then deleted.
    """
    source_xyz = "test/static/test_peptide.xyz"
    reference_inp = "test/static/test_peptide.inp"
    scratch_inp = "test/static/test_peptide_copy.inp"

    xyz_file = cctk.XYZFile.read_file(source_xyz)
    self.assertTrue(isinstance(xyz_file.molecule, cctk.Molecule))

    header = "! aug-cc-pVTZ aug-cc-pVTZ/C DLPNO-CCSD(T) TightSCF TightPNO MiniPrint"
    variables = {"maxcore": 4000}
    blocks = {"pal": ["nproc 4"], "mdci": ["density none"]}

    def compare_with_reference_then_delete():
        # Reference is opened first, then the freshly written file.
        with open(reference_inp) as expected, open(scratch_inp) as written:
            self.assertListEqual(list(written), list(expected))
        os.remove(scratch_inp)

    # Path 1: class-level convenience writer.
    cctk.OrcaFile.write_molecule_to_file(
        scratch_inp, xyz_file.molecule, header, variables, blocks
    )
    compare_with_reference_then_delete()

    # Path 2: build an OrcaFile around a one-molecule ensemble.
    single = cctk.ConformationalEnsemble()
    single.add_molecule(xyz_file.molecule)
    orca_file = cctk.OrcaFile(
        job_types=[cctk.OrcaJobType.SP],
        ensemble=single,
        header=header,
        blocks=blocks,
        variables=variables,
    )
    orca_file.write_file(scratch_inp)
    compare_with_reference_then_delete()
# Benchmark script: times how long it takes to read a directory of Gaussian
# output files into one cctk ConformationalEnsemble, renumbering every
# molecule after the first to match the first one.
import time, sys, glob
import numpy as np

# Put the local cctk checkout at the front of the import search path.
sys.path.insert(0,'/Users/cwagen/code/cctk')
import cctk

#### 80.6 seconds before refactoring (6/6/2020) (I think)
#### 26.7 seconds after refactoring (6/6/2020)

files = glob.glob("/Users/cwagen/code/Martin/ts/*.out")

# Record both wall-clock and CPU (process) time for the section below.
w_start = time.time()
p_start = time.process_time()

e = cctk.ConformationalEnsemble()
for path in files:
    f = cctk.GaussianFile.read_file(path)
    m = f.get_molecule().assign_connectivity()
    # The first molecule is added as-is; each later one is renumbered to
    # match the first molecule already in the ensemble.
    if len(e):
        m = m.renumber_to_match(e.molecules[0])
    e.add_molecule(m)

w_end = time.time()
p_end = time.process_time()
print(f"Elapsed time {w_end-w_start:.2f} s (CPU: {p_end-p_start:.2f} s)")

#### 18.6 seconds before refactoring (6/6/2020)
#### 3.88 seconds after refactoring (6/6/2020)

# Timers restarted for the next timed section.
w_start = time.time()
p_start = time.process_time()
def main():
    """Read output files, drop redundant conformers, and write new inputs.

    Command line: ``parse_conformations.py [-c CUTOFF] [-C RMSD_CUTOFF]
    prefix files...``

    Steps:
      1. Parse every file in parallel with ``read`` and collect the last
         molecule of each into one conformational ensemble, tagged with
         iteration count, successful terminations and imaginary
         frequencies.
      2. Remove redundant conformers using ``--rmsd_cutoff``.
      3. Print a table of the survivors sorted by relative energy.
      4. Write ``{prefix}_cNNN.gjf`` for each conformer, in order of
         increasing relative energy, stopping at the first one whose
         relative energy exceeds ``--cutoff``.
    """
    ensemble = cctk.ConformationalEnsemble()

    parser = argparse.ArgumentParser(prog="parse_conformations.py")
    parser.add_argument("--cutoff", "-c", type=float, default=15)
    parser.add_argument("--rmsd_cutoff", "-C", type=float, default=0.5)
    parser.add_argument("prefix", type=str)
    parser.add_argument("files", nargs='+')
    args = vars(parser.parse_args(sys.argv[1:]))

    print("\n\033[3mreading files:\033[0m")

    # The context manager shuts the worker processes down once every
    # result has been consumed, instead of leaking the pool.
    with mp.Pool(processes=16) as pool:
        for output_file in tqdm(pool.imap(read, args['files']), total=len(args['files'])):
            molecule = output_file.get_molecule()
            # Add the last (molecule, properties) pair of the parsed ensemble.
            ensemble.add_molecule(*list(output_file.ensemble.items())[-1])
            ensemble[molecule, "iters"] = len(output_file.ensemble)
            ensemble[molecule, "success"] = output_file.successful_terminations
            ensemble[molecule, "imaginary"] = output_file.imaginaries()

    if len(ensemble) == 0:
        print("no jobs to analyze!")
        sys.exit()

    print(f"{len(ensemble)} files read.")
    ensemble, rmsds = ensemble.eliminate_redundant(
        RMSD_cutoff=args['rmsd_cutoff'], return_RMSD=True
    )
    print(
        f"{len(ensemble)} distinct conformations identified (RMSD cutoff of {args['rmsd_cutoff']:.2f})."
    )

    print("\n\033[3manalysis:\033[0m")
    property_names = [
        "filename", "new_filename", "rmsd", "iters", "energy", "success",
        "imaginary"
    ]
    values = ensemble[:, property_names]
    # A single surviving conformer comes back as one flat row; wrap it so
    # that the DataFrame constructor always receives a list of rows.
    if not isinstance(values[0], list):
        values = [values]

    df = pd.DataFrame(values, columns=property_names).fillna(0)
    df["rmsd"] = rmsds
    df["new_filename"] = [
        f"{args['prefix']}_c{i:03}.gjf" for i in range(len(df))
    ]
    # 627.509469: presumably the hartree -> kcal/mol conversion factor.
    df["rel_energy"] = (df.energy - df.energy.min()) * 627.509469
    df.sort_values("rel_energy", inplace=True)

    # Bold the headers for display only, then restore the plain names.
    normal_col_names = copy.deepcopy(df.columns)
    df.columns = [f"\033[1m{c}\033[0m" for c in df.columns]
    print(tabulate(df, headers="keys", tablefmt="presto", floatfmt=".5f"))
    df.columns = normal_col_names

    template_file = read(args['files'][0])

    print("\n\033[3moutput:\033[0m")
    # df is now ordered by relative energy, while molecule_list() is still
    # in ensemble order.  sort_values keeps the original row labels, so
    # each label is the position of that row's molecule in the ensemble.
    # Looking molecules up by label keeps every geometry paired with its
    # own filename and energy; a positional zip would mix them up.
    molecules = list(ensemble.molecule_list())
    for row_label, new_filename, rel_energy in zip(
        df.index, df["new_filename"], df["rel_energy"]
    ):
        if rel_energy <= args["cutoff"]:
            template_file.write_file(new_filename, molecule=molecules[row_label])
            print(f"Wrote {new_filename}")
        else:
            print(
                f"Skipping {new_filename} and all subsequent rows: relative energy of {rel_energy:.2f} exceeds cutoff of {args['cutoff']:.2f}"
            )
            break