def molecule(self, idxs=None):
    """
    Build a ``cctk.Molecule`` from this object's positions.

    Args:
        idxs: optional index (or collection of indices) used to subset both
            ``self.trajectory.atomic_numbers`` and ``self.positions``;
            when ``None`` every atom is included

    Returns:
        new ``cctk.Molecule`` object
    """
    atomic_numbers = self.trajectory.atomic_numbers
    positions = self.positions

    if idxs is None:
        return cctk.Molecule(atomic_numbers, positions)

    return cctk.Molecule(atomic_numbers[idxs], positions[idxs])
def test_combine(self):
    """Combining two one-atom molecules should keep both atoms and sum the charges."""
    anion = cctk.Molecule(
        np.array([12], dtype=np.int8),
        [[0, 0, 0]],
        charge=-1,
        multiplicity=1,
    )
    dication = cctk.Molecule(
        np.array([12], dtype=np.int8),
        [[2, 0, 0]],
        charge=2,
        multiplicity=1,
    )

    combined = cctk.Molecule.combine_molecules(anion, dication)

    self.assertTrue(isinstance(combined, cctk.Molecule))
    self.assertEqual(combined.num_atoms(), 2)
    # -1 + 2 = +1 overall
    self.assertEqual(combined.charge, 1)
    self.assertEqual(combined.multiplicity, 1)
def test_periodic_boundary_conditions(self):
    """Check connectivity assignment and solvent-shell trimming with periodic boundaries."""
    # two fluorines that are far apart in the box but adjacent across its edge
    difluorine = cctk.Molecule(
        [9, 9],
        [[0, 5, 4.5], [0, 0, 0.5]],
        charge=0,
        multiplicity=1,
    )

    difluorine.assign_connectivity()
    self.assertFalse(difluorine.get_bond_order(1, 2))

    box = np.array([5, 5, 5])
    difluorine.assign_connectivity(periodic_boundary_conditions=box)
    self.assertTrue(difluorine.get_bond_order(1, 2))

    # a solvated system read from disk
    gaussian_input = cctk.GaussianFile.read_file("test/static/periodic.gjf")
    solvated = gaussian_input.get_molecule()
    solvated = solvated.assign_connectivity(
        periodic_boundary_conditions=np.array([20, 20, 20])
    )
    solvated.center_periodic(1, 20)

    trimmed = solvated.limit_solvent_shell(num_solvents=10)
    self.assertEqual(trimmed.num_atoms(), 83)

    trimmed_idxs = solvated.limit_solvent_shell(num_solvents=10, return_idxs=True)
    self.assertEqual(len(trimmed_idxs), 83)

    trimmed_from_atom = solvated.limit_solvent_shell(
        num_solvents=10, distance_from_atom=1
    )
    self.assertEqual(trimmed_from_atom.num_atoms(), 83)
def test_add_atoms(self):
    """Atoms can be appended by symbol, either at a given position or at a centroid."""
    helium = cctk.Molecule(np.array([2], dtype=np.int8), [[0, 0, 0]])
    self.assertEqual(helium.num_atoms(), 1)

    helium.add_atom("He", [1, 0, 0])
    self.assertListEqual(helium.atomic_numbers.tolist(), [2, 2])
    self.assertEqual(helium.num_atoms(), 2)

    helium.add_atom("Ar", [3, 0, 0])
    self.assertEqual(helium.num_atoms(), 3)

    # the centroid of atoms 2 (x=1) and 3 (x=3) lies at x=2
    helium.add_atom_at_centroid("He", [2, 3])
    self.assertEqual(helium.num_atoms(), 4)
    self.assertListEqual(list(helium.get_vector(4)), [2, 0, 0])
def test_translate(self):
    """Translating a molecule shifts its geometry and keeps the one-indexed array type."""
    single_atom = cctk.Molecule(np.array([12], dtype=np.int8), [[0, 0, 0]])

    shift = np.array([1.5234, 1.231234, -1.77777])
    single_atom = single_atom.translate_molecule(shift)

    # compare coordinate by coordinate with a tolerance instead of exact list equality
    first_row = single_atom.geometry.tolist()[0]
    for target, actual in zip(first_row, list(shift)):
        self.assertTrue(abs(target - actual) < 0.00001)

    self.assertTrue(isinstance(single_atom.geometry, cctk.OneIndexedArray))

    # a zero translation must leave the first atom where it was
    loaded = self.load_molecule()
    no_shift = np.zeros(shape=3)
    loaded_shifted = loaded.translate_molecule(no_shift)
    self.assertListEqual(
        loaded.geometry.tolist()[0],
        loaded_shifted.geometry.tolist()[0],
    )
def file_from_lines(cls, lines):
    """
    Build an ``XYZFile`` from the lines of a single ``.xyz`` structure.

    The first line must hold the number of atoms and the second line is used
    as the title. Every following non-blank line is read as an atom label
    followed by x, y and z coordinates. The label may be an atomic number
    (``6``), an element symbol (``C``), or a symbol followed by digits
    (``C1``, ``Cl12``).

    Args:
        lines (list of str): the lines of the file

    Returns:
        ``XYZFile`` wrapping the parsed ``cctk.Molecule`` and the title

    Raises:
        ValueError: if the atom count cannot be read, if an atom line cannot
            be parsed, or if the number of atom lines does not match the
            declared atom count
    """
    try:
        num_atoms = int(lines[0])
    except (IndexError, TypeError, ValueError) as e:
        raise ValueError("can't get the number of atoms from the first line!") from e

    title = lines[1]

    atomic_numbers = np.zeros(shape=num_atoms, dtype=np.int8)
    geometry = np.zeros(shape=(num_atoms, 3))

    # count atoms separately from the line index so that blank lines between
    # atom records do not shift the row that later atoms are written to
    atoms_read = 0
    for index, line in enumerate(lines[2:]):
        # ignore blank lines
        if not line.strip():
            continue

        if atoms_read >= num_atoms:
            raise ValueError(
                f"wrong number of atoms! expected {num_atoms} but line {index+2} holds another atom: {line}"
            )

        # split on any run of whitespace so that tab-separated files also work
        pieces = line.split()
        try:
            label = pieces[0]
            if re.match("[0-9]", label):
                atomic_numbers[atoms_read] = int(label)
            else:
                # mdtraj writes labels like "C1", for some reason.
                # the "+" must sit inside the group: a repeated group only
                # keeps its last repetition, which would turn "Cl1" into "l"
                m = re.match("([A-Za-z]+)[0-9]+", label)
                symbol = m.group(1) if m else label
                atomic_numbers[atoms_read] = int(get_number(symbol))

            # index explicitly so that a missing coordinate raises instead of broadcasting
            geometry[atoms_read][0] = float(pieces[1])
            geometry[atoms_read][1] = float(pieces[2])
            geometry[atoms_read][2] = float(pieces[3])
        except Exception as e:
            raise ValueError(f"can't parse line {index+2}: {line}") from e

        atoms_read += 1

    if atoms_read != num_atoms:
        raise ValueError(f"wrong number of atoms! expected {num_atoms}, found {atoms_read}")

    molecule = cctk.Molecule(atomic_numbers, geometry)
    return XYZFile(molecule, title)
def test_mass_spec(self):
    """A single sodium atom should give one peak at mass 23 with weight 1."""
    sodium = cctk.Molecule(np.array([11], dtype=np.int8), [[0, 0, 0]])

    spectrum = sodium.calculate_mass_spectrum()
    masses, weights = spectrum

    self.assertListEqual(list(masses), [23.])
    self.assertListEqual(list(weights), [1.])
def read_file_fast(file_text, filename, link1idx, max_len=20000, extended_opt_info=False):
    """
    Parse the text of a Gaussian output file into a ``cctk.GaussianFile``.

    A single Aho-Corasick pass over ``file_text`` locates every keyword of
    interest. Keyword hits are expanded to the full line they occur on, and
    block hits are expanded to the text between a start marker and an end
    marker. The collected lines and blocks are then handed to the individual
    ``parse_*`` helpers.

    Args:
        file_text (str): the text to search
        filename (str): stored with every molecule's properties and used in error messages
        link1idx (int): stored with every molecule's properties as ``link1_idx``
        max_len (int): maximum number of newlines a block may contain before
            the search for its end marker is abandoned
        extended_opt_info (bool): for optimization jobs, whether to also record
            maximum force/displacement, gradients, internal forces and the
            predicted change in energy

    Returns:
        ``cctk.GaussianFile`` whose ensemble holds one molecule per parsed geometry

    Raises:
        ValueError: if no title block is found, if more than one enthalpy or
            Gibbs free energy is found, or if frequencies cannot be extracted
        AssertionError: if a force job contains more than one geometry
    """
    #### "Make your bottleneck routines fast, everything else clear" - M. Scott Shell, UCSB
    #### Welcome to the fast part!

    #### Here we identify all the lines we're going to scrape
    words = [
        "SCF Done",
        "Entering Link 1",
        "Normal termination",
        "Elapsed time",
        "Multiplicity",
        "RMS Force",  # 5
        "RMS Displacement",
        "Maximum Force",
        "Maximum Displacement",
        "Cartesian Forces",
        "Internal Forces",  # 10
        "Predicted change in Energy",
        "thermal Enthalpies",
        "thermal Free Energies",
        "Frequencies",
        "Temperature",  # 15
        "Isotropic",
        "EUMP2",
        "EUMP3",
        "UMP4(SDTQ)",
        "Wavefunction amplitudes converged",  # 20
    ]

    #### And here are the blocks of text
    #### format: [start, stop, num]
    # NOTE(review): markers are matched character-for-character, including
    # internal whitespace -- confirm strings such as "Leave Link 202" against a real log
    blocks = [
        ["#p", "----", 1],
        ["/99;", "Symbolic Z-matrix", 1],
        ["The following ModRedundant input section", "\n \n", 1],
        [
            ["Input orientation", "Standard orientation", "Cartesian Coordinates"],
            "Leave Link 202",
            1000,
        ],
        ["Wallingford", "#p", 1],
        ["Initial Parameters", "! A", 1],  # 5
        ["Total nuclear spin-spin coupling J", "Leave Link", 1],
        ["Forces (Hartrees/Bohr)", "Cartesian Forces", 1],
        [
            "Hirshfeld charges, spin densities, dipoles, and CM5 charges",
            " Hirshfeld charges",
            1,
        ],
        ["Mulliken charges", "Sum of Mulliken charges", 1],
        ["Electronic spatial extent", "Quadrupole moment", 1],  # 10
        ["normal coordinates", "Thermochemistry", 1],
        ["Isotropic", "Eigenvalues", 1000],
    ]

    word_matches = [[] for _ in words]
    block_matches = [[] for _ in blocks]

    # keywords are stored with an int payload, block starts with a ("start", idx) tuple
    A = ahocorasick.Automaton()

    for idx, word in enumerate(words):
        A.add_word(word, idx)

    for idx, b in enumerate(blocks):
        if isinstance(b[0], list):
            for start in b[0]:
                A.add_word(start, ("start", idx))
        else:
            A.add_word(b[0], ("start", idx))

    #### perform search
    A.make_automaton()
    found_words = A.iter(file_text)

    file_len = len(file_text)

    #### now, we have to expand our one-character matches to whole lines/blocks
    for position, idx in found_words:
        if isinstance(idx, int):
            # take the whole line around the hit. find/rfind also cope with a
            # hit on the first line or on a last line without a trailing newline
            line_start = file_text.rfind("\n", 0, position) + 1
            line_end = file_text.find("\n", position)
            if line_end < 0:
                line_end = file_len
            word_matches[idx].append(file_text[line_start:line_end])

        elif isinstance(idx, tuple):
            idx = idx[1]

            # stop collecting once this block type has reached its quota
            if len(block_matches[idx]) >= blocks[idx][2]:
                continue

            match = ""
            # NOTE(review): for block 3 the start marker is a list, so len() is
            # the number of alternatives rather than the length of the matched
            # string; the block text then begins inside the marker
            i = position - len(blocks[idx][0]) + 1
            end = blocks[idx][1]

            stepsize = 1000

            #### we're looking for the end, but we take steps with length ``stepsize`` to go faster
            while match[-1 * (stepsize + len(end)):-1].count(end) == 0 and match.count("\n") < max_len:
                match = match + file_text[i:i + stepsize]
                i += stepsize

                if i > file_len:
                    break

            match = match.split(end)[0]

            # special geometry handling :/
            # keep one geometry per "SCF Done" line seen so far; a later
            # orientation block for the same step overwrites the earlier one
            if idx == 3:
                if len(block_matches[3]) == len(word_matches[0]):
                    block_matches[3].append(match)
                else:
                    block_matches[3][-1] = match
            else:
                block_matches[idx].append(match)

    del file_text  # here, have your RAM back!

    if len(block_matches[1]) == 0:
        raise ValueError(
            f"Can't find a title block - something is wrong with {filename}! (cctk requires Gaussian output files to have been run in ``#p`` verbose mode)"
        )

    #### and from here, we're off to the races!
    n, g = parse_geometry(block_matches[3])
    title, link0, route_card, footer, job_types = parse_header_footer(
        block_matches[0], block_matches[1], block_matches[2], block_matches[4])
    energies, scf_iterations = parse_energies(word_matches[0])
    success, elapsed_time = parse_success_elapsed_time(word_matches[2], word_matches[3])
    charge, multip = parse_charge_multiplicity(word_matches[4])
    bonds = parse_bonds(block_matches[5])

    # post-HF methods give weird energies
    if re.search("mp2", route_card, re.IGNORECASE):
        energies = parse_mp2_energies(word_matches[17])
    elif re.search("mp3", route_card, re.IGNORECASE):
        energies = parse_mp3_energies(word_matches[18])
    elif re.search("mp4", route_card, re.IGNORECASE):
        energies = parse_mp4_energies(word_matches[19])
    elif re.search("ccsd", route_card, re.IGNORECASE):
        energies = parse_cc_energies(word_matches[20])
    elif re.search("cisd", route_card, re.IGNORECASE):
        energies = parse_ci_energies(word_matches[20])

    f = cctk.GaussianFile(job_types=job_types, route_card=route_card, link0=link0, footer=footer,
                          success=success, elapsed_time=elapsed_time, title=title)

    molecules = [None] * len(g)
    properties = [{} for _ in range(len(g))]
    for idx, geom in enumerate(g):
        molecules[idx] = cctk.Molecule(n[0], geom, charge=charge, multiplicity=multip, bonds=bonds, checks=False)
        if idx < len(energies):
            properties[idx]["energy"] = energies[idx]
        if idx < len(scf_iterations):
            properties[idx]["scf_iterations"] = scf_iterations[idx]
        properties[idx]["link1_idx"] = link1idx
        properties[idx]["filename"] = filename
        properties[idx]["iteration"] = idx

    if cctk.GaussianJobType.OPT in job_types:
        rms_forces = extract_parameter(word_matches[5], 2)
        rms_disp = extract_parameter(word_matches[6], 2)

        if extended_opt_info:
            max_forces = extract_parameter(word_matches[7], 2)
            max_disp = extract_parameter(word_matches[8], 2)
            rms_grad = extract_parameter(word_matches[9], 5)
            max_grad = extract_parameter(word_matches[9], 3)
            rms_int = extract_parameter(word_matches[10], 5)
            max_int = extract_parameter(word_matches[10], 3)
            delta_e = extract_parameter(word_matches[11], 3, cast_to_float=False)

        for idx, force in enumerate(rms_forces):
            properties[idx]["rms_force"] = force
            properties[idx]["rms_displacement"] = rms_disp[idx]

            if extended_opt_info:
                if idx < len(max_forces):
                    properties[idx]["max_force"] = max_forces[idx]

                if idx < len(max_disp):
                    properties[idx]["max_displacement"] = max_disp[idx]

                if idx < len(max_grad):
                    properties[idx]["max_gradient"] = max_grad[idx]

                if idx < len(rms_grad):
                    properties[idx]["rms_gradient"] = rms_grad[idx]

                if idx < len(max_int):
                    properties[idx]["max_internal_force"] = max_int[idx]

                if idx < len(rms_int):
                    properties[idx]["rms_internal_force"] = rms_int[idx]

                if idx < len(delta_e):
                    # strip the "Energy=" prefix and turn the "D" exponent marker into "E" for float()
                    change_in_energy = re.sub(r"Energy=", "", delta_e[idx])
                    properties[idx]["predicted_change_in_energy"] = float(change_in_energy.replace('D', 'E'))

    if cctk.GaussianJobType.FREQ in job_types:
        enthalpies = extract_parameter(word_matches[12], 6)
        if len(enthalpies) == 1:
            properties[-1]["enthalpy"] = enthalpies[0]
        elif len(enthalpies) > 1:
            raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}")

        gibbs_vals = extract_parameter(word_matches[13], 7)
        if len(gibbs_vals) == 1:
            properties[-1]["gibbs_free_energy"] = gibbs_vals[0]
        elif len(gibbs_vals) > 1:
            raise ValueError(f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}")

        vibrational_modes = parse_modes(block_matches[11], num_atoms=molecules[-1].num_atoms(),
                                        hpmodes=re.search("hpmodes", route_card))
        molecules[-1].vibrational_modes = vibrational_modes

        frequencies = []
        try:
            frequencies += extract_parameter(word_matches[14], 2)

            # very small molecules might only have 1 or 2 freqs
            try:
                frequencies += extract_parameter(word_matches[14], 3)
            except Exception:
                pass
            try:
                frequencies += extract_parameter(word_matches[14], 4)
            except Exception:
                pass

            properties[-1]["frequencies"] = sorted(frequencies)
        except Exception as e:
            raise ValueError("error finding frequencies") from e

        temperature = extract_parameter(word_matches[15], 1)
        if len(temperature) == 1:
            properties[-1]["temperature"] = temperature[0]

            # the correction needs the Gibbs free energy, so skip it when none was found
            if len(gibbs_vals) == 1:
                corrected_free_energy = get_corrected_free_energy(
                    gibbs_vals[0], frequencies, frequency_cutoff=100.0, temperature=temperature[0])
                properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy)

    if cctk.GaussianJobType.NMR in job_types:
        nmr_shifts, shielding_tensors = read_nmr_shifts(block_matches[12], molecules[0].num_atoms())
        if nmr_shifts is not None:
            properties[-1]["isotropic_shielding"] = nmr_shifts.view(cctk.OneIndexedArray)
            properties[-1]["shielding_tensors"] = shielding_tensors

        if re.search("nmr=mixed", f.route_card, flags=re.IGNORECASE) or re.search(
                "nmr=spinspin", f.route_card, flags=re.IGNORECASE):
            couplings = read_j_couplings(block_matches[6], molecules[0].num_atoms())
            if couplings is not None:
                properties[-1]["j_couplings"] = couplings

    if cctk.GaussianJobType.FORCE in job_types:
        assert len(molecules) == 1, "force jobs should not be combined with optimizations!"
        forces = parse_forces(block_matches[7])
        properties[0]["forces"] = forces

    if cctk.GaussianJobType.POP in job_types:
        # parenthesised on purpose: ``and`` binds tighter than ``or``, and the
        # block must be present whichever keyword requested it
        wants_hirshfeld = re.search("hirshfeld", f.route_card) or re.search("cm5", f.route_card)
        if wants_hirshfeld and len(block_matches[8]) > 0:
            charges, spins = parse_hirshfeld(block_matches[8])
            properties[-1]["hirshfeld_charges"] = charges
            properties[-1]["hirshfeld_spins"] = spins

    # best effort: Mulliken charges and dipole are recorded only when they can be parsed
    try:
        charges, dipole, dipole_v = parse_charges_dipole(block_matches[9], block_matches[10])
        properties[-1]["mulliken_charges"] = charges
        properties[-1]["dipole_moment"] = dipole
        properties[-1]["dipole_vector"] = dipole_v
    except Exception:
        pass

    for mol, prop in zip(molecules, properties):
        f.ensemble.add_molecule(mol, properties=prop)

    f.check_has_properties()
    return f
def remove_group_from_molecule(molecule, atom1, atom2, return_mapping=False):
    """
    The microscopic reverse of ``add_group_to_molecule`` -- splits a ``Molecule`` along the ``atom1``–``atom2`` bond
    and returns a new ``Molecule`` object (the ``atom1`` side) and a new ``Group`` (the ``atom2`` side).

    The new objects will be capped with hydrogens; atom ordering will be preserved!

    Args:
        molecule (Molecule): the molecule to change
        atom1 (int): the 1-indexed atom number on `molecule` to make part of the new ``Molecule`` object
        atom2 (int): the 1-indexed atom number on `molecule` to make part of the new ``Group`` object
        return_mapping (bool): whether or not to return dictionaries mapping atom numbers from starting materials to products

    Returns:
        new Molecule object
        new Group object

        (optional) molecule_to_molecule dictionary mapping atom numbers from starting molecule (key) to new molecule atom numbers (val)
        (optional) molecule_to_group dictionary mapping atom numbers from starting molecule (key) to new group atom numbers (val)

    Raises:
        TypeError: if ``atom1`` or ``atom2`` cannot be cast to ``int``
    """
    try:
        atom1 = int(atom1)
        atom2 = int(atom2)
    except (TypeError, ValueError, OverflowError) as e:
        raise TypeError("atom numbers not castable to int") from e

    # work on a copy so that the caller's molecule keeps its atom1-atom2 bond
    molecule = copy.deepcopy(molecule)
    molecule._check_atom_number(atom1)
    molecule._check_atom_number(atom2)

    #### define mapping dicts
    fragment1, fragment2 = molecule._get_bond_fragments(atom1, atom2)
    molecule_to_molecule = {x: i + 1 for i, x in enumerate(fragment1)}
    molecule_to_group = {x: i + 1 for i, x in enumerate(fragment2)}

    #### create new molecules
    new_mol = cctk.Molecule(molecule.atomic_numbers[fragment1], molecule.geometry[fragment1])
    group = cctk.Molecule(molecule.atomic_numbers[fragment2], molecule.geometry[fragment2])

    #### add capping H to new_mol
    # the H starts where atom2 was, then the bond is rescaled by swapping
    # atom2's covalent radius for hydrogen's
    new_mol.add_atom("H", molecule.geometry[atom2])
    molecule_to_molecule[atom2] = new_mol.num_atoms()
    old_radius = get_covalent_radius(molecule.atomic_numbers[atom2])
    H_radius = get_covalent_radius(1)
    new_dist = new_mol.get_distance(
        molecule_to_molecule[atom1], molecule_to_molecule[atom2]) - old_radius + H_radius
    new_mol.set_distance(molecule_to_molecule[atom1], molecule_to_molecule[atom2], new_dist)
    new_mol.add_bond(molecule_to_molecule[atom1], molecule_to_molecule[atom2])

    #### add capping H to new group
    group.add_atom("H", molecule.geometry[atom1])
    molecule_to_group[atom1] = group.num_atoms()
    old_radius = get_covalent_radius(molecule.atomic_numbers[atom1])
    new_dist = group.get_distance(
        molecule_to_group[atom2], molecule_to_group[atom1]) - old_radius + H_radius
    group.set_distance(molecule_to_group[atom2], molecule_to_group[atom1], new_dist)
    group.add_bond(molecule_to_group[atom2], molecule_to_group[atom1])

    #### add bonds to nascent molecules
    # NOTE(review): add_bond is called without a bond order here -- confirm
    # that its default is acceptable for multiple bonds
    molecule.remove_bond(atom1, atom2)
    for (a1, a2) in molecule.bonds.edges():
        if a1 in fragment1:
            assert a2 in fragment1, "somehow we have another bond between the two groups!"
            assert molecule_to_molecule[a1] is not None, f"we don't have a mapping for atom {a1}"
            assert molecule_to_molecule[a2] is not None, f"we don't have a mapping for atom {a2}"
            new_mol.add_bond(molecule_to_molecule[a1], molecule_to_molecule[a2])
        elif a1 in fragment2:
            # branch on a1 so that the check on a2 below can actually fail
            assert a2 in fragment2, "somehow we have another bond between the two groups!"
            assert molecule_to_group[a1] is not None, f"we don't have a mapping for atom {a1}"
            assert molecule_to_group[a2] is not None, f"we don't have a mapping for atom {a2}"
            group.add_bond(molecule_to_group[a1], molecule_to_group[a2])

    #### create Group object from group
    group = cctk.Group.new_from_molecule(attach_to=molecule_to_group[atom1], molecule=group)

    if return_mapping:
        return new_mol, group, molecule_to_molecule, molecule_to_group
    else:
        return new_mol, group