Beispiel #1
0
 def molecule(self, idxs=None):
     """
     Build a ``cctk.Molecule`` from this object's positions.

     Args:
         idxs: optional index expression applied to both
             ``self.trajectory.atomic_numbers`` and ``self.positions``;
             when ``None`` the full arrays are used.

     Returns:
         a new ``cctk.Molecule``
     """
     if idxs is None:
         return cctk.Molecule(self.trajectory.atomic_numbers, self.positions)

     selected_numbers = self.trajectory.atomic_numbers[idxs]
     selected_positions = self.positions[idxs]
     return cctk.Molecule(selected_numbers, selected_positions)
Beispiel #2
0
    def test_combine(self):
        """Combine two one-atom molecules and check atom count, charge and multiplicity."""
        anion = cctk.Molecule(
            np.array([12], dtype=np.int8),
            [[0, 0, 0]],
            charge=-1,
            multiplicity=1,
        )
        dication = cctk.Molecule(
            np.array([12], dtype=np.int8),
            [[2, 0, 0]],
            charge=2,
            multiplicity=1,
        )

        combined = cctk.Molecule.combine_molecules(anion, dication)

        self.assertTrue(isinstance(combined, cctk.Molecule))
        self.assertEqual(combined.num_atoms(), 2)
        # charges of -1 and +2 are expected to give a net charge of +1
        self.assertEqual(combined.charge, 1)
        self.assertEqual(combined.multiplicity, 1)
Beispiel #3
0
    def test_periodic_boundary_conditions(self):
        """Check connectivity assignment and solvent-shell trimming with periodic boundaries."""
        pair = cctk.Molecule(
            [9, 9],
            [[0, 5, 4.5], [0, 0, 0.5]],
            charge=0,
            multiplicity=1,
        )

        # without a periodic box, atoms 1 and 2 are expected to be unbonded
        pair.assign_connectivity()
        self.assertFalse(pair.get_bond_order(1, 2))

        # with a 5 x 5 x 5 box, the same two atoms are expected to be bonded
        small_box = np.array([5, 5, 5])
        pair.assign_connectivity(periodic_boundary_conditions=small_box)
        self.assertTrue(pair.get_bond_order(1, 2))

        gaussian_input = cctk.GaussianFile.read_file("test/static/periodic.gjf")
        solvated = gaussian_input.get_molecule()
        large_box = np.array([20, 20, 20])
        solvated = solvated.assign_connectivity(
            periodic_boundary_conditions=large_box)
        solvated.center_periodic(1, 20)

        trimmed = solvated.limit_solvent_shell(num_solvents=10)
        self.assertEqual(trimmed.num_atoms(), 83)

        trimmed_idxs = solvated.limit_solvent_shell(num_solvents=10,
                                                    return_idxs=True)
        self.assertEqual(len(trimmed_idxs), 83)

        trimmed_around_atom = solvated.limit_solvent_shell(
            num_solvents=10, distance_from_atom=1)
        self.assertEqual(trimmed_around_atom.num_atoms(), 83)
Beispiel #4
0
    def test_add_atoms(self):
        """Add atoms one at a time (and at a centroid) and check the running atom count."""
        helium = cctk.Molecule(np.array([2], dtype=np.int8), [[0, 0, 0]])
        self.assertEqual(helium.num_atoms(), 1)

        helium.add_atom("He", [1, 0, 0])
        self.assertListEqual(helium.atomic_numbers.tolist(), [2, 2])
        self.assertEqual(helium.num_atoms(), 2)

        helium.add_atom("Ar", [3, 0, 0])
        self.assertEqual(helium.num_atoms(), 3)

        # atoms 2 and 3 sit at x=1 and x=3, so their centroid is expected at x=2
        helium.add_atom_at_centroid("He", [2, 3])
        self.assertEqual(helium.num_atoms(), 4)
        centroid_position = list(helium.get_vector(4))
        self.assertListEqual(centroid_position, [2, 0, 0])
Beispiel #5
0
    def test_translate(self):
        """Translate molecules and check the resulting geometry."""
        single_atom = cctk.Molecule(np.array([12], dtype=np.int8), [[0, 0, 0]])

        shift = np.array([1.5234, 1.231234, -1.77777])
        single_atom = single_atom.translate_molecule(shift)

        # compare component-wise with a tolerance rather than exact list equality
        first_row = single_atom.geometry.tolist()[0]
        for found, expected in zip(first_row, list(shift)):
            self.assertTrue(abs(found - expected) < 0.00001)

        self.assertTrue(isinstance(single_atom.geometry, cctk.OneIndexedArray))

        # translating by the zero vector should leave the first row unchanged
        loaded = self.load_molecule()
        zero_shift = np.zeros(shape=3)

        loaded_shifted = loaded.translate_molecule(zero_shift)
        self.assertListEqual(loaded.geometry.tolist()[0],
                             loaded_shifted.geometry.tolist()[0])
Beispiel #6
0
    def file_from_lines(cls, lines):
        """
        Build an ``XYZFile`` from the lines of a single xyz-format structure.

        The first line must hold the number of atoms and the second line is used as the title.
        Every following non-blank line must hold an atom label followed by three coordinates.
        An atom label may be an atomic number (``6``), an element symbol (``C``),
        or an element symbol with a numeric suffix (``C1``, ``Cl12``).

        Args:
            lines (list of str): the lines to parse

        Returns:
            new ``XYZFile`` object

        Raises:
            ValueError: if the atom count can't be read, if an atom line can't be parsed,
                or if the number of atom lines differs from the declared atom count
        """
        try:
            num_atoms = int(lines[0])
        except (IndexError, TypeError, ValueError) as e:
            raise ValueError(
                "can't get the number of atoms from the first line!") from e

        title = lines[1]

        atomic_numbers = np.zeros(shape=num_atoms, dtype=np.int8)
        geometry = np.zeros(shape=(num_atoms, 3))

        # mdtraj writes labels like "C1"; capture the *whole* run of letters
        # (a repeated one-character group would only keep the last letter)
        suffixed_symbol = re.compile(r"([A-Za-z]+)[0-9]+")

        # atoms are counted separately from lines so that blank lines don't shift the atom index
        atom_idx = 0
        for line_idx, line in enumerate(lines[2:], start=2):
            # ignore blank lines
            if not line.strip():
                continue

            if atom_idx >= num_atoms:
                raise ValueError(
                    f"wrong number of atoms! expected {num_atoms} but found extra atom on line {line_idx}: {line}"
                )

            # split on any whitespace, so tab-separated files parse too
            pieces = line.split()
            try:
                label = pieces[0]
                if re.match("[0-9]", label):
                    number = int(label)
                else:
                    suffixed = suffixed_symbol.match(label)
                    symbol = suffixed.group(1) if suffixed else label
                    number = int(get_number(symbol))

                atomic_numbers[atom_idx] = number
                geometry[atom_idx][0] = float(pieces[1])
                geometry[atom_idx][1] = float(pieces[2])
                geometry[atom_idx][2] = float(pieces[3])
            except Exception as e:
                raise ValueError(f"can't parse line {line_idx}: {line}") from e

            atom_idx += 1

        if atom_idx != num_atoms:
            raise ValueError(
                f"wrong number of atoms! expected {num_atoms} but found {atom_idx}"
            )

        molecule = cctk.Molecule(atomic_numbers, geometry)
        return XYZFile(molecule, title)
Beispiel #7
0
 def test_mass_spec(self):
     """A single atom with atomic number 11 should give one peak at mass 23 with weight 1."""
     sodium = cctk.Molecule(np.array([11], dtype=np.int8), [[0, 0, 0]])
     peak_masses, peak_weights = sodium.calculate_mass_spectrum()

     self.assertListEqual(list(peak_masses), [23.])
     self.assertListEqual(list(peak_weights), [1.])
Beispiel #8
0
def read_file_fast(file_text,
                   filename,
                   link1idx,
                   max_len=20000,
                   extended_opt_info=False):
    """
    Parse the text of a Gaussian output file into a ``cctk.GaussianFile``.

    A single Aho-Corasick pass over ``file_text`` locates every keyword line and every block start
    listed below; the matches are then expanded to whole lines/blocks and handed to the
    individual ``parse_*`` helpers.

    Args:
        file_text (str): the text to search
        filename (str): stored in each molecule's ``filename`` property and used in error messages
        link1idx: stored in each molecule's ``link1_idx`` property
        max_len (int): a block match stops growing once it contains this many newlines
        extended_opt_info (bool): for optimization jobs, also extract max force/displacement,
            Cartesian and internal force summaries, and the predicted change in energy

    Returns:
        ``cctk.GaussianFile`` whose ensemble holds one molecule (with its properties dict) per parsed geometry

    Raises:
        ValueError: if no title block is found, if more than one enthalpy or Gibbs free energy is found,
            or if frequencies can't be extracted
        AssertionError: if a force job contains more than one geometry
    """

    #### "Make your bottleneck routines fast, everything else clear" - M. Scott Shell, UCSB
    #### Welcome to the fast part!

    #### Here we identify all the lines we're going to scrape
    #### (the position of each entry matters: ``word_matches`` is indexed by it below)
    words = [
        "SCF Done",
        "Entering Link 1",
        "Normal termination",
        "Elapsed time",
        "Multiplicity",
        "RMS     Force",  #5
        "RMS     Displacement",
        "Maximum Force",
        "Maximum Displacement",
        "Cartesian Forces",
        "Internal  Forces",  #10
        "Predicted change in Energy",
        "thermal Enthalpies",
        "thermal Free Energies",
        "Frequencies",
        "Temperature",  #15
        "Isotropic",
        "EUMP2",
        "EUMP3",
        "UMP4(SDTQ)",
        "Wavefunction amplitudes converged",  #20
    ]

    #### And here are the blocks of text
    #### format: [start, stop, num]
    #### ``start`` may be a list of alternative start strings; ``num`` caps how many matches are kept
    #### (the position of each entry matters: ``block_matches`` is indexed by it below)

    blocks = [
        ["#p", "----", 1],
        ["/99;", "Symbolic Z-matrix", 1],
        ["The following ModRedundant input section", "\n \n", 1],
        [
            [
                "Input orientation", "Standard orientation",
                "Cartesian Coordinates"
            ],
            "Leave Link  202",
            1000,
        ],
        ["Wallingford", "#p", 1],
        ["Initial Parameters", "! A", 1],  #5
        ["Total nuclear spin-spin coupling J", "Leave Link", 1],
        ["Forces (Hartrees/Bohr)", "Cartesian Forces", 1],
        [
            "Hirshfeld charges, spin densities, dipoles, and CM5 charges",
            " Hirshfeld charges", 1
        ],
        ["Mulliken charges", "Sum of Mulliken charges", 1],
        ["Electronic spatial extent", "Quadrupole moment", 1],  #10
        ["normal coordinates", "Thermochemistry", 1],
        ["Isotropic", "Eigenvalues", 1000],
    ]

    word_matches = [[] for _ in words]
    block_matches = [[] for _ in blocks]

    A = ahocorasick.Automaton()

    # keyword hits carry a plain int; block-start hits carry a ("start", idx) tuple,
    # which is how the two kinds are told apart in the loop below
    for idx, word in enumerate(words):
        A.add_word(word, idx)

    for idx, b in enumerate(blocks):
        if isinstance(b[0], list):
            for start in b[0]:
                A.add_word(start, ("start", idx))
        else:
            A.add_word(b[0], ("start", idx))

    #### perform search
    A.make_automaton()
    found_words = A.iter(file_text)

    #### now, we have to expand our one-character matches to whole lines/blocks
    #### this is the slowest part
    for position, idx in found_words:
        if isinstance(idx, int):
            stepsize = 10

            # grow forwards in ``stepsize`` chunks until a newline shows up, then cut there
            # NOTE(review): assumes a newline follows every keyword hit -- confirm for truncated files
            match = file_text[position]
            i = position + 1
            while match[-1 - stepsize:].find("\n") < 0:
                match = match + file_text[i:i + stepsize]
                i += stepsize

            match = match.split("\n")[0]

            # grow backwards the same way to recover the start of the line
            j = position
            while match[:stepsize].find("\n") < 0:
                match = file_text[j - stepsize:j] + match
                j += -1 * stepsize

            match = match.split("\n")[-1]
            word_matches[idx].append(match)

        elif isinstance(idx, tuple):
            idx = idx[1]
            # already have as many of this block as we want
            if len(block_matches[idx]) >= blocks[idx][2]:
                continue

            match = ""
            i = position - len(blocks[idx][0]) + 1
            end = blocks[idx][1]

            stepsize = 1000
            file_len = len(file_text)

            #### we're looking for the end, but we take steps with length ``stepsize`` to go faster
            while match[-1 * (stepsize + len(end)):-1].count(
                    end) == 0 and match.count("\n") < max_len:
                match = match + file_text[i:i + stepsize]
                i += stepsize

                if i > file_len:
                    break

            match = match.split(end)[0]

            # special geometry handling :/
            # keep one geometry per "SCF Done" line seen so far: a new geometry is appended only when
            # the counts agree, otherwise the most recent geometry is overwritten
            if idx == 3:
                if len(block_matches[3]) == len(word_matches[0]):
                    block_matches[3].append(match)
                else:
                    block_matches[3][-1] = match

            else:
                block_matches[idx].append(match)

    del file_text  # here, have your RAM back!

    if len(block_matches[1]) == 0:
        raise ValueError(
            f"Can't find a title block - something is wrong with {filename}! (cctk requires Gaussian output files to have been run in ``#p`` verbose mode)"
        )

    #### and from here, we're off to the races!
    n, g = parse_geometry(block_matches[3])
    title, link0, route_card, footer, job_types = parse_header_footer(
        block_matches[0], block_matches[1], block_matches[2], block_matches[4])
    energies, scf_iterations = parse_energies(word_matches[0])
    success, elapsed_time = parse_success_elapsed_time(word_matches[2],
                                                       word_matches[3])
    charge, multip = parse_charge_multiplicity(word_matches[4])
    bonds = parse_bonds(block_matches[5])

    # post-HF methods give weird energies
    if re.search("mp2", route_card, re.IGNORECASE):
        energies = parse_mp2_energies(word_matches[17])
    elif re.search("mp3", route_card, re.IGNORECASE):
        energies = parse_mp3_energies(word_matches[18])
    elif re.search("mp4", route_card, re.IGNORECASE):
        energies = parse_mp4_energies(word_matches[19])
    elif re.search("ccsd", route_card, re.IGNORECASE):
        energies = parse_cc_energies(word_matches[20])
    elif re.search("cisd", route_card, re.IGNORECASE):
        energies = parse_ci_energies(word_matches[20])

    f = cctk.GaussianFile(job_types=job_types,
                          route_card=route_card,
                          link0=link0,
                          footer=footer,
                          success=success,
                          elapsed_time=elapsed_time,
                          title=title)

    molecules = [None] * len(g)
    properties = [{} for _ in range(len(g))]
    for idx, geom in enumerate(g):
        molecules[idx] = cctk.Molecule(n[0],
                                       geom,
                                       charge=charge,
                                       multiplicity=multip,
                                       bonds=bonds,
                                       checks=False)
        if idx < len(energies):
            properties[idx]["energy"] = energies[idx]
        if idx < len(scf_iterations):
            properties[idx]["scf_iterations"] = scf_iterations[idx]
        properties[idx]["link1_idx"] = link1idx
        properties[idx]["filename"] = filename
        properties[idx]["iteration"] = idx

    if cctk.GaussianJobType.OPT in job_types:
        rms_forces = extract_parameter(word_matches[5], 2)
        rms_disp = extract_parameter(word_matches[6], 2)

        if extended_opt_info:
            max_forces = extract_parameter(word_matches[7], 2)
            max_disp = extract_parameter(word_matches[8], 2)
            rms_grad = extract_parameter(word_matches[9], 5)
            max_grad = extract_parameter(word_matches[9], 3)
            rms_int = extract_parameter(word_matches[10], 5)
            max_int = extract_parameter(word_matches[10], 3)
            delta_e = extract_parameter(word_matches[11],
                                        3,
                                        cast_to_float=False)

        for idx, force in enumerate(rms_forces):
            properties[idx]["rms_force"] = force
            properties[idx]["rms_displacement"] = rms_disp[idx]

            if extended_opt_info:
                if idx < len(max_forces):
                    properties[idx]["max_force"] = max_forces[idx]

                if idx < len(max_disp):
                    properties[idx]["max_displacement"] = max_disp[idx]

                if idx < len(max_grad):
                    properties[idx]["max_gradient"] = max_grad[idx]

                if idx < len(rms_grad):
                    properties[idx]["rms_gradient"] = rms_grad[idx]

                if idx < len(max_int):
                    properties[idx]["max_internal_force"] = max_int[idx]

                if idx < len(rms_int):
                    properties[idx]["rms_internal_force"] = rms_int[idx]

                if idx < len(delta_e):
                    # strip the "Energy=" prefix and turn a Fortran-style "D" exponent into "E"
                    change_in_energy = re.sub(r"Energy=", "", delta_e[idx])
                    properties[idx]["predicted_change_in_energy"] = float(
                        change_in_energy.replace('D', 'E'))

    if cctk.GaussianJobType.FREQ in job_types:
        enthalpies = extract_parameter(word_matches[12], 6)
        if len(enthalpies) == 1:
            properties[-1]["enthalpy"] = enthalpies[0]
        elif len(enthalpies) > 1:
            raise ValueError(
                f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}"
            )

        gibbs_vals = extract_parameter(word_matches[13], 7)
        if len(gibbs_vals) == 1:
            properties[-1]["gibbs_free_energy"] = gibbs_vals[0]
        elif len(gibbs_vals) > 1:
            raise ValueError(
                f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}"
            )

        vibrational_modes = parse_modes(block_matches[11],
                                        num_atoms=molecules[-1].num_atoms(),
                                        hpmodes=re.search(
                                            "hpmodes", route_card))
        molecules[-1].vibrational_modes = vibrational_modes

        frequencies = []
        try:
            frequencies += extract_parameter(word_matches[14], 2)

            # very small molecules might only have 1 or 2 freqs
            try:
                frequencies += extract_parameter(word_matches[14], 3)
            except Exception:
                pass
            try:
                frequencies += extract_parameter(word_matches[14], 4)
            except Exception:
                pass

            properties[-1]["frequencies"] = sorted(frequencies)
        except Exception as e:
            raise ValueError("error finding frequencies") from e

        temperature = extract_parameter(word_matches[15], 1)
        if len(temperature) == 1:
            properties[-1]["temperature"] = temperature[0]
            # the correction needs the Gibbs free energy, which may be absent from an incomplete output
            if len(gibbs_vals) == 1:
                corrected_free_energy = get_corrected_free_energy(
                    gibbs_vals[0],
                    frequencies,
                    frequency_cutoff=100.0,
                    temperature=temperature[0])
                properties[-1]["quasiharmonic_gibbs_free_energy"] = float(
                    corrected_free_energy)

    if cctk.GaussianJobType.NMR in job_types:
        nmr_shifts, shielding_tensors = read_nmr_shifts(
            block_matches[12], molecules[0].num_atoms())
        if nmr_shifts is not None:
            properties[-1]["isotropic_shielding"] = nmr_shifts.view(
                cctk.OneIndexedArray)
            properties[-1]["shielding_tensors"] = shielding_tensors

        if re.search("nmr=mixed", f.route_card,
                     flags=re.IGNORECASE) or re.search(
                         "nmr=spinspin", f.route_card, flags=re.IGNORECASE):
            couplings = read_j_couplings(block_matches[6],
                                         molecules[0].num_atoms())
            if couplings is not None:
                properties[-1]["j_couplings"] = couplings

    if cctk.GaussianJobType.FORCE in job_types:
        assert len(
            molecules
        ) == 1, "force jobs should not be combined with optimizations!"
        forces = parse_forces(block_matches[7])
        properties[0]["forces"] = forces

    if cctk.GaussianJobType.POP in job_types:
        # ``and`` binds tighter than ``or``, so the keyword test is evaluated on its own first;
        # the block check must apply to both keywords
        hirshfeld_requested = re.search(
            "hirshfeld", f.route_card) or re.search("cm5", f.route_card)
        if hirshfeld_requested and len(block_matches[8]) > 0:
            charges, spins = parse_hirshfeld(block_matches[8])
            properties[-1]["hirshfeld_charges"] = charges
            properties[-1]["hirshfeld_spins"] = spins

    # Mulliken charges and dipole are best-effort: any parsing failure simply leaves them unset
    try:
        charges, dipole, dipole_v = parse_charges_dipole(
            block_matches[9], block_matches[10])
        properties[-1]["mulliken_charges"] = charges
        properties[-1]["dipole_moment"] = dipole
        properties[-1]["dipole_vector"] = dipole_v
    except Exception:
        pass

    for mol, prop in zip(molecules, properties):
        f.ensemble.add_molecule(mol, properties=prop)

    f.check_has_properties()
    return f
Beispiel #9
0
    def remove_group_from_molecule(molecule,
                                   atom1,
                                   atom2,
                                   return_mapping=False):
        """
        The microscopic reverse of ``add_group_to_molecule`` -- splits a ``Molecule`` along the ``atom1``-``atom2`` bond
        and returns a new ``Molecule`` object (the ``atom1`` side) and a new ``Group`` (the ``atom2`` side).

        The new objects will be capped with hydrogens; atom ordering will be preserved!
        The input ``molecule`` is deep-copied first, so the caller's object is not modified.

        Args:
            molecule (Molecule): the molecule to change
            atom1 (int): the 1-indexed atom number on `molecule` to make part of the new ``Molecule`` object
            atom2 (int): the 1-indexed atom number on `molecule` to make part of the new ``Group`` object
            return_mapping (bool): whether or not to return dictionaries mapping atom numbers from starting materials to products

        Returns:
            new Molecule object
            new Group object

            (optional) molecule_to_molecule dictionary mapping atom numbers from starting molecule (key) to new molecule atom numbers (val)
            (optional) molecule_to_group dictionary mapping atom numbers from starting molecule (key) to new group atom numbers (val)

        Raises:
            TypeError: if ``atom1`` or ``atom2`` can't be cast to ``int``
            AssertionError: if a bond other than ``atom1``-``atom2`` connects the two fragments
        """
        try:
            atom1 = int(atom1)
            atom2 = int(atom2)
        except (TypeError, ValueError, OverflowError) as e:
            raise TypeError("atom numbers not castable to int") from e

        molecule = copy.deepcopy(molecule)
        molecule._check_atom_number(atom1)
        molecule._check_atom_number(atom2)

        #### define mapping dicts
        fragment1, fragment2 = molecule._get_bond_fragments(atom1, atom2)
        molecule_to_molecule = {x: i + 1 for i, x in enumerate(fragment1)}
        molecule_to_group = {x: i + 1 for i, x in enumerate(fragment2)}

        #### create new molecules
        new_mol = cctk.Molecule(molecule.atomic_numbers[fragment1],
                                molecule.geometry[fragment1])
        group = cctk.Molecule(molecule.atomic_numbers[fragment2],
                              molecule.geometry[fragment2])

        #### add capping H to new_mol
        # the H starts at atom2's old position and is then pulled in along the bond,
        # swapping atom2's covalent radius for hydrogen's
        new_mol.add_atom("H", molecule.geometry[atom2])
        molecule_to_molecule[atom2] = new_mol.num_atoms()
        old_radius = get_covalent_radius(molecule.atomic_numbers[atom2])
        H_radius = get_covalent_radius(1)
        new_dist = new_mol.get_distance(
            molecule_to_molecule[atom1],
            molecule_to_molecule[atom2]) - old_radius + H_radius
        new_mol.set_distance(molecule_to_molecule[atom1],
                             molecule_to_molecule[atom2], new_dist)
        new_mol.add_bond(molecule_to_molecule[atom1],
                         molecule_to_molecule[atom2])

        #### add capping H to new group
        # same procedure, mirrored: the H replaces atom1 on the group side
        group.add_atom("H", molecule.geometry[atom1])
        molecule_to_group[atom1] = group.num_atoms()
        old_radius = get_covalent_radius(molecule.atomic_numbers[atom1])
        new_dist = group.get_distance(
            molecule_to_group[atom2],
            molecule_to_group[atom1]) - old_radius + H_radius
        group.set_distance(molecule_to_group[atom2], molecule_to_group[atom1],
                           new_dist)
        group.add_bond(molecule_to_group[atom2], molecule_to_group[atom1])

        #### add bonds to nascent molecules
        # the atom1-atom2 bond is removed first, so every remaining bond should lie within one fragment
        molecule.remove_bond(atom1, atom2)
        for (a1, a2) in molecule.bonds.edges():
            if a1 in fragment1:
                assert a2 in fragment1, "somehow we have another bond between the two groups!"
                assert molecule_to_molecule[
                    a1] is not None, f"we don't have a mapping for atom {a1}"
                assert molecule_to_molecule[
                    a2] is not None, f"we don't have a mapping for atom {a2}"
                new_mol.add_bond(molecule_to_molecule[a1],
                                 molecule_to_molecule[a2])
            elif a1 in fragment2:
                # branch on a1 (mirroring the case above) so the a2 check is a real test
                assert a2 in fragment2, "somehow we have another bond between the two groups!"
                assert molecule_to_group[
                    a1] is not None, f"we don't have a mapping for atom {a1}"
                assert molecule_to_group[
                    a2] is not None, f"we don't have a mapping for atom {a2}"
                group.add_bond(molecule_to_group[a1], molecule_to_group[a2])

        #### create Group object from group
        # the capping H (mapped from atom1) is passed as the group's attachment atom
        group = cctk.Group.new_from_molecule(
            attach_to=molecule_to_group[atom1], molecule=group)

        if return_mapping:
            return new_mol, group, molecule_to_molecule, molecule_to_group
        else:
            return new_mol, group