Example #1
0
from cctk import XYZFile, ConformationalEnsemble, GaussianFile

#### This file generates a bunch of different phosphate-based rotamers for the given CpG dinucleotide.
#### Usage: ``python generate_conformers.py``

output_file = XYZFile.read_file("CpG.xyz")
mol = output_file.molecule.assign_connectivity()

ensemble = ConformationalEnsemble()

angles = [0, 60, 120, 180, 240, 241, 300]
for x in angles:
    for y in angles:
        mol.set_dihedral(1, 7, 6, 8, x)
        mol.set_dihedral(23, 24, 25, 1, y)
        ensemble.add_molecule(mol, copy=True)

old_num = len(ensemble)
ensemble = ensemble.eliminate_redundant()
new_num = len(ensemble)
print(
    f"originally {old_num} conformers, but after eliminating redundant there are {new_num}!"
)

count = 0
for molecule in ensemble.molecules:
    x = int(round(molecule.get_dihedral(1, 7, 6, 8)))
    y = int(round(molecule.get_dihedral(23, 24, 25, 1)))
    if molecule.check_for_conflicts():
        GaussianFile.write_molecule_to_file(f"CpG_{x}_{y}.gjf",
                                            molecule,
Example #2
0
class GaussianFile(File):
    """
    Class representing Gaussian input/output files.

    Attributes:
        ensemble (ConformationalEnsemble): ``ConformationalEnsemble`` instance
        job_types (list): list of `job_type`` instances
        route_card (str): optional, route card of .gjf file
        link0 (dict): optional, dictionary of Link 0 commands (e.g. {"mem": "32GB", "nprocshared": 16})
        footer (str): optional, footer of .gjf file
        successful_terminations (int): number of successful terminations (should be 1 for an opt, 2 for opt and then freq, 1 for a single point energy, etc)
        elapsed_time (float): total time for job in seconds
        title (str): optional, title of .gjf file
    """

    def __init__(
        self, job_types, route_card=None, link0=None, footer=None, title="title", success=0, elapsed_time=0.0
    ):
        """
        Create new GaussianFile object.

        Args:
            job_types (list): list of ``job_type`` instances
            route_card (str): optional, route card of ``.gjf`` file
            link0 (dict): optional, Link 0 commands of ``.gjf`` file
            footer (str): optional, footer of ``.gjf`` file
            title (str): optional, title of ``.gjf`` file
            success (int): num successful terminations
            elapsed_time (float): total time for job in seconds
        """

        if route_card and not isinstance(route_card, str):
            raise TypeError("route card needs to be a string")

        if link0 and not isinstance(link0, dict):
            raise TypeError("link0 needs to be a dict")

        if footer and not isinstance(footer, str):
            raise TypeError("footer needs to be a string")

        if title and not isinstance(title, str):
            raise TypeError("title needs to be a string")

        if success and not isinstance(success, int):
            raise TypeError("success needs to be an integer")

        if not isinstance(elapsed_time, (float, int)) or elapsed_time < 0.0:
            raise TypeError(f"elapsed_time invalid: {elapsed_time}")

        if job_types is not None:
            if isinstance(job_types, str):
                raise ValueError(f"invalid job_types {job_types} - did you mean to call GaussianFile.read_file({job_types})?")
            if not all(isinstance(job, GaussianJobType) for job in job_types):
                raise TypeError(f"invalid job_types {job_types}")

        self.ensemble = ConformationalEnsemble()
        self.route_card = route_card
        self.link0 = link0
        self.footer = footer
        self.title = title
        self.job_types = job_types
        self.successful_terminations = success
        self.elapsed_time = elapsed_time

    def __str__(self):
        return f"GaussianFile (title=\"{str(self.title)}\", {len(self.ensemble)} entries in Ensemble)"

    @classmethod
    def write_molecule_to_file(cls, filename, molecule, route_card, link0={"mem": "32GB", "nprocshared": 16}, footer=None, title="title", append=False, print_symbol=False):
        """
        Write a ``.gjf`` file using the given molecule.

        Args:
            filename (str): path to the new file
            molecule (Molecule): which molecule to use -- a ``Molecule`` object.
            route_card (str): route card for new file
            link0 (dict): dictionary of Link 0 commands
            footer (str): footer for new file
            title (str): title of the file, defaults to "title"
            append (Bool): whether or not to append to file using Link1 specifications
            print_symbol (Bool): whether to print atomic symbols (instead of atomic numbers)
        """
        if not isinstance(molecule, Molecule):
            raise TypeError("need a valid molecule to write a file!")

        if (route_card is None) or (not isinstance(route_card, str)):
            raise ValueError("can't write a file without a route card")

        if not re.match(r"^#p", route_card):
            print(f"ALERT - route card doesn't start with #p: {route_card}")

        #### generate the text
        text = ""
        if append:
            text += "--Link1--\n"

        if isinstance(link0, dict):
            for key, val in link0.items():
                text += f"%{key}={val}\n"

        text += f"{route_card.strip()}\n\n{title}\n\n"

        text += f"{int(molecule.charge)} {int(molecule.multiplicity)}\n"
        for index, Z in enumerate(molecule.atomic_numbers, start=1):
            line = molecule.get_vector(index)
            if print_symbol:
                Z = get_symbol(Z)
                text += f"{Z:>2}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n"
            else:
                text += f"{Z:2d}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n"

        text += "\n"
        if footer is not None:
            text += f"{footer.strip()}\n\n"

        #### write the file
        if append:
            super().append_to_file(filename, text)
        else:
            super().write_file(filename, text)

    def write_file(self, filename, molecule=None, route_card=None, link0=None, footer=None, **kwargs):
        """
        Write a ``.gjf`` file, using object attributes. If no header/footer is specified, the object's header/footer will be used.

        Args:
            filename (str): path to the new file
            molecule (int): which molecule to use -- passed to ``self.get_molecule()``.
                Default is -1 (e.g. the last molecule), but positive integers will select from self.ensemble(1-indexed).
                A ``Molecule`` object can also be passed, in which case that molecule will be written to the file.
            route_card (str): route card for new file
            link0 (dict): dictionary of Link 0 commands (e.g. {"mem": "32GB", "nprocshared": 16}
            footer (str): footer for new file
        """
        if not isinstance(molecule, Molecule):
            molecule = self.get_molecule(molecule)

        if route_card is None:
            route_card = self.route_card

        if link0 is None:
            link0 = self.link0

        if footer is None:
            footer = self.footer

        self.write_molecule_to_file(filename, molecule, route_card, link0, footer, **kwargs)

    def num_imaginaries(self):
        """
        Returns the number of imaginary frequencies.
        """
        return len(self.imaginaries())

    def imaginaries(self):
        """
        Returns the imaginary frequencies, rounded to the nearest integer.
        """
        if (GaussianJobType.FREQ in self.job_types) and (self.ensemble[-1:,"frequencies"] is not None):
            freqs = self.ensemble[-1:,"frequencies"]
            if not isinstance(freqs, list) or len(freqs) == 0:
                return list()
            else:
                return list(map(int, np.array(freqs)[np.array(freqs) < 0]))
        else:
            return list()

    @classmethod
#    @profile
    def read_file(cls, filename, return_lines=False, extended_opt_info=False):
        """
        Reads a Gaussian``.out`` or ``.gjf`` file and populates the attributes accordingly.
        Only footers from ``opt=modredundant`` can be read automatically --  ``genecep`` custom basis sets, &c must be specified manually.

        Note:

        Will throw ``ValueError`` if there have been no successful iterations.

        Args:
            filename (str): path to the out file
            return_lines (Bool): whether the lines of the file should be returned
            extended_opt_info (Bool): if full parameters about each opt step should be collected
                (by default, only ``rms_displacement`` and ``rms_force`` are collected)
        Returns:
            ``GaussianFile`` object (or list of ``GaussianFile`` objects for Link1 files)
            (optional) the lines of the file (or list of lines of file for Link1 files)
        """
        if re.search("gjf$", filename) or re.search("com$", filename):
            return cls._read_gjf_file(filename, return_lines)

        link1_lines = parse.split_link1(filename)
        files = []

        for link1idx, lines in enumerate(link1_lines):
            #### automatically assign job types based on header
            header = lines.search_for_block("#p", "----", format_line=lambda x: x.lstrip(), join="")
            if header is None:
                raise ValueError("can't find route card! (perhaps '#p' wasn't employed?)")
            job_types = cls._assign_job_types(header)

            link0 = parse.extract_link0(lines)

            title = ""
            title_block = lines.search_for_block("l101.exe", "Symbolic Z-matrix", join="\n")
            if title_block is not None:
                for line in title_block.split("\n")[1:]:
                    if not re.search("-----", line):
                        title += line


            (geometries, atom_list, energies, scf_iterations, success, elapsed_time) = parse.read_geometries_and_energies(lines)
            success, elapsed_time = parse.extract_success_and_time(lines)
            atomic_numbers = []

            #### convert to right datatype
            try:
                atomic_numbers = np.array(atom_list, dtype=np.int8)
            except Exception as e:
                atomic_numbers = np.array(list(map(get_number, atom_list)), dtype=np.int8)

            footer = None
            if re.search("modredundant", str(header)):
                footer = lines.search_for_block("^ The following ModRedundant input section", "^ $", count=1, join="\n")
                if footer is not None:
                    footer = "\n".join(list(footer.split("\n"))[1:])  # get rid of the first line
                    footer = "\n".join([" ".join(list(filter(None, line.split(" ")))) for line in footer.split("\n")])

            bonds = parse.read_bonds(lines)
            charge, multip =  lines.find_parameter("Multiplicity", expected_length=4, which_field=[1,3], split_on="=")[0]

            f = GaussianFile(job_types=job_types, route_card=header, link0=link0, footer=footer, success=success, elapsed_time=elapsed_time, title=title)

            molecules = [None] * len(geometries)
            properties = [{} for _ in range(len(geometries))]
            for idx, geom in enumerate(geometries):
                molecules[idx] = Molecule(atomic_numbers, geom, charge=charge, multiplicity=multip, bonds=bonds)
                if idx < len(energies):
                    properties[idx]["energy"] = energies[idx]
                if idx < len(scf_iterations):
                    properties[idx]["scf_iterations"] = scf_iterations[idx]
                properties[idx]["link1_idx"] = link1idx
                properties[idx]["filename"] = filename
                properties[idx]["iteration"] = idx

            #### now for some job-type specific attributes
            if GaussianJobType.OPT in job_types:
                rms_forces = lines.find_parameter("RMS\s+Force", expected_length=5, which_field=2)
                rms_displacements = lines.find_parameter("RMS\s+Displacement", expected_length=5, which_field=2)

                if extended_opt_info:
                    max_forces = lines.find_parameter("Maximum Force", expected_length=5, which_field=2)
                    max_displacements = lines.find_parameter("Maximum Displacement", expected_length=5, which_field=2)
                    max_gradients = lines.find_parameter("Cartesian Forces:", expected_length=6, which_field=3)
                    rms_gradients = lines.find_parameter("Cartesian Forces:", expected_length=6, which_field=5)
                    max_int_forces = lines.find_parameter("Internal  Forces:", expected_length=6, which_field=3)
                    rms_int_forces = lines.find_parameter("Internal  Forces:", expected_length=6, which_field=5)
                    delta_energy = lines.find_parameter("Predicted change in Energy", expected_length=4, which_field=3, cast_to_float=False)

                for idx, force in enumerate(rms_forces):
                    properties[idx]["rms_force"] = force
                    properties[idx]["rms_displacement"] = rms_displacements[idx]

                    if extended_opt_info:
                        if idx < len(max_forces):
                            properties[idx]["max_force"] = max_forces[idx]

                        if idx < len(max_displacements):
                            properties[idx]["max_displacement"] = max_displacements[idx]

                        if idx < len(max_gradients):
                            properties[idx]["max_gradient"] = max_gradients[idx]

                        if idx < len(rms_gradients):
                            properties[idx]["rms_gradient"] = rms_gradients[idx]

                        if idx < len(max_int_forces):
                            properties[idx]["max_internal_force"] = max_int_forces[idx]

                        if idx < len(rms_int_forces):
                            properties[idx]["rms_internal_force"] = rms_int_forces[idx]

                        if idx < len(delta_energy):
                            change_in_energy = re.sub(r"Energy=", "", delta_energy[idx])
                            properties[idx]["predicted_change_in_energy"] = float(change_in_energy.replace('D', 'E'))

            if GaussianJobType.FREQ in job_types:
                enthalpies = lines.find_parameter("thermal Enthalpies", expected_length=7, which_field=6)
                if len(enthalpies) == 1:
                    properties[-1]["enthalpy"] = enthalpies[0]
                elif len(enthalpies) > 1:
                    raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}")

                gibbs_vals = lines.find_parameter("thermal Free Energies", expected_length=8, which_field=7)
                if len(gibbs_vals) == 1:
                    properties[-1]["gibbs_free_energy"] = gibbs_vals[0]
                elif len(gibbs_vals) > 1:
                    raise ValueError(f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}")

            if GaussianJobType.FREQ in job_types:
                enthalpies = lines.find_parameter("thermal Enthalpies", expected_length=7, which_field=6)
                if len(enthalpies) == 1:
                    properties[-1]["enthalpy"] = enthalpies[0]
                elif len(enthalpies) > 1:
                    raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}")

                gibbs_vals = lines.find_parameter("thermal Free Energies", expected_length=8, which_field=7)
                if len(gibbs_vals) == 1:
                    properties[-1]["gibbs_free_energy"] = gibbs_vals[0]
                elif len(gibbs_vals) > 1:
                    raise ValueError(f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}")

                frequencies = []
                try:
                    frequencies = sum(lines.find_parameter("Frequencies", expected_length=5, which_field=[2,3,4]), [])
                    properties[-1]["frequencies"] = sorted(frequencies)
                except Exception as e:
                    raise ValueError("error finding frequencies")

                #  Temperature   298.150 Kelvin.  Pressure   1.00000 Atm.
                temperature = lines.find_parameter("Temperature", expected_length=6, which_field=1)
                if len(temperature) == 1:
                    properties[-1]["temperature"] = temperature[0]
                    try:
                        corrected_free_energy = get_corrected_free_energy(gibbs_vals[0], frequencies,
                                                                        frequency_cutoff=100.0, temperature=temperature[0])
                        properties[-1]["quasiharmonic_gibbs_free_energy"] = float(f"{float(corrected_free_energy):.6f}") # yes this is dumb
                    except:
                        pass


            if GaussianJobType.NMR in job_types:
                nmr_shifts = parse.read_nmr_shifts(lines, molecules[0].num_atoms())
                if nmr_shifts is not None:
                    properties[-1]["isotropic_shielding"] = nmr_shifts.view(OneIndexedArray)

                if re.search("nmr=mixed", f.route_card, flags=re.IGNORECASE) or re.search("nmr=spinspin", f.route_card,flags=re.IGNORECASE):
                    couplings = parse.read_j_couplings(lines, molecules[0].num_atoms())
                    if couplings is not None:
                        properties[-1]["j_couplings"] = couplings

            if GaussianJobType.FORCE in job_types:
                assert len(molecules) == 1, "force jobs should not be combined with optimizations!"
                forces = parse.read_forces(lines)
                properties[0]["forces"] = forces

            if GaussianJobType.POP in job_types:
                if re.search("hirshfeld", f.route_card) or re.search("cm5", f.route_card):
                    charges, spins = parse.read_hirshfeld_charges(lines)
                    properties[-1]["hirshfeld_charges"] = charges
                    properties[-1]["hirshfeld_spins"] = spins

            try:
                charges = parse.read_mulliken_charges(lines)
                properties[-1]["mulliken_charges"] = charges
            except Exception as e:
                pass

            try:
                dipole = parse.read_dipole_moment(lines)
                properties[-1]["dipole_moment"] = dipole
            except Exception as e:
                pass

            for mol, prop in zip(molecules, properties):
                f.ensemble.add_molecule(mol, properties=prop)

            f.check_has_properties()
            files.append(f)

        if return_lines:
            if len(link1_lines) == 1:
                return files[0], link1_lines[0]
            else:
                return files, link1_lines
        else:
            if len(link1_lines) == 1:
                return files[0]
            else:
                return files

    @classmethod
    def _read_gjf_file(cls, filename, return_lines=False):
        """
        Reads a Gaussian ``.gjf`` or ``.com`` file and populates the attributes accordingly.

        Args:
            filename (str): path to the out file
            return_lines (Bool): whether the lines of the file should be returned
        Returns:
            GaussianFile object
            (optional) the lines of the file
        """
        lines = super().read_file(filename)
        header = None
        link0 = {}
        footer = None
        header_done = False
        title = None
        charge = None
        multip = None
        in_geom = False
        atomic_numbers = []
        geometry = []

        for idx, line in enumerate(lines):
            if header is None:
                if re.match("\%", line):
                    pieces = line[1:].split("=")
                    link0[pieces[0]] = pieces[1]
                    continue
                if re.match("#", line):
                    header = line
                    continue

            if (title is None) and (header is not None):
                if header_done:
                    if len(line.strip()) > 0:
                        title = line
                else:
                    if len(line.strip()) > 0:
                        header = header + line
                    else:
                        header_done = True
                continue

            if (title is not None) and (charge is None):
                if len(line.strip()) > 0:
                    pieces = list(filter(None, line.split(" ")))
                    assert len(pieces) == 2, f"can't parse line {line}"

                    charge = int(pieces[0])
                    multip = int(pieces[1])
                    in_geom = True
                    continue

            if in_geom == True:
                if len(line.strip()) == 0:
                    in_geom = False
                else:
                    pieces = list(filter(None, line.split(" ")))
                    assert len(pieces) == 4, f"can't parse line {line}"

                    atomic_numbers.append(pieces[0])
                    geometry.append([pieces[1], pieces[2], pieces[3]])

            if (in_geom == False) and (len(geometry) > 0):
                if footer:
                    footer = footer + "\n" + line
                else:
                    if len(line.strip()) > 0:
                        footer = line

        try:
            atomic_numbers = np.array(atomic_numbers, dtype=np.int8)
        except Exception as e:
            atomic_numbers = np.array(list(map(get_number, atomic_numbers)), dtype=np.int8)

        job_types = cls._assign_job_types(header)

        f = GaussianFile(job_types=job_types, route_card=header, link0=link0, footer=footer, title=title)
        f.ensemble.add_molecule(Molecule(atomic_numbers, geometry, charge=charge, multiplicity=multip))
        if return_lines:
            return f, lines
        else:
            return f

    def get_molecule(self, num=None, properties=False):
        """
        Returns the last molecule (from an optimization job) or the only molecule (from other jobs).

        If ``num`` is specified, returns ``self.ensemble.molecule_list()[num]``
        If ``properties`` is True, returns ``(molecule, properties)``.
        """
        # some methods pass num=None, which overrides setting the default above
        if num is None:
            num = -1

        if not isinstance(num, int):
            raise TypeError("num must be int")

        if properties:
            return self.ensemble.molecule_list()[num], self.ensemble.properties_list()[num]
        else:
            return self.ensemble.molecule_list()[num]

    @classmethod
    def _assign_job_types(cls, header):
        """
        Assigns ``GaussianJobType`` objects from route card. ``GaussianJobType.SP`` is assigned by default.

        For instance, "#p opt freq=noraman" would give an output of ``[GaussianJobType.SP, GaussianJobType.OPT, GaussianJobType.FREQ]``.

        Args:
            header (str): Gaussian route card

        Returns:
            list of ``GaussianJobType`` objects
        """
        job_types = []
        for name, member in GaussianJobType.__members__.items():
            if re.search(f" {member.value}", str(header), re.IGNORECASE):
                job_types.append(member)
        if GaussianJobType.SP not in job_types:
            job_types.append(GaussianJobType.SP)
        return job_types

    def check_has_properties(self):
        """
        Checks that the file has all the appropriate properties for its job types, and raises ValueError if not.

        This only checks the last molecule in ``self.ensemble``, for now.
        """
        if self.successful_terminations > 0:
            if self.successful_terminations == 1 and ((GaussianJobType.OPT in self.job_types) and (GaussianJobType.FREQ in self.job_types)):
                return # opt freq jobs should have two terminations
            for job_type in self.job_types:
                for prop in EXPECTED_PROPERTIES[job_type.value]:
                    if not self.ensemble.has_property(-1, prop):
                        raise ValueError(f"expected property {prop} for job type {job_type}, but it's not there!")
        else:
            return

    @classmethod
    def write_ensemble_to_file(cls, filename, ensemble, route_card, link0={"mem": "32GB", "nprocshared": 16}, footer=None, title="title", print_symbol=False):
            """
            Write each structure in the specified ensemble to a single Gaussian input file
            by using the Link1 specification.

            Args:
                filename (str): where to write the file
                ensemble (Ensemble): ``Ensemble`` object to write
                route_card (str or list): to use the same route card for every link, use a single string;
                                          otherwise, provide a list whose entries parallel the ensemble members
                link0 (dict or list of dicts): to use the same memory/processors for every link, use a single string;
                                               otherwise, provide a list
                footer (None/str or list): use None for no text after geometry, provide a str to specify a footer,
                                           or provide some combination of the above as a list
                title (str or list): use a single string to provide a generic title for every link or a list as above
                print_symbol (bool or list): whether to print atomic symbols or atomic numbers in the geometry specification;
                                             use a single bool or a list as above

            """
            n_geometries = len(ensemble)
            assert len(ensemble) > 0, "cannot write a blank ensemble"

            if isinstance(route_card, str):
                route_card = [route_card for _ in ensemble._items]
            elif isinstance(route_card, list):
                assert len(route_card) == n_geometries, f"expected {n_geometries} route cards but got {len(route_card)}"
                for card in route_card:
                    assert isinstance(card, str), "expected route card to be a str"
            else:
                raise ValueError(f"unexpected type for route_card: {str(type(route_card))}")

            if isinstance(link0, dict):
                link0 = [link0 for _ in ensemble._items]
            elif isinstance(link0, list):
                assert len(link0) == n_geometries, f"expected {n_geometries} link0 entries, but got {len(link0)}"
                for d in link0:
                    assert isinstance(d, dict), f"expected dict for link0 but got {str(type(d))}"
            else:
                raise ValueError(f"unexpected type for link0: {str(type(link0))}")

            if footer is None or isinstance(footer, str):
                footer = [footer for _ in ensemble._items]
            elif isinstance(footer, list):
                assert len(footer) == n_geometries, f"expected {n_geometries} footers, but got {len(footer)}"
                for f in footer:
                    assert f is None or isinstance(f, str), f"expected str or None for footer but got {str(type(f))}"
            else:
                raise ValueError(f"unexpected type for footer: {str(type(footer))}")

            if isinstance(title, str):
                assert len(title.strip()) > 0, "zero-length titles not allowed"
                title = [title for _ in ensemble._items]
            elif isinstance(title, list):
                assert len(title) == n_geometries, f"expected {n_geometries} route cards but got {len(title)}"
                for card in title:
                    assert isinstance(card, str), "expected title to be a str"
                    assert len(title.strip()) > 0, "zero-length titles are not allowed"
            else:
                raise ValueError(f"unexpected type for title: {str(type(title))}")

            if isinstance(print_symbol, bool):
                print_symbol = [print_symbol for _ in ensemble._items]
            elif isinstance(print_symbol, list):
                assert len(print_symbol) == n_geometries, f"expected {n_geometries} print_symbol entries but got {len(print_symbol)}"
                for s in print_symbol:
                    assert isinstance(s, bool), f"expected bool for print_symbol but got {str(type(s))}"
            else:
                raise ValueError(f"unexpected type for print_symbol: {str(type(print_symbol))}")

            for idx, molecule in enumerate(ensemble._items):
                if idx == 0:
                    cls.write_molecule_to_file(filename, molecule, route_card[idx], link0[idx], footer=footer[idx], title=title[idx], print_symbol=print_symbol[idx], append=False)
                else:
                    cls.write_molecule_to_file(filename, molecule, route_card[idx], link0[idx], footer=footer[idx], title=title[idx], print_symbol=print_symbol[idx], append=True)

    def add_custom_basis_set(self, name, add_all_elements=False, return_string=False):
        """
        Appends custom basis sets (from Basis Set Exchange) to ``self.footer``. Should be used in combination with the ``gen`` keyword.

        Args:
            name (str): name of basis set (look it up on Basis Set Exchange)
            add_all_elements (bool): whether the complete basis set should be added or just the elements of interest
            return_string (bool): if the basis set should be appended to the footer or returned as a string (no change to ``self``)

        Returns:
            nothing (if return_string is ``False``)
            string of basis set definition (if return string is ``True``)
        """
        import basis_set_exchange as bse
        assert isinstance(name, str), "need basis set name to be a string, for starters"

        try:
            basis_definition = ""
            if add_all_elements:
                basis_definition = bse.get_basis(name, fmt="gaussian94", header=False)
            else:
                elements = list(np.unique(self.get_molecule().atomic_numbers.view(np.ndarray)))
                basis_definition = bse.get_basis(name, fmt="gaussian94", header=False, elements=elements)

            if self.footer is None:
                self.footer = basis_definition
            else:
                self.footer += basis_definition
            self.footer += "\n"

        except Exception as e:
            raise ValueError(f"adding basis set {name} from basis set exchange failed!\n{e}")

    @classmethod
    def read_file(cls, filename, return_lines=False, extended_opt_info=False):
#    def read_fast(cls, filename, return_lines=False, extended_opt_info=False):
        """
        Reads a Gaussian``.out`` or ``.gjf`` file and populates the attributes accordingly.
        Only footers from ``opt=modredundant`` can be read automatically --  ``genecep`` custom basis sets, &c must be specified manually.

        Note:

        Will throw ``ValueError`` if there have been no successful iterations.

        Args:
            filename (str): path to the out file
            return_lines (Bool): whether the lines of the file should be returned
            extended_opt_info (Bool): if full parameters about each opt step should be collected
                (by default, only ``rms_displacement`` and ``rms_force`` are collected)
        Returns:
            ``GaussianFile`` object (or list of ``GaussianFile`` objects for Link1 files)
            (optional) the lines of the file (or list of lines of file for Link1 files) as Lines object
        """
        if re.search("gjf$", filename) or re.search("com$", filename):
            return cls._read_gjf_file(filename, return_lines)

        link1_lines = parse.split_link1_to_text(filename)
        files = []

        for link1idx, lines in enumerate(link1_lines):
            files.append(parse.read_file_fast(lines, filename, link1idx, extended_opt_info=extended_opt_info))

        if return_lines:
            link1_lines = parse.split_link1(filename)
            if len(link1_lines) == 1:
                return files[0], link1_lines[0]
            else:
                return files, link1_lines
        else:
            if len(link1_lines) == 1:
                return files[0]
            else:
                return files