# Example 1
class OrcaFile(File):
    """
    Generic class for all Orca ``.inp`` and ``.out`` files.

    Attributes:
        ensemble (ConformationalEnsemble): ``ConformationalEnsemble`` instance
        job_types (list): list of ``OrcaJobType`` instances
        header (str): keyword line or lines
        variables (dict): dictionary of variables to specify (e.g. ``{"maxcore": 2000}``).
        blocks (dict): dictionary of blocks to change specific settings.
            In general, the key should be the block name and the value should be a list of desired lines.
            For instance, configuring a time-dependent DFT job might look like ``{"tddft": ["maxdim 5", "nroots 50"]}``
        successful_terminations (int): number of successful terminations
        elapsed_time (float): total time for job in seconds
    """
    def __init__(self, job_types, ensemble=None, header=None, variables=None, blocks=None):
        """
        Create a new ``OrcaFile`` object.

        Args:
            job_types (list): list of ``OrcaJobType`` instances (required)
            ensemble (ConformationalEnsemble): optional; a falsy value or anything that is not a
                ``ConformationalEnsemble`` is replaced by a new, empty ensemble
            header (str): optional keyword line(s); stored as ``None`` unless it is a non-empty string
            variables (dict): optional; stored as ``{}`` unless it is a non-empty dict
            blocks (dict): optional, maps a block name to a list of lines; stored as ``{}`` unless it is a non-empty dict

        Raises:
            ValueError: if ``job_types`` is ``None``
            TypeError: if any entry of ``job_types`` is not an ``OrcaJobType``
        """
        # NOTE(review): the parameter list was cut off in the text this method was restored from.
        # The names come from the body; their order follows the class attribute docs and the
        # ``header, variables, blocks`` order used by ``write_file`` -- confirm against callers.
        if job_types is None:
            raise ValueError("need job types for new Orca file")

        # Loop explicitly so the offending entry can be named in the error message.
        for job in job_types:
            if not isinstance(job, OrcaJobType):
                raise TypeError(f"invalid job type {job}")
        self.job_types = job_types

        if ensemble and isinstance(ensemble, ConformationalEnsemble):
            self.ensemble = ensemble
        else:
            self.ensemble = ConformationalEnsemble()

        if header and isinstance(header, str):
            self.header = header
        else:
            self.header = None

        if blocks and isinstance(blocks, dict):
            for lines in blocks.values():
                assert isinstance(lines, list), "each block must be given as a list of lines"
            self.blocks = blocks
        else:
            self.blocks = {}

        if variables and isinstance(variables, dict):
            self.variables = variables
        else:
            self.variables = {}

    def read_file(cls, filename):
        """
        Read an Orca output file and build ``OrcaFile`` objects from its contents.

        The file is split with ``parse.split_multiple_inputs`` and each resulting section is parsed
        for its header, job types, variables, blocks, energies and geometries. Job-type specific
        properties are stored on the per-geometry property dictionaries before the molecules are
        added to the new file's ensemble.

        Args:
            filename (str): path to the file; names matching ``inp$`` are handed to ``_read_inp_file``

        Returns:
            ``files[0]`` when exactly one file was collected, or ``None`` as soon as a section
            contains no energies. NOTE(review): ``return files`` follows directly and presumably
            sat under an ``else:`` that is absent from this copy.
        """
        # NOTE(review): this copy of the method is visibly incomplete -- several calls are cut off
        # after their first argument and ``try:`` / ``else:`` lines are absent. The notes below
        # mark each spot; none of the missing text can be recovered from this file alone.
        # NOTE(review): the method is invoked through ``cls``, so a ``@classmethod`` decorator is
        # presumably expected above the ``def`` -- confirm.
        # NOTE(review): the function applied to the "inp$" pattern is missing on the next line
        # (presumably ``re.search``) -- confirm.
        if"inp$", filename):
            return cls._read_inp_file(filename)

        multiple_lines = parse.split_multiple_inputs(filename)
        files = []

        for lines in multiple_lines:
            input_lines = parse.extract_input_file(lines)
            header = parse.read_header(input_lines)
            job_types = cls._assign_job_types(header)
            variables, blocks = parse.read_blocks_and_variables(input_lines)

            # count normal terminations and pick up the reported wall time
            success = 0
            elapsed_time = 0
            for line in lines:
                if line.strip().startswith("****ORCA TERMINATED NORMALLY****"):
                    success += 1
                elif line.startswith("TOTAL RUN TIME"):
                    fields = line.split()
                    # NOTE(review): the argument of ``len(`` is missing; presumably ``fields`` -- confirm.
                    assert len(
                    ) == 13, f"unexpected number of fields on elapsed time line:\n{line}"
                    # fields 3, 5, 7 and 9 are read as days, hours, minutes and seconds
                    days = float(fields[3])
                    hours = float(fields[5])
                    minutes = float(fields[7])
                    seconds = float(fields[9])
                    elapsed_time = days * 86400 + hours * 3600 + minutes * 60 + seconds

            energies, iters = parse.read_energies(lines)
            if len(energies) == 0:
                # NOTE(review): this returns from inside the loop, so any sections parsed earlier are dropped.
                return None

            atomic_numbers, geometries = parse.read_geometries(
                lines, num_to_find=len(energies))
            # NOTE(review): the argument of the second ``len(`` is missing; presumably ``energies`` -- confirm.
            assert len(geometries) >= len(
            ), "can't have an energy without a geometry (cf. pigeonhole principle)"

            charge = lines.find_parameter("xyz", 6, 4)[0]
            multip = lines.find_parameter("xyz", 6, 5)[0]

            #### TODO
            # detect Mayer bond orders

            # NOTE(review): the remaining arguments and closing parenthesis of this call are missing.
            f = OrcaFile(job_types,
            f.elapsed_time = elapsed_time
            f.successful_terminations = success

            # one molecule and one property dictionary per geometry
            molecules = [None] * len(geometries)
            properties = [{} for _ in range(len(geometries))]
            for idx, geom in enumerate(geometries):
                # NOTE(review): the remaining arguments and closing parenthesis of this call are missing.
                molecules[idx] = Molecule(atomic_numbers,
                if idx < len(energies):
                    properties[idx]["energy"] = energies[idx]
                properties[idx]["filename"] = filename
                properties[idx]["iteration"] = idx
                properties[idx]["scf_iterations"] = iters[idx]

            if multip > 1:
                s2 = lines.find_parameter("Expectation value of", 6, 5)
                for idx, spin_contam in enumerate(s2):
                    properties[idx]["S**2"] = spin_contam

            if OrcaJobType.OPT in job_types:
                rms_grad, max_grad, rms_step, max_step = parse.read_gradients(
                    lines, len(properties))
                for idx in range(len(rms_grad)):
                    # always true here, because idx ranges over len(rms_grad)
                    if idx < len(rms_grad):
                        properties[idx]["rms_gradient"] = rms_grad[idx]

                    if idx < len(max_grad):
                        properties[idx]["max_gradient"] = max_grad[idx]

                    if idx < len(rms_step):
                        properties[idx]["rms_step"] = rms_step[idx]

                    if idx < len(max_step):
                        properties[idx]["max_step"] = max_step[idx]

            if OrcaJobType.FREQ in job_types:
                # frequency-job results are attached to the last geometry only
                properties[-1]["frequencies"] = sorted(parse.read_freqs(lines))

                # NOTE(review): the remaining arguments of this ``find_parameter`` call are missing.
                enthalpies = lines.find_parameter("Total Enthalpy",
                if len(enthalpies) == 1:
                    properties[-1]["enthalpy"] = enthalpies[0]
                elif len(enthalpies) > 1:
                    # NOTE(review): the closing parenthesis of this ``raise`` is missing.
                    raise ValueError(
                        f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}"

                # NOTE(review): the remaining arguments of this ``find_parameter`` call are missing.
                gibbs = lines.find_parameter("Final Gibbs free enthalpy",
                if len(gibbs) == 1:
                    properties[-1]["gibbs_free_energy"] = gibbs[0]
                elif len(gibbs) > 1:
                    # NOTE(review): the message below interpolates ``enthalpies`` rather than ``gibbs``,
                    # and the closing parenthesis of this ``raise`` is missing.
                    raise ValueError(
                        f"unexpected # of gibbs free energies found!\ngibbs free energies = {enthalpies}"

                # NOTE(review): the remaining arguments of this ``find_parameter`` call are missing.
                temperature = lines.find_parameter("Temperature",
                if len(temperature) == 1 and len(gibbs) > 0:
                    properties[-1]["temperature"] = temperature[0]
                    # NOTE(review): both of the following calls are cut off after the opening parenthesis.
                    corrected_free_energy = get_corrected_free_energy(
                    properties[-1]["quasiharmonic_gibbs_free_energy"] = float(

            if OrcaJobType.NMR in job_types:
                # NOTE(review): the remaining arguments of this call are missing.
                nmr_shifts = parse.read_nmr_shifts(lines,
                if nmr_shifts is not None:
                    properties[-1]["isotropic_shielding"] = nmr_shifts

            # NOTE(review): each of the three ``except`` clauses below has lost its ``try:`` line
            # and its handler body; the indentation suggests every group was wrapped in its own ``try``.
                charges = parse.read_mulliken_charges(lines)
                assert len(charges) == len(atomic_numbers)
                properties[-1]["mulliken_charges"] = charges
            except Exception as e:

                charges = parse.read_loewdin_charges(lines)
                assert len(charges) == len(atomic_numbers)
                properties[-1]["lowdin_charges"] = charges
            except Exception as e:

                dipole = lines.find_parameter("Magnitude \(Debye\)", 4, 3)
                properties[-1]["dipole_moment"] = dipole[0]
            except Exception as e:

            for mol, prop in zip(molecules, properties):
                f.ensemble.add_molecule(mol, properties=prop)

            # NOTE(review): nothing visible adds ``f`` to ``files``; presumably an append belongs here -- confirm.


        if len(files) == 1:
            return files[0]
            # NOTE(review): unreachable as written; presumably under a missing ``else:``.
            return files

    def _read_inp_file(cls, filename):
        print("reading ``.inp`` files is not currently supported :(")
        return None

    def write_file(self, filename, molecule=None, header=None, variables=None, blocks=None):
        """
        Write a ``.inp`` file, using object attributes.

        Any of ``header``, ``variables`` and ``blocks`` left as ``None`` falls back to the value stored on the object.

        Args:
            filename (str): path to the new file
            molecule (int): which molecule to use -- used as an index into ``self.ensemble.molecules``.
                Default is -1 (the last molecule).
                A ``Molecule`` object can also be passed, in which case that molecule will be written to the file.
            header (str): header for new file
            variables (dict): variables for new file
            blocks (dict): blocks for new file
        """
        # NOTE(review): the parameter list and the final call were cut off in the text this method
        # was restored from; both were rebuilt from the names used in the body -- confirm.
        if molecule is None:
            molecule = -1
        if not isinstance(molecule, Molecule):
            molecule = self.ensemble.molecules[molecule]

        if header is None:
            header = self.header

        if variables is None:
            variables = self.variables

        if blocks is None:
            blocks = self.blocks

        self.write_molecule_to_file(filename, molecule, header, variables, blocks)

    @classmethod
    def write_molecule_to_file(cls, filename, molecule, header, variables=None, blocks=None, print_symbol=False):
        """
        Write an ``.inp`` file using the given molecule.

        Args:
            filename (str): path to the new file
            molecule (Molecule): which molecule to use -- a ``Molecule`` object.
            header (str): header for new file
            variables (dict): optional; each item is written as a ``%key value`` line
            blocks (dict): optional; each item is written as ``%key``, its lines (tab-indented), then ``end``
            print_symbol (Bool): if atomic symbols should be printed instead of atomic numbers
        """
        # NOTE(review): the parameter list was cut off in the text this method was restored from;
        # it was rebuilt from the names used in the body and the docstring -- confirm.
        assert isinstance(molecule, Molecule), "need a valid molecule to write a file!"
        assert isinstance(header, str), "can't write a file without a header"

        parts = [f"{header.strip()}\n"]

        if variables is not None:
            assert isinstance(variables, dict), "variables must be a dictionary"
            parts.extend(f"%{k} {v}\n" for k, v in variables.items())

        if blocks is not None:
            assert isinstance(blocks, dict), "blocks must be a dictionary"
            for block_name, block_lines in blocks.items():
                parts.append(f"%{block_name}\n")
                parts.extend(f"\t{line}\n" for line in block_lines)
                parts.append("end\n")

        parts.append("\n")

        parts.append(f"* xyz {int(molecule.charge)} {int(molecule.multiplicity)}\n")
        # ``get_vector`` is called with indices starting at 1
        for index, Z in enumerate(molecule.atomic_numbers, start=1):
            line = molecule.get_vector(index)
            # exactly one line per atom: symbol or atomic number, never both
            if print_symbol:
                label = f"{get_symbol(Z):>2}"
            else:
                label = f"{Z:2d}"
            parts.append(f"{label}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n")

        parts.append("*\n")
        parts.append("\n")

        #### write the file
        super().write_file(filename, "".join(parts))

    def get_molecule(self, num=None):
        """
        Returns the last molecule (from an optimization job or other multi-molecule jobs) or the only molecule (from other jobs).

        If ``num`` is specified, it is used directly as an index into ``self.ensemble.molecule_list()``.

        Args:
            num (int): index of the molecule to return; ``None`` selects the last one

        Raises:
            TypeError: if ``num`` is neither ``None`` nor an ``int``
        """
        # NOTE(review): earlier documentation described positive ``num`` as 1-indexed; whether that
        # holds depends on what ``molecule_list()`` returns -- confirm.
        # some methods pass num=None, which overrides setting the default above
        if num is None:
            num = -1

        if not isinstance(num, int):
            raise TypeError("num must be int")

        return self.ensemble.molecule_list()[num]

    def num_imaginaries(self):
        """
        Returns the number of imaginary frequencies, i.e. the length of ``self.imaginaries()``.
        """
        return len(self.imaginaries())

    def imaginaries(self):
        """
        Returns the imaginary (negative) frequencies of the last ensemble entry as integers.

        Values are converted with ``int()``, which truncates toward zero rather than rounding.

        Returns:
            list of ``int``; empty if this is not a frequency job or if the stored frequencies
            are not a non-empty list
        """
        if OrcaJobType.FREQ not in self.job_types:
            return []

        freqs = self.ensemble[-1:, "frequencies"]
        # covers ``None`` as well as any other non-list result
        if not isinstance(freqs, list) or len(freqs) == 0:
            return []

        freq_array = np.array(freqs)
        return list(map(int, freq_array[freq_array < 0]))

    @classmethod
    def _assign_job_types(cls, header):
        """
        Assigns ``OrcaJobType`` objects from route card. ``OrcaJobType.SP`` is assigned by default.

        A job type is selected when its value, preceded by a space, is found anywhere in the
        header (case-insensitive regular expression search).

        Args:
            header (str): Orca header

        Returns:
            list of ``OrcaJobType`` objects
        """
        # NOTE(review): the search call and both ``append`` lines were missing from the text this
        # method was restored from; they were rebuilt from the surviving fragments and the
        # "SP is assigned by default" documentation -- confirm.
        header_text = str(header)
        job_types = [
            member
            for member in OrcaJobType.__members__.values()
            if re.search(f" {member.value}", header_text, re.IGNORECASE)
        ]
        if OrcaJobType.SP not in job_types:
            job_types.append(OrcaJobType.SP)
        return job_types

    def check_has_properties(self):
        """
        Checks that the file has all the appropriate properties for its job types, and raises ``ValueError`` if not.

        Nothing is checked unless ``self.successful_terminations`` is positive.
        This only checks the last molecule in ``self.ensemble``, for now.

        Raises:
            ValueError: if a property listed in ``EXPECTED_PROPERTIES`` for one of the job types is absent
        """
        if self.successful_terminations > 0:
            for job_type in self.job_types:
                for prop in EXPECTED_PROPERTIES[job_type.value]:
                    if not self.ensemble.has_property(-1, prop):
                        raise ValueError(
                            f"expected property {prop} for job type {job_type}, but it's not there!"
                        )
# Example 2
class GaussianFile(File):
    """
    Class representing Gaussian input/output files.

    Attributes:
        ensemble (ConformationalEnsemble): ``ConformationalEnsemble`` instance
        job_types (list): list of ``job_type`` instances
        route_card (str): optional, route card of ``.gjf`` file
        link0 (dict): optional, dictionary of Link 0 commands (e.g. ``{"mem": "32GB", "nprocshared": 16}``)
        footer (str): optional, footer of ``.gjf`` file
        successful_terminations (int): number of successful terminations (should be 1 for an opt, 2 for opt and then freq, 1 for a single point energy, etc)
        elapsed_time (float): total time for job in seconds
        title (str): optional, title of ``.gjf`` file
    """

    def __init__(
        self, job_types, route_card=None, link0=None, footer=None, title="title", success=0, elapsed_time=0.0
    ):
        """
        Create new GaussianFile object.

        Args:
            job_types (list): list of ``job_type`` instances
            route_card (str): optional, route card of ``.gjf`` file
            link0 (dict): optional, Link 0 commands of ``.gjf`` file
            footer (str): optional, footer of ``.gjf`` file
            title (str): optional, title of ``.gjf`` file
            success (int): num successful terminations
            elapsed_time (float): total time for job in seconds

        Raises:
            TypeError: if an argument has the wrong type, if ``elapsed_time`` is negative,
                or if an entry of ``job_types`` is not a ``GaussianJobType``
            ValueError: if ``job_types`` is a string (most likely a filename passed by mistake)
        """
        # Falsy values (``None``, ``""``, ``{}``, ``0``) skip the type checks below.
        if route_card and not isinstance(route_card, str):
            raise TypeError("route card needs to be a string")

        if link0 and not isinstance(link0, dict):
            raise TypeError("link0 needs to be a dict")

        if footer and not isinstance(footer, str):
            raise TypeError("footer needs to be a string")

        if title and not isinstance(title, str):
            raise TypeError("title needs to be a string")

        if success and not isinstance(success, int):
            raise TypeError("success needs to be an integer")

        if not isinstance(elapsed_time, (float, int)) or elapsed_time < 0.0:
            raise TypeError(f"elapsed_time invalid: {elapsed_time}")

        if job_types is not None:
            if isinstance(job_types, str):
                raise ValueError(f"invalid job_types {job_types} - did you mean to call GaussianFile.read_file({job_types})?")
            if not all(isinstance(job, GaussianJobType) for job in job_types):
                raise TypeError(f"invalid job_types {job_types}")

        self.ensemble = ConformationalEnsemble()
        self.route_card = route_card
        self.link0 = link0
        self.footer = footer
        self.title = title
        self.job_types = job_types
        self.successful_terminations = success
        self.elapsed_time = elapsed_time

    def __str__(self):
        return f"GaussianFile (title=\"{str(self.title)}\", {len(self.ensemble)} entries in Ensemble)"

    @classmethod
    def write_molecule_to_file(cls, filename, molecule, route_card, link0={"mem": "32GB", "nprocshared": 16}, footer=None, title="title", append=False, print_symbol=False):
        """
        Write a ``.gjf`` file using the given molecule.

        Args:
            filename (str): path to the new file
            molecule (Molecule): which molecule to use -- a ``Molecule`` object.
            route_card (str): route card for new file
            link0 (dict): dictionary of Link 0 commands; anything that is not a dict is ignored
            footer (str): footer for new file
            title (str): title of the file, defaults to "title"
            append (Bool): whether or not to append to file using Link1 specifications
            print_symbol (Bool): whether to print atomic symbols (instead of atomic numbers)

        Raises:
            TypeError: if ``molecule`` is not a ``Molecule``
            ValueError: if ``route_card`` is not a string
        """
        if not isinstance(molecule, Molecule):
            raise TypeError("need a valid molecule to write a file!")

        if not isinstance(route_card, str):
            raise ValueError("can't write a file without a route card")

        if not re.match(r"^#p", route_card):
            print(f"ALERT - route card doesn't start with #p: {route_card}")

        #### generate the text
        parts = []
        if append:
            parts.append("--Link1--\n")

        # The default ``link0`` dict is shared between calls, so it is only read here, never modified.
        if isinstance(link0, dict):
            parts.extend(f"%{key}={val}\n" for key, val in link0.items())

        parts.append(f"{route_card.strip()}\n\n{title}\n\n")

        parts.append(f"{int(molecule.charge)} {int(molecule.multiplicity)}\n")
        # ``get_vector`` is called with indices starting at 1
        for index, Z in enumerate(molecule.atomic_numbers, start=1):
            line = molecule.get_vector(index)
            # exactly one line per atom: symbol or atomic number, never both
            if print_symbol:
                label = f"{get_symbol(Z):>2}"
            else:
                label = f"{Z:2d}"
            parts.append(f"{label}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n")

        parts.append("\n")
        if footer is not None:
            parts.append(f"{footer.strip()}\n\n")

        text = "".join(parts)

        #### write the file
        if append:
            super().append_to_file(filename, text)
        else:
            super().write_file(filename, text)

    def write_file(self, filename, molecule=None, route_card=None, link0=None, footer=None, **kwargs):
        """
        Write a ``.gjf`` file, using object attributes. If no header/footer is specified, the object's header/footer will be used.

        Args:
            filename (str): path to the new file
            molecule (int): which molecule to use -- passed to ``self.get_molecule()``.
                Default is the last molecule.
                A ``Molecule`` object can also be passed, in which case that molecule will be written to the file.
            route_card (str): route card for new file
            link0 (dict): dictionary of Link 0 commands (e.g. ``{"mem": "32GB", "nprocshared": 16}``)
            footer (str): footer for new file
            **kwargs: forwarded unchanged to ``write_molecule_to_file``
        """
        if not isinstance(molecule, Molecule):
            molecule = self.get_molecule(molecule)

        if route_card is None:
            route_card = self.route_card

        if link0 is None:
            link0 = self.link0

        if footer is None:
            footer = self.footer

        self.write_molecule_to_file(filename, molecule, route_card, link0, footer, **kwargs)

    def num_imaginaries(self):
        """
        Returns the number of imaginary frequencies, i.e. the length of ``self.imaginaries()``.
        """
        return len(self.imaginaries())

    def imaginaries(self):
        """
        Returns the imaginary (negative) frequencies of the last ensemble entry as integers.

        Values are converted with ``int()``, which truncates toward zero rather than rounding.

        Returns:
            list of ``int``; empty if this is not a frequency job or if the stored frequencies
            are not a non-empty list
        """
        if GaussianJobType.FREQ not in self.job_types:
            return []

        freqs = self.ensemble[-1:, "frequencies"]
        # covers ``None`` as well as any other non-list result
        if not isinstance(freqs, list) or len(freqs) == 0:
            return []

        freq_array = np.array(freqs)
        return list(map(int, freq_array[freq_array < 0]))

#    @profile
    def read_file(cls, filename, return_lines=False, extended_opt_info=False):
        # NOTE(review): this copy of the method is visibly incomplete. The descriptive text below
        # has lost its docstring delimiters, and several ``try:`` / ``else:`` lines and the callee
        # of each regular-expression test are absent. The notes mark each spot.
        # NOTE(review): the method is invoked through ``cls``, so a ``@classmethod`` decorator is
        # presumably expected above the ``def`` -- confirm.
        Reads a Gaussian``.out`` or ``.gjf`` file and populates the attributes accordingly.
        Only footers from ``opt=modredundant`` can be read automatically --  ``genecep`` custom basis sets, &c must be specified manually.


        Will throw ``ValueError`` if there have been no successful iterations.

            filename (str): path to the out file
            return_lines (Bool): whether the lines of the file should be returned
            extended_opt_info (Bool): if full parameters about each opt step should be collected
                (by default, only ``rms_displacement`` and ``rms_force`` are collected)
            ``GaussianFile`` object (or list of ``GaussianFile`` objects for Link1 files)
            (optional) the lines of the file (or list of lines of file for Link1 files)
        # NOTE(review): the function applied to the "gjf$" and "com$" patterns is missing (presumably ``re.search``).
        if"gjf$", filename) or"com$", filename):
            return cls._read_gjf_file(filename, return_lines)

        link1_lines = parse.split_link1(filename)
        files = []

        for link1idx, lines in enumerate(link1_lines):
            #### automatically assign job types based on header
            header = lines.search_for_block("#p", "----", format_line=lambda x: x.lstrip(), join="")
            if header is None:
                raise ValueError("can't find route card! (perhaps '#p' wasn't employed?)")
            job_types = cls._assign_job_types(header)

            link0 = parse.extract_link0(lines)

            # the title is assembled from the block between "l101.exe" and "Symbolic Z-matrix", skipping its first line
            title = ""
            title_block = lines.search_for_block("l101.exe", "Symbolic Z-matrix", join="\n")
            if title_block is not None:
                for line in title_block.split("\n")[1:]:
                    # NOTE(review): the function applied to the "-----" pattern is missing (presumably ``re.search``).
                    if not"-----", line):
                        title += line

            (geometries, atom_list, energies, scf_iterations, success, elapsed_time) = parse.read_geometries_and_energies(lines)
            # ``success`` and ``elapsed_time`` from the call above are overwritten immediately
            success, elapsed_time = parse.extract_success_and_time(lines)
            atomic_numbers = []

            #### convert to right datatype
            # NOTE(review): the ``try:`` matching the ``except`` below is missing. As written, the
            # fallback converts each entry of ``atom_list`` with ``get_number``.
                atomic_numbers = np.array(atom_list, dtype=np.int8)
            except Exception as e:
                atomic_numbers = np.array(list(map(get_number, atom_list)), dtype=np.int8)

            footer = None
            # NOTE(review): the function applied to the "modredundant" pattern is missing (presumably ``re.search``).
            if"modredundant", str(header)):
                footer = lines.search_for_block("^ The following ModRedundant input section", "^ $", count=1, join="\n")
                if footer is not None:
                    footer = "\n".join(list(footer.split("\n"))[1:])  # get rid of the first line
                    # collapse runs of spaces inside every footer line
                    footer = "\n".join([" ".join(list(filter(None, line.split(" ")))) for line in footer.split("\n")])

            bonds = parse.read_bonds(lines)
            charge, multip =  lines.find_parameter("Multiplicity", expected_length=4, which_field=[1,3], split_on="=")[0]

            f = GaussianFile(job_types=job_types, route_card=header, link0=link0, footer=footer, success=success, elapsed_time=elapsed_time, title=title)

            # one molecule and one property dictionary per geometry
            molecules = [None] * len(geometries)
            properties = [{} for _ in range(len(geometries))]
            for idx, geom in enumerate(geometries):
                molecules[idx] = Molecule(atomic_numbers, geom, charge=charge, multiplicity=multip, bonds=bonds)
                if idx < len(energies):
                    properties[idx]["energy"] = energies[idx]
                if idx < len(scf_iterations):
                    properties[idx]["scf_iterations"] = scf_iterations[idx]
                properties[idx]["link1_idx"] = link1idx
                properties[idx]["filename"] = filename
                properties[idx]["iteration"] = idx

            #### now for some job-type specific attributes
            if GaussianJobType.OPT in job_types:
                rms_forces = lines.find_parameter("RMS\s+Force", expected_length=5, which_field=2)
                rms_displacements = lines.find_parameter("RMS\s+Displacement", expected_length=5, which_field=2)

                if extended_opt_info:
                    max_forces = lines.find_parameter("Maximum Force", expected_length=5, which_field=2)
                    max_displacements = lines.find_parameter("Maximum Displacement", expected_length=5, which_field=2)
                    max_gradients = lines.find_parameter("Cartesian Forces:", expected_length=6, which_field=3)
                    rms_gradients = lines.find_parameter("Cartesian Forces:", expected_length=6, which_field=5)
                    max_int_forces = lines.find_parameter("Internal  Forces:", expected_length=6, which_field=3)
                    rms_int_forces = lines.find_parameter("Internal  Forces:", expected_length=6, which_field=5)
                    delta_energy = lines.find_parameter("Predicted change in Energy", expected_length=4, which_field=3, cast_to_float=False)

                for idx, force in enumerate(rms_forces):
                    properties[idx]["rms_force"] = force
                    # indexed without a length check, unlike the extended values below
                    properties[idx]["rms_displacement"] = rms_displacements[idx]

                    if extended_opt_info:
                        if idx < len(max_forces):
                            properties[idx]["max_force"] = max_forces[idx]

                        if idx < len(max_displacements):
                            properties[idx]["max_displacement"] = max_displacements[idx]

                        if idx < len(max_gradients):
                            properties[idx]["max_gradient"] = max_gradients[idx]

                        if idx < len(rms_gradients):
                            properties[idx]["rms_gradient"] = rms_gradients[idx]

                        if idx < len(max_int_forces):
                            properties[idx]["max_internal_force"] = max_int_forces[idx]

                        if idx < len(rms_int_forces):
                            properties[idx]["rms_internal_force"] = rms_int_forces[idx]

                        if idx < len(delta_energy):
                            # strip the "Energy=" prefix and turn the 'D' exponent marker into 'E' before converting
                            change_in_energy = re.sub(r"Energy=", "", delta_energy[idx])
                            properties[idx]["predicted_change_in_energy"] = float(change_in_energy.replace('D', 'E'))

            # NOTE(review): the enthalpy / free-energy lookups in this block are repeated verbatim at
            # the start of the next ``FREQ`` block.
            if GaussianJobType.FREQ in job_types:
                enthalpies = lines.find_parameter("thermal Enthalpies", expected_length=7, which_field=6)
                if len(enthalpies) == 1:
                    properties[-1]["enthalpy"] = enthalpies[0]
                elif len(enthalpies) > 1:
                    raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}")

                gibbs_vals = lines.find_parameter("thermal Free Energies", expected_length=8, which_field=7)
                if len(gibbs_vals) == 1:
                    properties[-1]["gibbs_free_energy"] = gibbs_vals[0]
                elif len(gibbs_vals) > 1:
                    raise ValueError(f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}")

            if GaussianJobType.FREQ in job_types:
                enthalpies = lines.find_parameter("thermal Enthalpies", expected_length=7, which_field=6)
                if len(enthalpies) == 1:
                    properties[-1]["enthalpy"] = enthalpies[0]
                elif len(enthalpies) > 1:
                    raise ValueError(f"unexpected # of enthalpies found!\nenthalpies = {enthalpies}")

                gibbs_vals = lines.find_parameter("thermal Free Energies", expected_length=8, which_field=7)
                if len(gibbs_vals) == 1:
                    properties[-1]["gibbs_free_energy"] = gibbs_vals[0]
                elif len(gibbs_vals) > 1:
                    raise ValueError(f"unexpected # gibbs free energies found!\ngibbs free energies = {gibbs_vals}")

                frequencies = []
                # NOTE(review): the ``try:`` matching the ``except`` below is missing.
                    # the three value fields of every "Frequencies" line are flattened into one list
                    frequencies = sum(lines.find_parameter("Frequencies", expected_length=5, which_field=[2,3,4]), [])
                    properties[-1]["frequencies"] = sorted(frequencies)
                except Exception as e:
                    raise ValueError("error finding frequencies")

                #  Temperature   298.150 Kelvin.  Pressure   1.00000 Atm.
                temperature = lines.find_parameter("Temperature", expected_length=6, which_field=1)
                if len(temperature) == 1:
                    properties[-1]["temperature"] = temperature[0]
                    # NOTE(review): the two statements below are indented one level deeper than their
                    # neighbours, so the line that opened that level is missing; what it was cannot be
                    # told from this copy.
                        corrected_free_energy = get_corrected_free_energy(gibbs_vals[0], frequencies,
                                                                        frequency_cutoff=100.0, temperature=temperature[0])
                        properties[-1]["quasiharmonic_gibbs_free_energy"] = float(f"{float(corrected_free_energy):.6f}") # yes this is dumb

            if GaussianJobType.NMR in job_types:
                nmr_shifts = parse.read_nmr_shifts(lines, molecules[0].num_atoms())
                if nmr_shifts is not None:
                    properties[-1]["isotropic_shielding"] = nmr_shifts.view(OneIndexedArray)

                # NOTE(review): the function applied to the two "nmr=..." patterns is missing (presumably ``re.search``).
                if"nmr=mixed", f.route_card, flags=re.IGNORECASE) or"nmr=spinspin", f.route_card,flags=re.IGNORECASE):
                    couplings = parse.read_j_couplings(lines, molecules[0].num_atoms())
                    if couplings is not None:
                        properties[-1]["j_couplings"] = couplings

            if GaussianJobType.FORCE in job_types:
                assert len(molecules) == 1, "force jobs should not be combined with optimizations!"
                forces = parse.read_forces(lines)
                properties[0]["forces"] = forces

            if GaussianJobType.POP in job_types:
                # NOTE(review): the function applied to the "hirshfeld" and "cm5" patterns is missing (presumably ``re.search``).
                if"hirshfeld", f.route_card) or"cm5", f.route_card):
                    charges, spins = parse.read_hirshfeld_charges(lines)
                    properties[-1]["hirshfeld_charges"] = charges
                    properties[-1]["hirshfeld_spins"] = spins

            # NOTE(review): both ``except`` clauses below have lost their ``try:`` line and their handler body.
                charges = parse.read_mulliken_charges(lines)
                properties[-1]["mulliken_charges"] = charges
            except Exception as e:

                dipole = parse.read_dipole_moment(lines)
                properties[-1]["dipole_moment"] = dipole
            except Exception as e:

            for mol, prop in zip(molecules, properties):
                f.ensemble.add_molecule(mol, properties=prop)

            # NOTE(review): nothing visible adds ``f`` to ``files``; presumably an append belongs here -- confirm.


        # NOTE(review): the ``else:`` lines of the three conditionals below are missing. The pairs of
        # ``return`` statements suggest a single object is returned when there is one Link1
        # section and lists otherwise.
        if return_lines:
            if len(link1_lines) == 1:
                return files[0], link1_lines[0]
                return files, link1_lines
            if len(link1_lines) == 1:
                return files[0]
                return files

    def _read_gjf_file(cls, filename, return_lines=False):
        # NOTE(review): this copy of the method is visibly incomplete. The descriptive text below
        # has lost its docstring delimiters, and ``else:`` / ``try:`` lines (and possibly other
        # control-flow statements) are absent. The notes mark each spot.
        # NOTE(review): the method is invoked through ``cls``, so a ``@classmethod`` decorator is
        # presumably expected above the ``def`` -- confirm.
        Reads a Gaussian ``.gjf`` or ``.com`` file and populates the attributes accordingly.

            filename (str): path to the out file
            return_lines (Bool): whether the lines of the file should be returned
            GaussianFile object
            (optional) the lines of the file
        lines = super().read_file(filename)
        # parser state: the input is consumed in order as Link 0 lines, route card, title,
        # charge/multiplicity line, geometry, then footer
        header = None
        link0 = {}
        footer = None
        header_done = False
        title = None
        charge = None
        multip = None
        in_geom = False
        atomic_numbers = []
        geometry = []

        for idx, line in enumerate(lines):
            if header is None:
                # "%key=value" lines are stored in ``link0``; the first line starting with "#" starts the route card
                if re.match("\%", line):
                    pieces = line[1:].split("=")
                    link0[pieces[0]] = pieces[1]
                if re.match("#", line):
                    header = line

            if (title is None) and (header is not None):
                if header_done:
                    if len(line.strip()) > 0:
                        title = line
                    # NOTE(review): the next ``if`` repeats the condition above at the same depth, and
                    # ``header_done = True`` sits beside ``header = header + line``; ``else:`` lines
                    # are presumably missing here -- confirm.
                    if len(line.strip()) > 0:
                        header = header + line
                        header_done = True

            if (title is not None) and (charge is None):
                if len(line.strip()) > 0:
                    # the first non-blank line after the title must hold exactly charge and multiplicity
                    pieces = list(filter(None, line.split(" ")))
                    assert len(pieces) == 2, f"can't parse line {line}"

                    charge = int(pieces[0])
                    multip = int(pieces[1])
                    in_geom = True

            if in_geom == True:
                if len(line.strip()) == 0:
                    in_geom = False
                    # NOTE(review): an ``else:`` is presumably missing before the next line. Also,
                    # nothing visible stores ``pieces[0]`` in ``atomic_numbers``, so that list stays
                    # empty in this copy.
                    pieces = list(filter(None, line.split(" ")))
                    assert len(pieces) == 4, f"can't parse line {line}"

                    geometry.append([pieces[1], pieces[2], pieces[3]])

            if (in_geom == False) and (len(geometry) > 0):
                if footer:
                    footer = footer + "\n" + line
                    # NOTE(review): an ``else:`` is presumably missing before the next ``if``.
                    if len(line.strip()) > 0:
                        footer = line

        # NOTE(review): the ``try:`` matching the ``except`` below is missing. As written, the
        # fallback converts each entry with ``get_number``.
            atomic_numbers = np.array(atomic_numbers, dtype=np.int8)
        except Exception as e:
            atomic_numbers = np.array(list(map(get_number, atomic_numbers)), dtype=np.int8)

        job_types = cls._assign_job_types(header)

        f = GaussianFile(job_types=job_types, route_card=header, link0=link0, footer=footer, title=title)
        f.ensemble.add_molecule(Molecule(atomic_numbers, geometry, charge=charge, multiplicity=multip))
        if return_lines:
            return f, lines
            # NOTE(review): unreachable as written; presumably under a missing ``else:``.
            return f

    def get_molecule(self, num=None, properties=False):
        """
        Returns the last molecule (from an optimization job) or the only molecule (from other jobs).

        If ``num`` is specified, returns ``self.ensemble.molecule_list()[num]``.
        If ``properties`` is True, returns ``(molecule, properties)``.

        Args:
            num (int): index into ``self.ensemble.molecule_list()``; ``None`` selects the last entry
            properties (bool): whether to also return the matching entry of ``self.ensemble.properties_list()``

        Returns:
            the selected molecule, or a ``(molecule, properties)`` tuple if ``properties`` is true

        Raises:
            TypeError: if ``num`` is neither ``None`` nor an ``int``
        """
        # some callers pass num=None explicitly, so the "last entry" default is resolved here
        if num is None:
            num = -1

        if not isinstance(num, int):
            raise TypeError("num must be int")

        molecule = self.ensemble.molecule_list()[num]
        if properties:
            return molecule, self.ensemble.properties_list()[num]
        return molecule

    @classmethod
    def _assign_job_types(cls, header):
        """
        Assigns ``GaussianJobType`` objects from route card. ``GaussianJobType.SP`` is assigned by default.

        For instance, "#p opt freq=noraman" is expected to yield the ``OPT``, ``FREQ`` and ``SP``
        job types (which members match depends on the values defined in ``GaussianJobType``).

        Args:
            header (str): Gaussian route card

        Returns:
            list of ``GaussianJobType`` objects; ``GaussianJobType.SP`` is appended if it was not matched
        """
        route_card = str(header)

        # A job type matches when its value appears after a space, case-insensitively.
        # The value is interpolated into the pattern unescaped, exactly as the search was written.
        job_types = [
            member
            for member in GaussianJobType.__members__.values()
            if re.search(f" {member.value}", route_card, re.IGNORECASE)
        ]

        if GaussianJobType.SP not in job_types:
            job_types.append(GaussianJobType.SP)
        return job_types

    def check_has_properties(self):
        """
        Checks that the file has all the appropriate properties for its job types, and raises ValueError if not.

        This only checks the last molecule in ``self.ensemble``, for now.
        Nothing is checked when there have been no successful terminations.

        Raises:
            ValueError: if a property listed in ``EXPECTED_PROPERTIES`` for one of ``self.job_types``
                is missing from the last molecule
        """
        if self.successful_terminations <= 0:
            return

        is_opt_freq = (GaussianJobType.OPT in self.job_types) and (GaussianJobType.FREQ in self.job_types)
        if self.successful_terminations == 1 and is_opt_freq:
            # opt freq jobs should have two terminations, so after only one the check is skipped
            return

        for job_type in self.job_types:
            for prop in EXPECTED_PROPERTIES[job_type.value]:
                if not self.ensemble.has_property(-1, prop):
                    raise ValueError(f"expected property {prop} for job type {job_type}, but it's not there!")

    @classmethod
    def write_ensemble_to_file(cls, filename, ensemble, route_card, link0={"mem": "32GB", "nprocshared": 16}, footer=None, title="title", print_symbol=False):
        """
        Write each structure in the specified ensemble to a single Gaussian input file
        by using the Link1 specification.

        Every per-link argument may be given once (and is then reused for every structure)
        or as a list with one entry per ensemble member.

        Args:
            filename (str): where to write the file
            ensemble (Ensemble): ``Ensemble`` object to write
            route_card (str or list): to use the same route card for every link, use a single string;
                                      otherwise, provide a list whose entries parallel the ensemble members
            link0 (dict or list of dicts): to use the same memory/processors for every link, use a single dict;
                                           otherwise, provide a list
            footer (None/str or list): use None for no text after geometry, provide a str to specify a footer,
                                       or provide some combination of the above as a list
            title (str or list): use a single string to provide a generic title for every link or a list as above
            print_symbol (bool or list): whether to print atomic symbols or atomic numbers in the geometry specification;
                                         use a single bool or a list as above

        Raises:
            ValueError: if an argument is neither a single value nor a list
            AssertionError: if the ensemble is empty, a list has the wrong length,
                a list entry has the wrong type, or a title is blank
        """
        # NOTE(review): the default ``link0`` dict is shared between calls; it is not modified here,
        # but confirm that ``write_molecule_to_file`` does not mutate it either.
        n_geometries = len(ensemble)
        assert n_geometries > 0, "cannot write a blank ensemble"

        if isinstance(route_card, str):
            route_card = [route_card for _ in ensemble._items]
        elif isinstance(route_card, list):
            assert len(route_card) == n_geometries, f"expected {n_geometries} route cards but got {len(route_card)}"
            for card in route_card:
                assert isinstance(card, str), "expected route card to be a str"
        else:
            raise ValueError(f"unexpected type for route_card: {str(type(route_card))}")

        if isinstance(link0, dict):
            link0 = [link0 for _ in ensemble._items]
        elif isinstance(link0, list):
            assert len(link0) == n_geometries, f"expected {n_geometries} link0 entries, but got {len(link0)}"
            for d in link0:
                assert isinstance(d, dict), f"expected dict for link0 but got {str(type(d))}"
        else:
            raise ValueError(f"unexpected type for link0: {str(type(link0))}")

        if footer is None or isinstance(footer, str):
            footer = [footer for _ in ensemble._items]
        elif isinstance(footer, list):
            assert len(footer) == n_geometries, f"expected {n_geometries} footers, but got {len(footer)}"
            for f in footer:
                assert f is None or isinstance(f, str), f"expected str or None for footer but got {str(type(f))}"
        else:
            raise ValueError(f"unexpected type for footer: {str(type(footer))}")

        if isinstance(title, str):
            assert len(title.strip()) > 0, "zero-length titles not allowed"
            title = [title for _ in ensemble._items]
        elif isinstance(title, list):
            assert len(title) == n_geometries, f"expected {n_geometries} titles but got {len(title)}"
            for entry in title:
                assert isinstance(entry, str), "expected title to be a str"
                # check each entry, not the list itself (a list has no ``strip``)
                assert len(entry.strip()) > 0, "zero-length titles are not allowed"
        else:
            raise ValueError(f"unexpected type for title: {str(type(title))}")

        if isinstance(print_symbol, bool):
            print_symbol = [print_symbol for _ in ensemble._items]
        elif isinstance(print_symbol, list):
            assert len(print_symbol) == n_geometries, f"expected {n_geometries} print_symbol entries but got {len(print_symbol)}"
            for s in print_symbol:
                assert isinstance(s, bool), f"expected bool for print_symbol but got {str(type(s))}"
        else:
            raise ValueError(f"unexpected type for print_symbol: {str(type(print_symbol))}")

        for idx, molecule in enumerate(ensemble._items):
            # the first structure is written with append=False, every later one with append=True
            cls.write_molecule_to_file(
                filename,
                molecule,
                route_card[idx],
                link0[idx],
                footer=footer[idx],
                title=title[idx],
                print_symbol=print_symbol[idx],
                append=(idx > 0),
            )

    def add_custom_basis_set(self, name, add_all_elements=False, return_string=False):
        """
        Appends custom basis sets (from Basis Set Exchange) to ``self.footer``. Should be used in combination with the ``gen`` keyword.

        Args:
            name (str): name of basis set (look it up on Basis Set Exchange)
            add_all_elements (bool): whether the complete basis set should be added or just the elements of interest
            return_string (bool): if the basis set should be appended to the footer or returned as a string (no change to ``self``)

        Returns:
            nothing (if return_string is ``False``)
            string of basis set definition (if return string is ``True``)

        Raises:
            AssertionError: if ``name`` is not a string
            ValueError: if fetching the basis set or updating the footer fails
        """
        # validate the argument before importing the optional dependency
        assert isinstance(name, str), "need basis set name to be a string, for starters"
        import basis_set_exchange as bse

        try:
            if add_all_elements:
                basis_definition = bse.get_basis(name, fmt="gaussian94", header=False)
            else:
                # restrict the definition to the elements present in the molecule returned by get_molecule()
                elements = list(np.unique(self.get_molecule().atomic_numbers.view(np.ndarray)))
                basis_definition = bse.get_basis(name, fmt="gaussian94", header=False, elements=elements)

            if return_string:
                # documented contract: hand the definition back and leave ``self`` untouched
                return basis_definition

            if self.footer is None:
                self.footer = basis_definition
            else:
                self.footer += basis_definition
            self.footer += "\n"

        except Exception as e:
            raise ValueError(f"adding basis set {name} from basis set exchange failed!\n{e}") from e

    @classmethod
    def read_file(cls, filename, return_lines=False, extended_opt_info=False):
        """
        Reads a Gaussian ``.out`` or ``.gjf`` file and populates the attributes accordingly.
        Only footers from ``opt=modredundant`` can be read automatically --  ``genecep`` custom basis sets, &c must be specified manually.

        Filenames ending in ``gjf`` or ``com`` are handed to ``cls._read_gjf_file``; anything else is
        split into Link1 sections and each section is parsed with ``parse.read_file_fast``.

        Will throw ``ValueError`` if there have been no successful iterations
        (NOTE(review): raised by the parsing helpers, not visible in this method -- confirm).

        Args:
            filename (str): path to the out file
            return_lines (Bool): whether the lines of the file should be returned
            extended_opt_info (Bool): if full parameters about each opt step should be collected
                (by default, only ``rms_displacement`` and ``rms_force`` are collected)

        Returns:
            ``GaussianFile`` object (or list of ``GaussianFile`` objects for Link1 files)
            (optional) the lines of the file (or list of lines of file for Link1 files) as Lines object
        """
        if re.search(r"gjf$", filename) or re.search(r"com$", filename):
            return cls._read_gjf_file(filename, return_lines)

        link1_lines = parse.split_link1_to_text(filename)
        files = [
            parse.read_file_fast(lines, filename, link1idx, extended_opt_info=extended_opt_info)
            for link1idx, lines in enumerate(link1_lines)
        ]

        if return_lines:
            # the file is split a second time with parse.split_link1 to obtain the lines handed back to the caller
            link1_lines = parse.split_link1(filename)
            if len(link1_lines) == 1:
                return files[0], link1_lines[0]
            return files, link1_lines

        # a file with a single section is returned unwrapped rather than as a one-element list
        if len(link1_lines) == 1:
            return files[0]
        return files