Exemple #1
0
def xyzfile(xyzfile, ccxyz=False):
    """Parse a single-geometry XYZ file into a ccData or ccData_xyz object.

    The first line of the file is not read. The second line is handed to
    ``_chargemult`` to obtain the charge and multiplicity, and is also kept
    as the comment. Every following non-blank line is split on whitespace
    into an element symbol followed by its coordinates.

    Args:
        xyzfile: Path of the XYZ file, as a string.
        ccxyz: When true, return a ``ccData_xyz`` that additionally carries
            the ``elements``, ``comment`` and ``filename`` attributes.

    Returns:
        A ``ccData`` object, or a ``ccData_xyz`` object when ``ccxyz`` is true.

    Raises:
        TypeError: If ``xyzfile`` is not a string.
    """
    if not isinstance(xyzfile, str):
        raise TypeError("{!r} is not a xyzfilename".format(xyzfile))

    attributes = {}
    ptable = PeriodicTable()

    # Only the read needs the file handle; parsing happens after it is closed.
    with open(xyzfile, 'r') as handle:
        lines = handle.readlines()

    # Charge and multiplicity come from the comment line, as in multixyzfile.
    charge, mult = _chargemult(lines[1])
    attributes['charge'] = charge
    attributes['mult'] = mult

    # Blank lines (typically at the end of the file) hold no atom; skipping
    # them avoids an IndexError when the symbol column is looked up.
    geometry = [fields for fields in (line.split() for line in lines[2:]) if fields]

    # Coordinates are kept as the string tokens read from the file.
    coordinates = [fields[1:] for fields in geometry]
    atomnos = [ptable.number[fields[0]] for fields in geometry]
    attributes['atomcoords'] = [np.array(coordinates)]
    attributes['atomnos'] = np.array(atomnos)
    attributes['natom'] = len(atomnos)
    attributes['atommasses'] = [pt.Mass[pt.Element[number]] for number in atomnos]

    if not ccxyz:
        return ccData(attributes=attributes)

    # Custom ccData_xyz attributes
    attributes['elements'] = [fields[0] for fields in geometry]
    attributes['comment'] = lines[1]
    attributes['filename'] = os.path.split(xyzfile.rstrip())[1]
    return ccData_xyz(attributes=attributes)
Exemple #2
0
def makecclib(iodat):
    """Build a cclib ccData object from a horton IOData object.

    Only the attributes present on ``iodat`` are translated; the collected
    dictionary is passed to ``ccData``.
    """

    check_horton()
    converted = {}

    # The horton 3 IOData class is built with attr and has no __dict__, and
    # some of its attributes default to None. That is why the ``mo`` check
    # below also looks for an attribute on ``iodat.mo`` itself.
    if hasattr(iodat, "atcoords"):
        # cclib keeps the whole coordinate history in a list, horton only
        # keeps the last geometry.
        converted["atomcoords"] = [iodat.atcoords]

    if hasattr(iodat, "mo") and hasattr(iodat.mo, "norba"):
        orbitals = iodat.mo
        # MO coefficients are transposed to match the expected dimensions.
        mocoeffs = [orbitals.coeffs[:orbitals.norba].T]
        if orbitals.kind == "unrestricted":
            mocoeffs.append(orbitals.coeffs[orbitals.norba:].T)
        converted["mocoeffs"] = mocoeffs

    if hasattr(iodat, "spinpol") and isinstance(iodat.spinpol, int):
        # IOData stores 2S, ccData stores 2S+1.
        converted["mult"] = iodat.spinpol + 1

    if hasattr(iodat, "atnums"):
        converted["atnums"] = numpy.asanyarray(iodat.atnums)

    if hasattr(iodat, "atcorenums") and isinstance(iodat.atnums, numpy.ndarray):
        # cclib stores the number of electrons screened out by the
        # pseudopotential, horton stores the number left after applying it.
        all_electrons = numpy.asanyarray(iodat.atnums)
        remaining = numpy.asanyarray(iodat.atcorenums)
        converted["coreelectrons"] = all_electrons - remaining

    if hasattr(iodat, "atcharges"):
        converted["atomcharges"] = iodat.atcharges

    return ccData(converted)
Exemple #3
0
def xyzfile(xyzfile, ccxyz=False):
    """Parse a single-geometry XYZ file into a ccData or ccData_xyz object.

    The first line of the file is not read. The second line is handed to
    ``_chargemult`` to obtain the charge and multiplicity, and is also kept
    as the comment. Every following non-blank line is split on whitespace
    into an element symbol followed by its coordinates.

    Args:
        xyzfile: Path of the XYZ file, as a string.
        ccxyz: When true, return a ``ccData_xyz`` that additionally carries
            the ``elements``, ``comment`` and ``filename`` attributes.

    Returns:
        A ``ccData`` object, or a ``ccData_xyz`` object when ``ccxyz`` is true.

    Raises:
        TypeError: If ``xyzfile`` is not a string.
    """
    if not isinstance(xyzfile, str):
        raise TypeError("{!r} is not a xyzfilename".format(xyzfile))

    attributes = {}
    ptable = PeriodicTable()

    # Only the read needs the file handle; parsing happens after it is closed.
    with open(xyzfile, 'r') as handle:
        lines = handle.readlines()

    # Charge and multiplicity come from the comment line, as in multixyzfile.
    charge, mult = _chargemult(lines[1])
    attributes['charge'] = charge
    attributes['mult'] = mult

    # Blank lines (typically at the end of the file) hold no atom; skipping
    # them avoids an IndexError when the symbol column is looked up.
    geometry = [fields for fields in (line.split() for line in lines[2:]) if fields]

    # Coordinates are kept as the string tokens read from the file.
    coordinates = [fields[1:] for fields in geometry]
    atomnos = [ptable.number[fields[0]] for fields in geometry]
    attributes['atomcoords'] = [np.array(coordinates)]
    attributes['atomnos'] = np.array(atomnos)
    attributes['natom'] = len(atomnos)
    attributes['atommasses'] = [pt.Mass[pt.Element[number]] for number in atomnos]

    if not ccxyz:
        return ccData(attributes=attributes)

    # Custom ccData_xyz attributes
    attributes['elements'] = [fields[0] for fields in geometry]
    attributes['comment'] = lines[1]
    attributes['filename'] = os.path.split(xyzfile.rstrip())[1]
    return ccData_xyz(attributes=attributes)
Exemple #4
0
    def generate_repr(self):
        """Convert the raw contents of the source into the internal representation.

        ``self.filecontents`` is read as one or more XYZ frames, each laid
        out as:

        1. a line whose first token is the number of atoms,
        2. a comment line,
        3. one line per atom with at least 4 columns: the atomic symbol
           followed by three coordinates (further columns are ignored),
        4. optionally, blank lines before the next frame.

        The result is stored in ``self.data`` as a ``ccData`` built from
        ``natom`` and ``atomnos`` of the last complete frame, the coordinates
        of every complete frame, and the frame comments under
        ``metadata["comments"]``. A frame cut short by the end of the input
        is dropped together with its comment.

        Raises:
            ValueError: If the contents hold no complete frame, or the atom
                count cannot be converted to an integer.
            AssertionError: If ``filecontents`` is missing or an atom line
                has fewer than four columns.
        """

        assert hasattr(self, 'filecontents')

        it = iter(self.filecontents.splitlines())

        # Describe the last complete frame; they stay None until one is read.
        natom = None
        atomnos = None
        all_atomcoords = []
        comments = []

        while True:

            try:
                line = next(it)
                # Skip any run of blank lines between frames or at the end.
                while line.strip() == '':
                    line = next(it)
                frame_natom = int(line.split()[0])

                comment = next(it)

                rows = []
                for _ in range(frame_natom):
                    tokens = next(it).split()
                    assert len(tokens) >= 4
                    rows.append(tokens)

            except StopIteration:
                # Input exhausted: either cleanly between frames or in the
                # middle of one, in which case the partial frame is ignored.
                break

            # Commit the frame only once it has been read completely, so that
            # natom, atomnos, coordinates and comments always stay in step.
            natom = frame_natom
            atomnos = [self.pt.number[row[0]] for row in rows]
            # Everything beyond the fourth column is ignored.
            all_atomcoords.append([row[1:4] for row in rows])
            comments.append(comment)

        if natom is None:
            raise ValueError("no complete XYZ frame found in the file contents")

        attributes = {
            'natom': natom,
            'atomnos': atomnos,
            'atomcoords': all_atomcoords,
            'metadata': {
                "comments": comments
            },
        }

        self.data = ccData(attributes)
Exemple #5
0
def read_trajectory(filename):
    """Read an ASE Trajectory object and return a ccData object.

    The returned object has everything write_trajectory writes, plus natom,
    charge, mult and temperature.

    The following properties are taken from the last frame: atomnos,
    atomcharges, atomspins, atommasses, moments, freeenergy and temperature.
    charge, mult and natom also represent the last frame, since they depend on
    other properties read from the last frame.

    Bear in mind that ASE calculates temperature from the kinetic energy, so
    anything "static" (which includes anything cclib parses) will have zero
    temperature.

    Inputs:
        filename - path to traj file to be read.

    Raises:
        ValueError - if the trajectory contains no frames.
    """
    _check_ase(_found_ase)
    attributes = {"atomcoords": [], "scfenergies": [], "grads": []}

    # Stays None when the trajectory yields no frame at all.
    ccdata = None

    # The context manager closes the trajectory file once all frames are read.
    with Trajectory(filename, "r") as trajectory:
        for atoms in trajectory:
            ccdata = makecclib(atoms)
            attributes["atomcoords"].append(ccdata.atomcoords[-1])
            if hasattr(ccdata, "scfenergies"):
                attributes["scfenergies"].append(ccdata.scfenergies[-1])
            if hasattr(ccdata, "grads"):
                attributes["grads"].append(ccdata.grads[-1])

    if ccdata is None:
        raise ValueError("trajectory {!r} contains no frames".format(filename))

    # ccdata is now last frame
    attributes["atomnos"] = ccdata.atomnos
    attributes["atomcharges"] = ccdata.atomcharges
    attributes["atomspins"] = ccdata.atomspins
    attributes["atommasses"] = ccdata.atommasses

    if hasattr(ccdata, "moments"):
        attributes["moments"] = ccdata.moments
    if hasattr(ccdata, "freeenergy"):
        attributes["freeenergy"] = ccdata.freeenergy

    # remove if empty
    if not attributes["scfenergies"]:
        del attributes["scfenergies"]
    if not attributes["grads"]:
        del attributes["grads"]

    # extra stuff we can't write in write_trajectory
    attributes["temperature"] = ccdata.temperature
    attributes["charge"] = ccdata.charge
    attributes["mult"] = ccdata.mult
    attributes["natom"] = ccdata.natom

    return ccData(attributes)
Exemple #6
0
    def generate_repr(self):
        """Convert the raw contents of the source into the internal representation.

        ``self.filecontents`` is read as one or more XYZ frames, each laid
        out as:

        1. a line whose first token is the number of atoms,
        2. a comment line (read and discarded),
        3. one line per atom with at least 4 columns: the atomic symbol
           followed by three coordinates (further columns are ignored),
        4. optionally, blank lines before the next frame.

        The result is stored in ``self.data`` as a ``ccData`` built from
        ``natom`` and ``atomnos`` of the last complete frame and the
        coordinates of every complete frame. A frame cut short by the end of
        the input is dropped.

        Raises:
            ValueError: If the contents hold no complete frame, or the atom
                count cannot be converted to an integer.
            AssertionError: If ``filecontents`` is missing or an atom line
                has fewer than four columns.
        """

        assert hasattr(self, 'filecontents')

        it = iter(self.filecontents.splitlines())

        # Describe the last complete frame; they stay None until one is read.
        natom = None
        atomnos = None
        all_atomcoords = []

        while True:

            try:
                line = next(it)
                # Skip any run of blank lines between frames or at the end.
                while line.strip() == '':
                    line = next(it)
                frame_natom = int(line.split()[0])

                # The comment line is consumed but not stored.
                next(it)

                rows = []
                for _ in range(frame_natom):
                    tokens = next(it).split()
                    assert len(tokens) >= 4
                    rows.append(tokens)

            except StopIteration:
                # Input exhausted: either cleanly between frames or in the
                # middle of one, in which case the partial frame is ignored.
                break

            # Commit the frame only once it has been read completely, so that
            # natom, atomnos and the coordinates always stay in step.
            natom = frame_natom
            atomnos = [self.pt.number[row[0]] for row in rows]
            # Everything beyond the fourth column is ignored.
            all_atomcoords.append([row[1:4] for row in rows])

        if natom is None:
            raise ValueError("no complete XYZ frame found in the file contents")

        attributes = {
            'natom': natom,
            'atomnos': atomnos,
            'atomcoords': all_atomcoords,
        }

        self.data = ccData(attributes)
Exemple #7
0
def makecclib(mol):
    """Turn an OpenBabel molecule into a ccData object.

    Only the atom count, atomic numbers, atomic masses and Cartesian
    coordinates are transferred. Total charge and multiplicity are left out
    on purpose: they are often derived from atomic formal charges, so it is
    safer to assume they would not be correct.
    """
    natom = mol.NumAtoms()

    coordinates = []
    masses = []
    numbers = []
    for obatom in ob.OBMolAtomIter(mol):
        position = [obatom.GetX(), obatom.GetY(), obatom.GetZ()]
        coordinates.append(position)
        masses.append(obatom.GetAtomicMass())
        numbers.append(obatom.GetAtomicNum())

    return ccData({
        "atomcoords": coordinates,
        "atommasses": masses,
        "atomnos": numbers,
        "natom": natom,
    })
Exemple #8
0
def multixyzfile(multixyzfile):
    """Parse a multi-geometry XYZ file into a list of ccData objects.

    The file is read as a sequence of frames. Each frame consists of a line
    holding the number of atoms, a comment line that is handed to
    ``_chargemult`` to obtain charge and multiplicity, and then that many
    atom lines made of an element symbol followed by numeric coordinates.
    Blank lines before a frame (including at the end of the file) are
    skipped.

    The number of parsed conformers is printed to standard output.

    Args:
        multixyzfile: Path of the file, as a string.

    Returns:
        A list with one ``ccData`` object per frame, each carrying
        ``charge``, ``mult``, ``atomcoords`` and ``atomnos``.

    Raises:
        AssertionError: If ``multixyzfile`` is not a string.
        EOFError: If the file is empty or holds nothing but blank lines.
    """
    assert isinstance(multixyzfile, str)

    # Check that the file is not empty before doing any other work.
    if os.stat(multixyzfile).st_size == 0:
        raise EOFError(multixyzfile + " is empty")

    ptable = PeriodicTable()

    with open(multixyzfile, 'r') as handle:
        lines = handle.readlines()

    filelength = len(lines)
    attributeslist = []
    idx = 0
    while idx < filelength:
        # A blank line cannot start a frame; without this skip a trailing
        # newline would be fed to int() and abort the whole parse.
        if not lines[idx].strip():
            idx += 1
            continue

        # Get number of atoms and charge/mult from comment line
        numatoms = int(lines[idx])
        charge, mult = _chargemult(lines[idx + 1])

        first = idx + 2
        last = first + numatoms
        atomnos = []
        atomcoords = []
        for line in lines[first:last]:
            fields = line.split()
            atomnos.append(ptable.number[fields[0]])
            atomcoords.append([float(value) for value in fields[1:]])

        attributeslist.append({
            'charge': charge,
            'mult': mult,
            'atomcoords': [np.array(atomcoords)],
            'atomnos': np.array(atomnos),
        })
        idx = last

    if not attributeslist:
        raise EOFError(multixyzfile + " contains no geometries")

    print('Number of conformers parsed:', len(attributeslist))

    return [ccData(attributes=attrs) for attrs in attributeslist]
Exemple #9
0
def multixyzfile(multixyzfile):
    """Parse a multi-geometry XYZ file into a list of ccData objects.

    The file is read as a sequence of frames. Each frame consists of a line
    holding the number of atoms, a comment line that is handed to
    ``_chargemult`` to obtain charge and multiplicity, and then that many
    atom lines made of an element symbol followed by numeric coordinates.
    Blank lines before a frame (including at the end of the file) are
    skipped.

    The number of parsed conformers is printed to standard output.

    Args:
        multixyzfile: Path of the file, as a string.

    Returns:
        A list with one ``ccData`` object per frame, each carrying
        ``charge``, ``mult``, ``atomcoords`` and ``atomnos``.

    Raises:
        AssertionError: If ``multixyzfile`` is not a string.
        EOFError: If the file is empty or holds nothing but blank lines.
    """
    assert isinstance(multixyzfile, str)

    # Check that the file is not empty before doing any other work.
    if os.stat(multixyzfile).st_size == 0:
        raise EOFError(multixyzfile+" is empty")

    ptable = PeriodicTable()

    with open(multixyzfile, 'r') as handle:
        lines = handle.readlines()

    filelength = len(lines)
    attributeslist = []
    idx = 0
    while idx < filelength:
        # A blank line cannot start a frame; without this skip a trailing
        # newline would be fed to int() and abort the whole parse.
        if not lines[idx].strip():
            idx += 1
            continue

        # Get number of atoms and charge/mult from comment line
        numatoms = int(lines[idx])
        charge, mult = _chargemult(lines[idx+1])

        first = idx+2
        last = first+numatoms
        atomnos = []
        atomcoords = []
        for line in lines[first:last]:
            fields = line.split()
            atomnos.append(ptable.number[fields[0]])
            atomcoords.append([float(value) for value in fields[1:]])

        attributeslist.append({
            'charge': charge,
            'mult': mult,
            'atomcoords': [np.array(atomcoords)],
            'atomnos': np.array(atomnos),
        })
        idx = last

    if not attributeslist:
        raise EOFError(multixyzfile+" contains no geometries")

    print('Number of conformers parsed:', len(attributeslist))

    return [ccData(attributes=attrs) for attrs in attributeslist]
Exemple #10
0
def makecclib(mol):
    """Turn an OpenBabel molecule into a ccData object.

    Only the atom count, atomic numbers, atomic masses and Cartesian
    coordinates are transferred. Total charge and multiplicity are left out
    on purpose: they are often derived from atomic formal charges, so it is
    safer to assume they would not be correct.
    """
    _check_openbabel(_found_openbabel)

    natom = mol.NumAtoms()

    coordinates = []
    masses = []
    numbers = []
    for obatom in ob.OBMolAtomIter(mol):
        position = [obatom.GetX(), obatom.GetY(), obatom.GetZ()]
        coordinates.append(position)
        masses.append(obatom.GetAtomicMass())
        numbers.append(obatom.GetAtomicNum())

    return ccData({
        'atomcoords': coordinates,
        'atommasses': masses,
        'atomnos': numbers,
        'natom': natom,
    })
Exemple #11
0
def mopacoutputfile(mopacoutputfile, nogeometry=True):
    """Parse a MOPAC output file into a ccData object.

    The file is scanned line by line:

    * a line containing ``CHARGE ON SYSTEM =`` sets the charge from its
      sixth whitespace-separated token (only while the charge is still 0);
    * a line containing ``SPIN STATE DEFINED AS`` sets the multiplicity by
      looking its second token up in the spin-state table (only while the
      multiplicity is still 1);
    * the first line containing ``TOTAL ENERGY`` has its fourth token
      converted with ``convertor(..., 'eV', 'kcal')``, stored as the only
      entry of ``scfenergies``, and ends the scan;
    * a geometry table starts after the ``NUMBER   SYMBOL   (ANGSTROMS)...``
      header. Inside it a line equal to ``' \\n'`` is skipped, any other
      whitespace-only line closes the table, and every other line provides
      the element symbol (second token) and the coordinates (every second
      token from the third on). Atomic numbers and ``natom`` are taken from
      the first closed table.

    Args:
        mopacoutputfile: Path of the MOPAC output file.
        nogeometry: Must be left true; the alternative mode is not
            implemented.

    Returns:
        A ``ccData`` object with ``natom``, ``atomcoords``, ``atomnos``,
        ``scfenergies``, ``charge`` and ``mult``.

    Raises:
        NotImplementedError: If ``nogeometry`` is false.
    """
    if not nogeometry:
        # The former bare ``raise`` had no active exception to re-raise and
        # surfaced as a RuntimeError; NotImplementedError is a subclass of
        # RuntimeError, so existing handlers keep working.
        raise NotImplementedError(
            "MOPAC geometry parsing not yet implemented - IN PROGRESS")

    spinstate = {
        'SINGLET': 1,
        'DOUBLET': 2,
        'TRIPLET': 3,
        'QUARTET': 4,
        'QUINTET': 5,
        'SEXTET': 6,
        'HEPTET': 7,
        'OCTET': 8,
        'NONET': 9
    }

    # Only the read needs the file handle; parsing happens after it is closed.
    with open(mopacoutputfile, 'r') as handle:
        lines = handle.readlines()

    # Whether or not we are in geometry printout
    geometry = False

    # Defaults
    charge = 0
    mult = 1

    # Empties
    atomcoords = []
    atomelements = []
    atomnos = []
    natom = None
    scfenergies = []

    subatomelements = []
    subatomcoords = []

    for line in lines:
        if 'CHARGE ON SYSTEM =' in line and charge == 0:
            charge = int(line.split()[5])
        elif 'SPIN STATE DEFINED AS ' in line and mult == 1:
            mult = spinstate[line.split()[1]]
        elif "TOTAL ENERGY" in line:
            scf = float(line.split()[3])
            scfenergies.append(convertor(scf, 'eV', 'kcal'))
            break
        elif geometry and line != ' \n':
            entry = line.split()
            if not entry:
                # A blank line closes the current geometry table.
                geometry = False
                atomcoords.append(subatomcoords)
                if not atomelements:
                    # Elements are recorded from the first table only.
                    atomelements = subatomelements
                    for atomelement in atomelements:
                        atomnos.append(pt.AtomicNum[atomelement])
                    natom = len(atomnos)
            else:
                subatomelements.append(entry[1])
                subatomcoords.append(list(map(float, entry[2::2])))

        elif 'NUMBER   SYMBOL      (ANGSTROMS)     (ANGSTROMS)     (ANGSTROMS)' in line:
            geometry = True
            subatomelements = []
            subatomcoords = []

    attributes = {
        'natom': natom,
        'atomcoords': atomcoords,
        'atomnos': atomnos,
        'scfenergies': scfenergies,
        'charge': charge,
        'mult': mult,
    }

    return ccData(attributes=attributes)
Exemple #12
0
def mopacoutputfile(mopacoutputfile, nogeometry=True):
    """Parse a MOPAC output file into a ccData object.

    The file is scanned line by line:

    * a line containing ``CHARGE ON SYSTEM =`` sets the charge from its
      sixth whitespace-separated token (only while the charge is still 0);
    * a line containing ``SPIN STATE DEFINED AS`` sets the multiplicity by
      looking its second token up in the spin-state table (only while the
      multiplicity is still 1);
    * the first line containing ``TOTAL ENERGY`` has its fourth token
      converted with ``convertor(..., 'eV', 'kcal')``, stored as the only
      entry of ``scfenergies``, and ends the scan;
    * a geometry table starts after the ``NUMBER   SYMBOL   (ANGSTROMS)...``
      header. Inside it a line equal to ``' \\n'`` is skipped, any other
      whitespace-only line closes the table, and every other line provides
      the element symbol (second token) and the coordinates (every second
      token from the third on). Atomic numbers and ``natom`` are taken from
      the first closed table.

    Args:
        mopacoutputfile: Path of the MOPAC output file.
        nogeometry: Must be left true; the alternative mode is not
            implemented.

    Returns:
        A ``ccData`` object with ``natom``, ``atomcoords``, ``atomnos``,
        ``scfenergies``, ``charge`` and ``mult``.

    Raises:
        NotImplementedError: If ``nogeometry`` is false.
    """
    if not nogeometry:
        # The former bare ``raise`` had no active exception to re-raise and
        # surfaced as a RuntimeError; NotImplementedError is a subclass of
        # RuntimeError, so existing handlers keep working.
        raise NotImplementedError(
            "MOPAC geometry parsing not yet implemented - IN PROGRESS")

    spinstate = {'SINGLET': 1,
                 'DOUBLET': 2,
                 'TRIPLET': 3,
                 'QUARTET': 4,
                 'QUINTET': 5,
                 'SEXTET': 6,
                 'HEPTET': 7,
                 'OCTET': 8,
                 'NONET': 9}

    # Only the read needs the file handle; parsing happens after it is closed.
    with open(mopacoutputfile, 'r') as handle:
        lines = handle.readlines()

    # Whether or not we are in geometry printout
    geometry = False

    # Defaults
    charge = 0
    mult = 1

    # Empties
    atomcoords = []
    atomelements = []
    atomnos = []
    natom = None
    scfenergies = []

    subatomelements = []
    subatomcoords = []

    for line in lines:
        if 'CHARGE ON SYSTEM =' in line and charge == 0:
            charge = int(line.split()[5])
        elif 'SPIN STATE DEFINED AS ' in line and mult == 1:
            mult = spinstate[line.split()[1]]
        elif "TOTAL ENERGY" in line:
            scf = float(line.split()[3])
            scfenergies.append(convertor(scf, 'eV', 'kcal'))
            break
        elif geometry and line != ' \n':
            entry = line.split()
            if not entry:
                # A blank line closes the current geometry table.
                geometry = False
                atomcoords.append(subatomcoords)
                if not atomelements:
                    # Elements are recorded from the first table only.
                    atomelements = subatomelements
                    for atomelement in atomelements:
                        atomnos.append(pt.AtomicNum[atomelement])
                    natom = len(atomnos)
            else:
                subatomelements.append(entry[1])
                subatomcoords.append(list(map(float, entry[2::2])))

        elif 'NUMBER   SYMBOL      (ANGSTROMS)     (ANGSTROMS)     (ANGSTROMS)' in line:
            geometry = True
            subatomelements = []
            subatomcoords = []

    attributes = {
        'natom': natom,
        'atomcoords': atomcoords,
        'atomnos': atomnos,
        'scfenergies': scfenergies,
        'charge': charge,
        'mult': mult,
    }

    return ccData(attributes=attributes)
Exemple #13
0
def makecclib(atoms, popname="mulliken"):
    """Build a ccData object from an ASE Atoms object.

    Whatever data is available (forces/gradients, potential energy/free
    energy) is assumed to come from SCF (see
    https://wiki.fysik.dtu.dk/ase/ase/atoms.html#adding-a-calculator).

    ASE derives the temperature from the kinetic energy, so anything
    "static" (which includes anything cclib parses) will have zero
    temperature.

    Inputs:
        atoms - an instance of ASE `Atoms`
        popname - name of the population analysis under which the atomic
            partial charges and atomic spin densities are stored. Molecular
            charge and multiplicity are evaluated from them.
    """
    _check_ase(_found_ase)

    positions = atoms.get_positions()
    attributes = {
        "atomcoords": np.array([positions]),
        "atomnos": atoms.get_atomic_numbers(),
        "atommasses": atoms.get_masses(),
        "natom": atoms.get_global_number_of_atoms(),
    }

    # Prefer calculated values, fall back to the initial ones.
    try:
        charges = atoms.get_charges()
    except (PropertyNotImplementedError, RuntimeError):
        charges = atoms.get_initial_charges()
    attributes["atomcharges"] = {popname: charges}

    try:
        spins = atoms.get_magnetic_moments()
    except (PropertyNotImplementedError, RuntimeError):
        spins = atoms.get_initial_magnetic_moments()
    attributes["atomspins"] = {popname: spins}

    # Charge and multiplicity are obtained the way ASE does it in its
    # Gaussian interface, from initial charges and initial magnetic moments
    # (https://gitlab.com/ase/ase/-/blob/a26bda2160527ca7afc0135c69e4367a5bc5a264/ase/io/gaussian.py#L105)
    attributes["charge"] = charges.sum()
    attributes["mult"] = spins.sum() + 1

    # Each of the remaining properties is optional: a RuntimeError from the
    # getter simply leaves the attribute out.
    try:
        energy = atoms.get_potential_energy()
    except RuntimeError:
        pass
    else:
        attributes["scfenergies"] = np.array([energy])

    try:
        forces = atoms.get_forces()
    except RuntimeError:
        pass
    else:
        attributes["grads"] = (-np.array([forces]) * units.Bohr /
                               units.Hartree)

    try:
        center_of_mass = atoms.get_center_of_mass()
        dipole = atoms.get_dipole_moment()
    except RuntimeError:
        pass
    else:
        attributes["moments"] = [center_of_mass, dipole / units.Bohr]

    try:
        consistent_energy = atoms.get_potential_energy(force_consistent=True)
    except RuntimeError:
        pass
    else:
        attributes["freeenergy"] = consistent_energy / units.Hartree

    attributes["temperature"] = atoms.get_temperature()

    return ccData(attributes)
Exemple #14
0
def makecclib(iodat):
    """Build a cclib ccData object from a horton IOData object.

    The translation depends on the horton version reported by
    ``check_horton()``; for any version other than 2 or 3 an empty attribute
    dictionary is passed to ``ccData``.
    """

    hortonver = check_horton()
    attributes = {}

    if hortonver == 2:
        # Attributes that only need a new name.
        renamed = {
            "numbers": "atomnos",
            "ms2": "mult",
            "polar": "polarizability",
        }
        attributes = {
            renamed[name]: value
            for name, value in iodat.__dict__.items()
            if name in renamed
        }

        # The remaining attributes need their data reshaped.
        if hasattr(iodat, "coordinates"):
            # cclib keeps the whole coordinate history in a list, horton
            # only keeps the last geometry.
            attributes["atomcoords"] = [iodat.coordinates]
        if hasattr(iodat, "orb_alpha"):
            attributes["mocoeffs"] = [iodat.orb_alpha]
        if hasattr(iodat, "orb_beta"):
            attributes["mocoeffs"].append(iodat.orb_beta)
        if hasattr(iodat, "pseudo_numbers"):
            # cclib stores the number of electrons screened out by the
            # pseudopotential, horton stores the number left after applying it.
            attributes["coreelectrons"] = iodat.numbers - iodat.pseudo_numbers

        # Collect whichever population analyses are present.
        charges = {}
        if hasattr(iodat, "mulliken_charges"):
            charges["mulliken"] = iodat.mulliken_charges
        if hasattr(iodat, "npa_charges"):
            charges["natural"] = iodat.npa_charges
        if charges:
            attributes["atomcharges"] = charges

    elif hortonver == 3:
        # The horton 3 IOData class is built with attr and has no __dict__,
        # and some of its attributes default to None. That is why the ``mo``
        # check below also looks for an attribute on ``iodat.mo`` itself.
        if hasattr(iodat, "atcoords"):
            # cclib keeps the whole coordinate history in a list, horton
            # only keeps the last geometry.
            attributes["atomcoords"] = [iodat.atcoords]

        if hasattr(iodat, "mo") and hasattr(iodat.mo, "norba"):
            orbitals = iodat.mo
            # MO coefficients are transposed to match the expected dimensions.
            mocoeffs = [orbitals.coeffs[: orbitals.norba].T]
            if orbitals.kind == "unrestricted":
                mocoeffs.append(orbitals.coeffs[orbitals.norba :].T)
            attributes["mocoeffs"] = mocoeffs

        if hasattr(iodat, "spinpol") and isinstance(iodat.spinpol, int):
            # IOData stores 2S, ccData stores 2S+1.
            attributes["mult"] = iodat.spinpol + 1

        if hasattr(iodat, "atnums"):
            attributes["atnums"] = numpy.asanyarray(iodat.atnums)

        if hasattr(iodat, "atcorenums") and isinstance(iodat.atnums, numpy.ndarray):
            # cclib stores the number of electrons screened out by the
            # pseudopotential, horton stores the number left after applying it.
            all_electrons = numpy.asanyarray(iodat.atnums)
            remaining = numpy.asanyarray(iodat.atcorenums)
            attributes["coreelectrons"] = all_electrons - remaining

        if hasattr(iodat, "atcharges"):
            attributes["atomcharges"] = iodat.atcharges

    return ccData(attributes)