Example #1
0
 def parse_basis_set(self):
     # Basis flags
     _rebas02 = 'AO basis set in the form of general basis input'
     _rebas03 = ' (Standard|General) basis'
     _basrep = {'D 0': 'D0 ', 'F 0': 'F0 ',
                'G 0': 'G0 ', 'H 0': 'H0 ', 'I 0': 'I0 '}
     _rebaspat = re.compile('|'.join(_basrep.keys()))
     # Find the basis set
     found = self.regex(_rebas02, _rebas03, keys_only=True)
     if not found[_rebas02]: return
     start = stop = found[_rebas02][0] + 1
     while self[stop].strip(): stop += 1
     # Raw data
     df = self.pandas_dataframe(start, stop, 4)
     def _padx(srs): return [0] + srs.tolist() + [df.shape[0]]
     # Get some indices for appropriate columns
     setdx = _padx(df[0][df[0] == '****'].index)
     shldx = _padx(df[3][~np.isnan(df[3])].index)
     lindx = df[0][df[0].str.lower().isin(lorder + ['sp'])]
     # Populate the df
     df['L'] = lindx.str.lower().map(lmap)
     df['L'] = df['L'].fillna(method='ffill').fillna(
                              method='bfill').astype(np.int64)
     df['center'] = np.concatenate([np.repeat(i, stop - start)
                    for i, (start, stop) in enumerate(zip(setdx, setdx[1:]))])
     df['shell'] = np.concatenate([np.repeat(i-1, stop - start)
                   for i, (start, stop) in enumerate(zip(shldx, shldx[1:]))])
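     # center: index of the block delimited by '****' separators (one block per atomic
     # center); shell: running shell counter over the whole table, renumbered per center below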
     # Renumber shells per center: subtract the running shell count of all previous
     # centers so each center's shells start at 0 (avoids nested loops)
     maxshl = df.groupby('center').apply(lambda x: x.shell.max() + 1)
     maxshl.index += 1
     maxshl[0] = 0
     df['shell'] = df['shell'] - df['center'].map(maxshl)
     # Drop all the garbage
     todrop = setdx[:-1] + [i+1 for i in setdx[:-2]] + lindx.index.tolist()
     df.drop(todrop, inplace=True)
     # Keep cleaning
     if df[0].dtype == 'object':
         df[0] = df[0].str.replace('D', 'E').astype(np.float64)
     if df[1].dtype == 'object':
         df[1] = df[1].str.replace('D', 'E').astype(np.float64)
     try: sp = np.isnan(df[2]).sum() == df.shape[0]
     except TypeError:
         df[2] = df[2].str.replace('D', 'E').astype(np.float64)
         sp = True
     df.rename(columns={0: 'alpha', 1: 'd'}, inplace=True)
     # Deduplicate basis sets and expand 'SP' shells if present
     df, setmap = deduplicate_basis_sets(df, sp=sp)
     spherical = '5D' in self[found[_rebas03][0]]
     if df['L'].max() < 2:
         spherical = True
     self.basis_set = BasisSet(df)
     self.meta['spherical'] = spherical
     self.atom['set'] = self.atom['set'].map(setmap)
Example #2
0
 def parse_basis_set(self):
     # Find the basis set
     _re_bas_00 = '(Slater-type)  F U N C T I O N S'
     _re_bas_01 = 'Atom Type'
     start = self.find(_re_bas_00, keys_only=True)[-1] + 3
     starts = self.find(_re_bas_01, start=start, keys_only=True)
     lines = []
     for ext in starts:
         for i in range(4):
             lines.append(start + ext + i)
         stop = start + ext + 4
         while self[stop].strip():
             lines.append(stop)
             stop += 1
     df = pd.read_fwf(StringIO('\n'.join([self[i] for i in lines])),
                      widths=[4, 2, 12, 4],
                      names=['n', 'L', 'alpha', 'symbol'])
     # Where atom types change
     idxs = [0] + df['n'][df['n'] == '---'].index.tolist() + [df.shape[0]]
     sets, shells = [], []
     for i, (start, stop) in enumerate(zip(idxs, idxs[1:])):
         sets.append(np.repeat(i - 1, stop - start))
         shells.append(np.arange(-1, stop - start - 1))
     df['set'] = np.concatenate(sets)
     df['shell'] = np.concatenate(shells)
     # Atom table basis set map
     basmap = df['symbol'].dropna()
     basmap = basmap[basmap.str.endswith(')')].str.strip(')')
     basmap = {
         val: df['set'][key] + 1
         for key, val in basmap.to_dict().items()
     }
     # Discard the garbage
     drop = df['n'].str.strip().str.isnumeric().fillna(False)
     df.drop(drop[drop == False].index, inplace=True)
     df.drop('symbol', axis=1, inplace=True)
     # Clean up the series
     df['alpha'] = df['alpha'].astype(np.float64)
     df['n'] = df['n'].astype(np.int64)
     df['L'] = df['L'].str.lower().map(lmap)
     df['d'] = np.sqrt((2 * df['L'] + 1) / (4 * np.pi))
     df['r'] = df['n'] - (df['L'] + 1)
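     # d above is the angular normalization prefactor sqrt((2L+1)/(4*pi)) for
     # Slater-type functions; r is presumably the extra power of r, r^(n-L-1),
     # in the radial part of the STO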
     df['frame'] = 0
     self.basis_set = BasisSet(df)
     self.meta['spherical'] = False
     self.atom['set'] = self.atom['symbol'].map(basmap)
Example #3
0
 def setUp(self):
     adict = {col: [0] for col in BasisSet._columns}
     adict['frame'] = 0
     # Trivial basis set
     self.bs = BasisSet(adict)
     self.bs['alpha'] = self.bs['alpha'].astype(np.float64)
     self.bs['d'] = self.bs['d'].astype(np.float64)
     # Medium basis set
     self.mbs = BasisSet({
         'frame': 0,
         'alpha': [5., 1., 1.],
         'd': [1., 1., 1.],
         'shell': [0, 1, 0],
         'set': [0, 0, 1],
         'L': [0, 1, 0],
         'n': [1, 2, 1]
     })
     # Large basis set
     self.lbs = BasisSet({
         'frame': 0,
         'alpha': [5., 3., 1., 3., 1., 1., 3., 1., 1.],
         'd': [1., 1., 1., 1., 1., 1., 1., 1., 1.],
         'shell': [0, 0, 0, 1, 1, 2, 0, 0, 1],
         'set': [0, 0, 0, 0, 0, 0, 1, 1, 1],
         'L': [0, 0, 0, 1, 1, 2, 0, 0, 1]
     })
Example #4
0
 def parse_basis_set(self):
     """
     Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.
     """
     if not hasattr(self, "atom"):
         self.parse_atom()
     _rebas01 = ' Basis "'
     _rebas02 = ' Summary of "'
     _rebas03 = [
         ' s ', ' px ', ' py ', ' pz ', ' d ', ' f ', ' g ', ' h ', ' i ',
         ' j ', ' k ', ' l ', ' m ', ' p '
     ]
     found = self.find(_rebas01, _rebas02)
     spherical = "spherical" in found[_rebas01][0][1]
     start = found[_rebas01][0][0] + 2
     idx = 1 if len(found[_rebas02]) > 1 else -1
     stop = found[_rebas02][idx][0] - 1
     # Read in all of the extra lines that contain ---- and tag names
     df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                      widths=(4, 2, 16, 16),
                      names=("shell", "L", "alpha", "d"))
     df.loc[df['shell'] == "--", "shell"] = np.nan
     tags = df.loc[(df['shell'].str.isdigit() == False), "shell"]
     idxs = tags.index.tolist()
     idxs.append(len(df))
     df['set'] = ""
     for i, tag in enumerate(tags):
         df.loc[idxs[i]:idxs[i + 1], "set"] = tag
     df = df.dropna().reset_index(drop=True)
     mapper = {v: k for k, v in dict(enumerate(df['set'].unique())).items()}
     df['set'] = df['set'].map(mapper)
     df['L'] = df['L'].str.strip().str.lower().map(lmap)
     df['alpha'] = df['alpha'].astype(float)
     df['d'] = df['d'].astype(float)
     # NO SUPPORT FOR MULTIPLE FRAMES?
     df['frame'] = 0
     self.basis_set = BasisSet(df)
     self.meta['spherical'] = spherical
     self.atom['set'] = self.atom['tag'].map(mapper)
Example #5
0
 def parse_basis_set(self):
     """
     Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.
     """
     if not hasattr(self, "atom"):
         self.parse_atom()
     _rebas01 = ' Basis "'
     _rebas02 = ' Summary of "'
     _rebas03 = [' s ', ' px ', ' py ', ' pz ',
                 ' d ', ' f ', ' g ', ' h ', ' i ',
                 ' j ', ' k ', ' l ', ' m ', ' p ']
     found = self.find(_rebas01, _rebas02)
     spherical = "spherical" in found[_rebas01][0][1]
     start = found[_rebas01][0][0] + 2
     idx = 1 if len(found[_rebas02]) > 1 else -1
     stop = found[_rebas02][idx][0] - 1
     # Read in all of the extra lines that contain ---- and tag names
     df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                      widths=(4, 2, 16, 16),
                      names=("shell", "L", "alpha", "d"))
     df.loc[df['shell'] == "--", "shell"] = np.nan
     tags = df.loc[(df['shell'].str.isdigit() == False), "shell"]
     idxs = tags.index.tolist()
     idxs.append(len(df))
     df['set'] = ""
     for i, tag in enumerate(tags):
         df.loc[idxs[i]:idxs[i + 1], "set"] = tag
     df = df.dropna().reset_index(drop=True)
     mapper = {v: k for k, v in dict(enumerate(df['set'].unique())).items()}
     df['set'] = df['set'].map(mapper)
     df['L'] = df['L'].str.strip().str.lower().map(lmap)
     df['alpha'] = df['alpha'].astype(float)
     df['d'] = df['d'].astype(float)
     # NO SUPPORT FOR MULTIPLE FRAMES?
     df['frame'] = 0
     self.basis_set = BasisSet(df)
     self.meta['spherical'] = spherical
     self.atom['set'] = self.atom['tag'].map(mapper)
Example #6
0
class Output(six.with_metaclass(OutMeta, Editor)):
    """Editor for NWChem calculation output file (stdout)."""

    def parse_atom(self):
        """Parse the atom dataframe."""
        _reatom01 = 'Geometry "'
        _reatom02 = 'Atomic Mass'
        _reatom03 = 'ECP       "ecp basis"'
        _reatom04 = 'Output coordinates in'
        found = self.find(_reatom01, _reatom02,
                          _reatom03, _reatom04, keys_only=True)
        unit = self[found[_reatom04][0]].split()[3]
        unit = "Angstrom" if unit == "angstroms" else "au"
        starts = np.array(found[_reatom01]) + 7
        stops = np.array(found[_reatom02]) - 1
        ecps = np.array(found[_reatom03]) + 2
        ecps = {self[ln].split()[0]: int(self[ln].split()[3]) for ln in ecps}
        columns = ['label', 'tag', 'Z', 'x', 'y', 'z']
        atom = pd.concat([self.pandas_dataframe(s, e, columns)
                          for s, e in zip(starts, stops)])
        atom['symbol'] = atom['tag'].str.extract('([A-Za-z]{1,})([0-9]*)',
                                                 expand=False)[0].str.lower().str.title()
        atom['Z'] = atom['Z'].astype(np.int64)
        atom['Zeff'] = (atom['Z'] - atom['tag'].map(ecps).fillna(value=0)).astype(np.int64)
        #n = len(atom)
        nf = atom.label.value_counts().max()
        nat = atom.label.max()
        atom['frame'] = [i for i in range(nf) for j in range(nat)]
        atom['label'] -= 1
        atom['x'] *= Length[unit, 'au']
        atom['y'] *= Length[unit, 'au']
        atom['z'] *= Length[unit, 'au']
        if atom['frame'].max() > 0:
            li = atom['frame'].max()
            atom = atom[~(atom['frame'] == li)]
            atom.reset_index(drop=True, inplace=True)
        del atom['label']
        self.atom = Atom(atom)

    def parse_orbital(self):
        """Parse the :class:`~exatomic.core.orbital.Orbital` dataframe."""
        orbital = None
        _remo01 = 'Molecular Orbital Analysis'
        _remo02 = 'alpha - beta orbital overlaps'
        _remo03 = 'center of mass'
        check = self.find(_remo01)
        if any(['Alpha' in value for value in check]):
            alpha_starts = np.array([no for no, line in check if 'Alpha' in line], dtype=np.int64) + 2
            alpha_stops = np.array([no for no, line in check if 'Beta' in line], dtype=np.int64) - 1
            beta_starts = alpha_stops + 3
            beta_stops = np.array(self.find(_remo02, keys_only=True), dtype=np.int64) - 1
            alpha_orbital = self._parse_orbital(alpha_starts, alpha_stops)
            beta_orbital = self._parse_orbital(beta_starts, beta_stops)
            alpha_orbital['spin'] = 0
            beta_orbital['spin'] = 1
            orbital = pd.concat((alpha_orbital, beta_orbital), ignore_index=True)
        else:
            starts = np.array(list(zip(*check))[0], dtype=np.int64) + 2
            stops = np.array(self.find(_remo03, keys_only=True), dtype=np.int64) - 1
            orbital = self._parse_orbital(starts, stops)
            orbital['spin'] = 0
        orbital['group'] = 0
        self.orbital = Orbital(orbital)

    def parse_momatrix(self):
        """
        Parse the :class:`~exatomic.core.orbital.MOMatrix` dataframe.

        Note:
            Must supply 'print "final vectors" "final vectors analysis"' for momatrix
        """
        key0 = "Final MO vectors"
        key1 = "center of mass"
        found = self.find(key0, key1)
        if found[key0]:
            start = found[key0][0][0] + 6
            end = found[key1][0][0] - 1
            c = pd.read_fwf(StringIO("\n".join(self[start:end])), widths=(6, 12, 12, 12, 12, 12, 12),
                            names=list(range(7)))
            self.c = c
            idx = c[c[0].isnull()].index.values
            c = c[~c.index.isin(idx)]
            del c[0]
            nbas = len(self.basis_set_order)
            n = c.shape[0]//nbas
            coefs = []
            # The for loop below behaves like numpy.array_split(df, n); numpy.array_split
            # applied directly to DataFrames gave splits of the wrong sizes, so split manually
            for i in range(n):
                coefs.append(c.iloc[i*nbas:(i+1)*nbas, :].astype(float).dropna(axis=1).values.ravel("F"))
            c = np.concatenate(coefs)
            del coefs
            orbital, chi = _square_indices(len(self.basis_set_order))
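            # _square_indices presumably yields the flattened (orbital, chi) index pairs
            # of an nbas x nbas square coefficient matrix, matching the ordering of the
            # 'coef' array built above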
            self.momatrix = MOMatrix.from_dict({'coef': c, 'chi': chi, 'orbital': orbital, 'frame': 0})
            # momatrix = pd.DataFrame.from_dict({'coef': c, 'chi': chi, 'orbital': orbital})
            # momatrix['frame'] = 0
            # self.momatrix = momatrix



    def _parse_orbital(self, starts, stops):
        '''
        This function actually performs parsing of :class:`~exatomic.orbital.Orbital`

        See Also:
            :func:`~exnwchem.output.Output.parse_orbital`
        '''
        joined = '\n'.join(['\n'.join(self[s:e]) for s, e in zip(starts, stops)])
        nvec = joined.count('Vector')
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        mapper = self.basis_set.functions(self.meta['spherical']).groupby(level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        nbas *= nvec
        # Orbital dataframe -- alternatively, one could read the strings into a
        # DataFrame and use the pd.Series.str methods to perform all the
        # replacements at once, e.g. 'D' --> 'E', 'Occ=' --> '', etc.
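        # A rough sketch of that alternative (not used here; names are assumed):
        #   rows = pd.Series([l for s, e in zip(starts, stops) for l in self[s:e]])
        #   rows = rows[rows.str.contains('Vector')].str.replace('D', 'E').str.replace('Occ=', '')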
        orb_no = np.empty((nvec, ), dtype=np.int64)
        occ = np.empty((nvec, ), dtype=np.float64)
        nrg = np.empty((nvec, ), dtype=np.float64)
        x = np.empty((nvec, ), dtype=np.float64)
        y = np.empty((nvec, ), dtype=np.float64)
        z = np.empty((nvec, ), dtype=np.float64)
        frame = np.empty((nvec, ), dtype=np.int64)
        fc = -1   # Frame counter
        oc = 0   # Orbital counter
        for s, e in zip(starts, stops):
            fc += 1
            for line in self[s:e]:
                ls = line.split()
                if 'Vector' in line:
                    orb_no[oc] = ls[1]
                    occ[oc] = ls[2].replace('Occ=', '').replace('D', 'E')
                    nrg[oc] = ls[3].replace('E=', '').replace('D', 'E') if 'E=-' in line else ls[4].replace('D', 'E')
                    frame[oc] = fc
                elif 'MO Center' in line:
                    x[oc] = ls[2].replace(',', '').replace('D', 'E')
                    y[oc] = ls[3].replace(',', '').replace('D', 'E')
                    z[oc] = ls[4].replace(',', '').replace('D', 'E')
                    oc += 1
        orb_no -= 1
        return pd.DataFrame.from_dict({'x': x, 'y': y, 'z': z, 'frame': frame,
                                       'vector': orb_no, 'occupation': occ, 'energy': nrg})

    def parse_basis_set(self):
        """
        Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.
        """
        if not hasattr(self, "atom"):
            self.parse_atom()
        _rebas01 = ' Basis "'
        _rebas02 = ' Summary of "'
        _rebas03 = [' s ', ' px ', ' py ', ' pz ',
                    ' d ', ' f ', ' g ', ' h ', ' i ',
                    ' j ', ' k ', ' l ', ' m ', ' p ']
        found = self.find(_rebas01, _rebas02)
        spherical = "spherical" in found[_rebas01][0][1]
        start = found[_rebas01][0][0] + 2
        idx = 1 if len(found[_rebas02]) > 1 else -1
        stop = found[_rebas02][idx][0] - 1
        # Read in all of the extra lines that contain ---- and tag names
        df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                         widths=(4, 2, 16, 16),
                         names=("shell", "L", "alpha", "d"))
        df.loc[df['shell'] == "--", "shell"] = np.nan
        tags = df.loc[(df['shell'].str.isdigit() == False), "shell"]
        idxs = tags.index.tolist()
        idxs.append(len(df))
        df['set'] = ""
        for i, tag in enumerate(tags):
            df.loc[idxs[i]:idxs[i + 1], "set"] = tag
        df = df.dropna().reset_index(drop=True)
        mapper = {v: k for k, v in dict(enumerate(df['set'].unique())).items()}
        df['set'] = df['set'].map(mapper)
        df['L'] = df['L'].str.strip().str.lower().map(lmap)
        df['alpha'] = df['alpha'].astype(float)
        df['d'] = df['d'].astype(float)
        # NO SUPPORT FOR MULTIPLE FRAMES?
        df['frame'] = 0
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = spherical
        self.atom['set'] = self.atom['tag'].map(mapper)

    def parse_basis_set_order(self):
        dtype = [('center', 'i8'), ('shell', 'i8'), ('L', 'i8')]
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        if self.meta['spherical']:
            dtype += [('ml', 'i8')]
        else:
            dtype += [('l', 'i8'), ('m', 'i8'), ('n', 'i8')]
        mapper = self.basis_set.functions(self.meta['spherical']).groupby(level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        bso = np.empty((nbas,), dtype=dtype)
        cnt = 0
        bases = self.basis_set.groupby('set')
        for seht, center in zip(self.atom['set'], self.atom.index):
            bas = bases.get_group(seht).groupby('shell')
            if self.meta['spherical']:
                for shell, grp in bas:
                    l = grp['L'].values[0]
                    for ml in spherical_ordering_function(l):
                        bso[cnt] = (center, shell, l, ml)
                        cnt += 1
            else:
                for shell, grp in bas:
                    l = grp['L'].values[0]
                    for _, ll, m, n in cartesian_ordering_function(l):
                        bso[cnt] = (center, shell, l, ll, m, n)
                        cnt += 1
        bso = pd.DataFrame(bso)
        bso['frame'] = 0
        # New shell definition consistent with basis internals
        shls = []
        grps = bso.groupby(['center', 'L'])
        cache = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        for (cen, L), grp in grps:
            for ml in grp['ml']:
                shls.append(cache[cen][L][ml])
                cache[cen][L][ml] += 1
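        # i.e. shell becomes a 0-based running counter per (center, L, ml) combination,
        # numbering repeated functions of the same type on each center consecutively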
        bso['shell'] = shls
        self.basis_set_order = bso

    def parse_frame(self):
        """
        Create a minimal :class:`~exatomic.core.frame.Frame` from the (parsed)
        :class:`~exatomic.core.atom.Atom` object.
        """
        _rescfen = 'Total SCF energy'
        _redften = 'Total DFT energy'
        self.frame = compute_frame_from_atom(self.atom)
        found = self.find(_rescfen, _redften)
        scfs = found[_rescfen]
        dfts = found[_redften]
        if scfs and dfts:
            print('Warning: found total energies from scf and dft, using dft')
            dfts = [float(val.split()[-1]) for key, val in dfts]
            self.frame['total_energy'] = dfts
        elif scfs:
            scfs = [float(val.split()[-1]) for key, val in scfs]
            self.frame['total_energy'] = scfs
        elif dfts:
            dfts = [float(val.split()[-1]) for key, val in dfts]
            self.frame['total_energy'] = dfts


    def __init__(self, *args, **kwargs):
        super(Output, self).__init__(*args, **kwargs)
Example #7
0
class Output(six.with_metaclass(OutMeta, Editor)):
    """The ADF output parser."""
    def parse_atom(self):
        # TODO : only supports single frame, gets last atomic positions
        _re_atom_00 = 'Atoms in this Fragment     Cart. coord.s (Angstrom)'
        start = stop = self.find(_re_atom_00, keys_only=True)[0] + 2
        while self[stop].strip():
            stop += 1
        atom = self.pandas_dataframe(start, stop, 7)
        atom.drop([0, 2, 3], axis=1, inplace=True)
        atom.columns = ['symbol', 'x', 'y', 'z']
        for c in ['x', 'y', 'z']:
            atom[c] *= Length['Angstrom', 'au']
        atom['Z'] = atom['symbol'].map(sym2z)
        atom['frame'] = 0
        self.atom = atom

    def parse_basis_set(self):
        # Find the basis set
        _re_bas_00 = '(Slater-type)  F U N C T I O N S'
        _re_bas_01 = 'Atom Type'
        start = self.find(_re_bas_00, keys_only=True)[-1] + 3
        starts = self.find(_re_bas_01, start=start, keys_only=True)
        lines = []
        for ext in starts:
            for i in range(4):
                lines.append(start + ext + i)
            stop = start + ext + 4
            while self[stop].strip():
                lines.append(stop)
                stop += 1
        df = pd.read_fwf(StringIO('\n'.join([self[i] for i in lines])),
                         widths=[4, 2, 12, 4],
                         names=['n', 'L', 'alpha', 'symbol'])
        # Where atom types change
        idxs = [0] + df['n'][df['n'] == '---'].index.tolist() + [df.shape[0]]
        sets, shells = [], []
        for i, (start, stop) in enumerate(zip(idxs, idxs[1:])):
            sets.append(np.repeat(i - 1, stop - start))
            shells.append(np.arange(-1, stop - start - 1))
        df['set'] = np.concatenate(sets)
        df['shell'] = np.concatenate(shells)
        # Atom table basis set map
        basmap = df['symbol'].dropna()
        basmap = basmap[basmap.str.endswith(')')].str.strip(')')
        basmap = {
            val: df['set'][key] + 1
            for key, val in basmap.to_dict().items()
        }
        # Discard the garbage
        drop = df['n'].str.strip().str.isnumeric().fillna(False)
        df.drop(drop[drop == False].index, inplace=True)
        df.drop('symbol', axis=1, inplace=True)
        # Clean up the series
        df['alpha'] = df['alpha'].astype(np.float64)
        df['n'] = df['n'].astype(np.int64)
        df['L'] = df['L'].str.lower().map(lmap)
        df['d'] = np.sqrt((2 * df['L'] + 1) / (4 * np.pi))
        df['r'] = df['n'] - (df['L'] + 1)
        df['frame'] = 0
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = False
        self.atom['set'] = self.atom['symbol'].map(basmap)

    def parse_basis_set_order(self):
        # All the columns we need
        data = defaultdict(list)
        sets = self.basis_set.groupby('set')
        # Iterate over atoms
        for center, symbol, seht in zip(self.atom.index, self.atom['symbol'],
                                        self.atom['set']):
            # Per basis set
            bas = sets.get_group(seht).groupby('L')
            for L, grp in bas:
                # Iterate over cartesians
                for l, m, n in enum_cartesian[L]:
                    for shell, r in zip(grp['shell'], grp['r']):
                        data['center'].append(center)
                        data['symbol'].append(symbol)
                        data['shell'].append(shell)
                        data['seht'].append(seht)
                        data['L'].append(L)
                        data['l'].append(l)
                        data['m'].append(m)
                        data['n'].append(n)
                        data['r'].append(r)
        data['set'] = data.pop('seht')
        data['frame'] = 0
        self.basis_set_order = pd.DataFrame.from_dict(data)
        self.basis_set_order['prefac'] = (
            self.basis_set_order['L'].apply(dfac21) /
            (self.basis_set_order['l'].apply(dfac21) *
             self.basis_set_order['m'].apply(dfac21) *
             self.basis_set_order['n'].apply(dfac21))).apply(np.sqrt)
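        # prefac appears to be the Cartesian angular normalization ratio
        # sqrt((2L-1)!! / ((2l-1)!!(2m-1)!!(2n-1)!!)), assuming dfac21(k) = (2k-1)!!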

    def parse_orbital(self):
        _re_orb_00 = 'Orbital Energies, both Spins'
        _re_orb_01 = 'Orbital Energies, per Irrep and Spin'
        found = self.find(_re_orb_00, _re_orb_01, keys_only=True)
        # Open shell vs. closed shell
        cols = {
            _re_orb_00:
            ['symmetry', 'vector', 'spin', 'occupation', 'energy', 'eV'],
            _re_orb_01: ['vector', 'occupation', 'energy', 'eV', 'dE']
        }
        key = _re_orb_00 if found[_re_orb_00] else _re_orb_01
        start = stop = found[key][-1] + 5
        while self[stop].strip():
            stop += 1
        df = self.pandas_dataframe(start, stop, cols[key])
        df['vector'] -= 1
        if 'spin' in cols[key]:
            df['spin'] = df.spin.map({'A': 0, 'B': 1})
            df.sort_values(by=['spin', 'energy'], inplace=True)
        else:
            df.sort_values(by='energy', inplace=True)
            df['spin'] = 0
        df.reset_index(drop=True, inplace=True)
        df['frame'] = df['group'] = 0
        self.orbital = df

    def parse_contribution(self):
        _re_con_00 = ('E(eV)  Occ       MO           %     '
                      'SFO (first member)   E(eV)  Occ   Fragment')
        # MO contribution by percentage
        found = self.find(_re_con_00, keys_only=True)
        starts = [i + 3 for i in found]
        widths = [12, 6, 6, 6, 11, 6, 10, 12, 6, 6, 3]
        names = [
            'eV', 'occupation', 'vector', 'sym', '%', 'SFO', 'angmom',
            'eV(sfo)', 'occ(sfo)', 'atom', 'symbol'
        ]
        dfs = []
        # Prints for both spins
        for i, start in enumerate(starts):
            stop = start
            while self[stop].strip():
                stop += 1
            dfs.append(
                pd.read_fwf(StringIO('\n'.join(self[start:stop])),
                            delim_whitespace=True,
                            widths=widths,
                            names=names))
            dfs[-1]['spin'] = i
        dfs = pd.concat(dfs).reset_index(drop=True)
        dfs = dfs.applymap(lambda x: np.nan if (isinstance(
            x, six.string_types) and x.isspace()) else x)
        dfs.fillna(method='ffill', inplace=True)
        # Clean up
        dfs['symbol'] = dfs['symbol'].str.strip()
        dfs['angmom'] = dfs['angmom'].str.strip()
        dfs['angmom'].update(dfs['angmom'].map({'S': 'S:'}))
        dfs[['L', 'ml']] = dfs['angmom'].str.extract('(.*):(.*)', expand=True)
        dfs['%'] = dfs['%'].str.replace('%', '')
        dfs['%'].update(dfs['%'].map({"    ******": np.inf}))
        dfs['%'] = dfs['%'].astype(np.float64)
        dfs['occupation'] = dfs['occupation'].astype(np.float64)
        dfs['vector'] = dfs['vector'].astype(np.int64) - 1
        dfs['eV'] = dfs['eV'].astype(np.float64)
        dfs['atom'] -= 1
        self.contribution = dfs

    def parse_excitation(self):
        # Excitation
        _re_exc_00 = '(sum=1) transition dipole moment'
        _re_exc_01 = ' no.     E/a.u.        E/eV      f           Symmetry'
        found = self.find_next(_re_exc_00, keys_only=True)
        if not found: return
        # First table of interest here
        start = found + 4
        stop = self.find_next(_re_exc_01, keys_only=True) - 3
        os = len(self[start].split()) == 9
        todrop = ['occ:', 'virt:']
        cols = [
            'excitation', 'occ', 'drop', 'virt', 'weight', 'TDMx', 'TDMy',
            'TDMz'
        ]
        if os: cols.insert(1, 'spin')
        if os: todrop = ['occ', 'virt']
        adf = self.pandas_dataframe(start, stop, cols)
        adf.drop('drop', axis=1, inplace=True)
        s1 = set(adf[cols[1]][adf[cols[1]] == 'NTO'].index)
        s2 = set(adf['excitation'][adf['excitation'].isin(todrop)].index)
        adf.drop(s1 | s2, axis=0, inplace=True)
        adf['excitation'] = adf['excitation'].str[:-1].astype(np.int64) - 1
        if os: adf['spin'] = adf['spin'].map({'Alph': 0, 'Beta': 1})
        adf[['occ', 'occsym']] = adf['occ'].str.extract('([0-9]*)(.*)',
                                                        expand=True)
        adf[['virt', 'virtsym']] = adf['virt'].str.extract('([0-9]*)(.*)',
                                                           expand=True)
        adf['occ'] = adf['occ'].astype(np.int64) - 1
        adf['virt'] = adf['virt'].astype(np.int64) - 1
        # Second one here
        start = stop + 5
        stop = start
        while self[stop].strip():
            stop += 1
        cols = _re_exc_01.split()
        df = self.pandas_dataframe(start, stop + 1, cols)
        df.drop(cols[0], axis=1, inplace=True)
        df.columns = ['energy', 'eV', 'osc', 'symmetry']
        # Expand the second table to fit the original
        for col in df.columns:
            adf[col] = adf.excitation.map(df[col])
        adf['frame'] = adf['group'] = 0
        self.excitation = adf

    def parse_momatrix(self):
        _re_mo_00 = 'Eigenvectors .* in BAS representation'
        _re_mo_01 = 'row '
        _re_mo_02 = 'nosym'
        found = self.regex(_re_mo_00,
                           _re_mo_01,
                           _re_mo_02,
                           flags=re.IGNORECASE,
                           keys_only=True)
        if not found[_re_mo_00] or not found[_re_mo_01]: return
        if found[_re_mo_02]:
            thresh = found[_re_mo_00][0]
            rowmajor = 'rows' in self[thresh]
            starts = np.array([i for i in found[_re_mo_01] if i > thresh]) + 1
            nchi = starts[1] - starts[0] - 3
            ncol = len(self[starts[0] + 1].split()) - 1
            if len(starts) % 2: os = False
            else:
                anchor = starts[len(starts) // 2 - 1] + nchi
                sail = starts[len(starts) // 2]
                os = bool(self.find('SPIN 2', start=anchor, stop=sail))
            blocks = [starts] if not os else [
                starts[:len(starts) // 2], starts[len(starts) // 2:]
            ]
            data = pd.DataFrame()
            for i, block in enumerate(blocks):
                stop = block[-1] + nchi
                skips = [
                    k + j for k in list(block[1:] - block[0] - 3)
                    for j in range(3)
                ]
                name = 'coef' if not i else 'coef{}'.format(i)
                col = self.pandas_dataframe(
                    block[0], stop, ncol + 1, skiprows=skips).drop(
                        0,
                        axis=1,
                    ).unstack().dropna().reset_index(drop=True)
                data[name] = col
            norb = len(data.index) // nchi
            data['orbital'] = np.concatenate(
                [np.repeat(range(i, norb, ncol), nchi) for i in range(ncol)])
            data['chi'] = np.tile(range(nchi), norb)
            data['frame'] = 0
            if rowmajor:
                data.rename(columns={
                    'orbital': 'chi',
                    'chi': 'orbital'
                },
                            inplace=True)
                data.sort_values(by=['orbital', 'chi'], inplace=True)
            self.momatrix = data
        else:
            print('Symmetrized calcs not supported yet.')

    def parse_sphr_momatrix(self, verbose=False):
        """
        Parse the localized momatrix (if present).

        If the ``locorb`` keyword is used in ADF, an additional momatrix is
        printed after localization is performed. Parsing this table allows
        for visualization of these orbitals.

        Note:
            The attr :attr:`~exatomic.adf.output._re_loc_mo` is used for parsing this
            section.
        """
        _re_loc_mo = ("Localized MOs expanded in CFs+SFOs",
                      "SFO contributions (%) per Localized Orbital")
        found = self.find(*_re_loc_mo)
        if len(found[_re_loc_mo[0]]) == 0:
            if verbose:
                print("No localization performed.")
            return  # Nothing to parse
        start = found[_re_loc_mo[0]][0][0] + 8
        stop = found[_re_loc_mo[1]][0][0] - 4
        # Parse the localized momatrix as a whole block of text
        df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                         widths=(16, 9, 9, 9, 9, 9, 9, 9, 9),
                         header=None)
        del df[0]
        # Identify the eigenvectors and (un)stack them correctly
        n = df[df[1].isnull()].index[0]  # number of basis functions
        m = np.ceil(df.shape[0] / n).astype(
            int)  # number of printed blocks of text
        # idx - indexes of "lines" (rows) that don't contain coefficients
        idx = [(n + 5) * j + i - 5 for j in range(1, m) for i in range(0, 5)]
        df = df[~df.index.isin(idx)]
        coefs = []
        for i in range(0, df.shape[0] // n + 1):
            d = df.iloc[n * (i - 1):n * i, :]
            coefs.append(d.unstack().dropna().values.astype(float))
        coefs = np.concatenate(coefs)
        m = coefs.shape[0] // n  # Number of localized MOs
        momatrix = pd.DataFrame.from_dict({
            'coef':
            coefs,
            'orbital': [i for i in range(m) for _ in range(n)],
            'chi': [j for _ in range(m) for j in range(n)]
        })
        momatrix['frame'] = self.atom['frame'].unique()[-1]
        self.sphr_momatrix = momatrix

    def __init__(self, *args, **kwargs):
        super(Output, self).__init__(*args, **kwargs)
Example #8
0
class TestBasisSet(TestCase):
    def setUp(self):
        adict = {col: [0] for col in BasisSet._columns}
        adict['frame'] = 0
        # Trivial basis set
        self.bs = BasisSet(adict)
        self.bs['alpha'] = self.bs['alpha'].astype(np.float64)
        self.bs['d'] = self.bs['d'].astype(np.float64)
        # Medium basis set
        self.mbs = BasisSet({
            'frame': 0,
            'alpha': [5., 1., 1.],
            'd': [1., 1., 1.],
            'shell': [0, 1, 0],
            'set': [0, 0, 1],
            'L': [0, 1, 0],
            'n': [1, 2, 1]
        })
        # Large basis set
        self.lbs = BasisSet({
            'frame': 0,
            'alpha': [5., 3., 1., 3., 1., 1., 3., 1., 1.],
            'd': [1., 1., 1., 1., 1., 1., 1., 1., 1.],
            'shell': [0, 0, 0, 1, 1, 2, 0, 0, 1],
            'set': [0, 0, 0, 0, 0, 0, 1, 1, 1],
            'L': [0, 0, 0, 1, 1, 2, 0, 0, 1]
        })

    def test_lmax(self):
        self.assertEqual(self.bs.lmax, 0)
        self.assertEqual(self.mbs.lmax, 1)
        self.assertEqual(self.lbs.lmax, 2)

    def test_shells(self):
        self.bs.shells()
        self.mbs.shells()
        self.lbs.shells()

    def test_functions_by_shell(self):
        n = ['set', 'L']
        mfp = pd.MultiIndex.from_product
        mfa = pd.MultiIndex.from_arrays
        self.assertTrue((self.bs.functions_by_shell() == pd.Series(
            [1], index=mfp([[0], [0]], names=n))).all())
        self.assertTrue((self.mbs.functions_by_shell() == pd.Series(
            [1, 1, 1], index=mfa([[0, 0, 1], [0, 1, 0]], names=n))).all())
        self.assertTrue((self.lbs.functions_by_shell() == pd.Series(
            [1, 1, 1, 1, 1],
            index=mfa([[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]], names=n))).all())

    def test_primitives_by_shell(self):
        n = ['set', 'L']
        mfp = pd.MultiIndex.from_product
        mfa = pd.MultiIndex.from_arrays
        self.assertTrue((self.bs.primitives_by_shell() == pd.Series(
            [1], index=mfp([[0], [0]], names=n))).all())
        self.assertTrue((self.mbs.primitives_by_shell() == pd.Series(
            [1, 1, 1], index=mfa([[0, 0, 1], [0, 1, 0]], names=n))).all())
        self.assertTrue((self.lbs.primitives_by_shell() == pd.Series(
            [3, 2, 1, 2, 1],
            index=mfa([[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]], names=n))).all())

    def test_functions(self):
        n = ['set', 'L']
        mfp = pd.MultiIndex.from_product
        mfa = pd.MultiIndex.from_arrays
        self.assertTrue(
            (self.bs.functions(False) == pd.Series([1],
                                                   index=mfp([[0], [0]],
                                                             names=n))).all())
        self.assertTrue(
            (self.bs.functions(True) == pd.Series([1],
                                                  index=mfp([[0], [0]],
                                                            names=n))).all())
        self.assertTrue((self.mbs.functions(False) == pd.Series(
            [1, 3, 1], index=mfa([[0, 0, 1], [0, 1, 0]], names=n))).all())
        self.assertTrue((self.mbs.functions(True) == pd.Series(
            [1, 3, 1], index=mfa([[0, 0, 1], [0, 1, 0]], names=n))).all())
        self.assertTrue((self.lbs.functions(False) == pd.Series(
            [1, 3, 6, 1, 3],
            index=mfa([[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]], names=n))).all())
        self.assertTrue((self.lbs.functions(True) == pd.Series(
            [1, 3, 5, 1, 3],
            index=mfa([[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]], names=n))).all())

    def test_primitives(self):
        n = ['set', 'L']
        mfp = pd.MultiIndex.from_product
        mfa = pd.MultiIndex.from_arrays
        self.assertTrue(
            (self.bs.primitives(False) == pd.Series([1],
                                                    index=mfp([[0], [0]],
                                                              names=n))).all())
        self.assertTrue(
            (self.bs.primitives(True) == pd.Series([1],
                                                   index=mfp([[0], [0]],
                                                             names=n))).all())
        self.assertTrue((self.mbs.primitives(False) == pd.Series(
            [1, 3, 1], index=mfa([[0, 0, 1], [0, 1, 0]], names=n))).all())
        self.assertTrue((self.mbs.primitives(True) == pd.Series(
            [1, 3, 1], index=mfa([[0, 0, 1], [0, 1, 0]], names=n))).all())
        self.assertTrue((self.lbs.primitives(False) == pd.Series(
            [3, 6, 6, 2, 3],
            index=mfa([[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]], names=n))).all())
        self.assertTrue((self.lbs.primitives(True) == pd.Series(
            [3, 6, 5, 2, 3],
            index=mfa([[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]], names=n))).all())
Example #9
0
class Output(six.with_metaclass(OutMeta, Editor)):
    """The ADF output parser."""
    def parse_atom(self):
        # TODO : only supports single frame, gets last atomic positions
        #        this will actually get the very first coordinates
        #_re_atom_00 = 'Atoms in this Fragment     Cart. coord.s (Angstrom)'
        _re_atom_00 = 'ATOMS'
        found1 = self.find(_re_atom_00, keys_only=True)
        # use regex instead of find because NMR and CPL calculations print a
        # similar search string for the nuclear coordinates
        _reatom = "(?i)NUCLEAR COORDINATES"
        found2 = self.regex(_reatom, keys_only=True)
        # to find the optimized frames
        _reopt = "Coordinates (Cartesian)"
        found_opt = self.find(_reopt, keys_only=True)
        if found_opt:
            starts = np.array(found_opt) + 6
            stop = starts[0]
            while '------' not in self[stop]:
                stop += 1
            stops = starts + stop - starts[0]
            dfs = []
            for idx, (start, stop) in enumerate(zip(starts, stops)):
                # parse everything as they may be useful in the future
                df = self.pandas_dataframe(start, stop, ncol=11)
                # drop everything
                df.drop(list(range(5, 11)), axis='columns', inplace=True)
                # the coordinates are already in bohr, so no unit conversion is needed
                df.columns = ['set', 'symbol', 'x', 'y', 'z']
                df['set'] = df['set'].astype(int)
                df['Z'] = df['symbol'].map(sym2z)
                df['frame'] = idx
                df['set'] -= 1
                dfs.append(df)
            atom = pd.concat(dfs, ignore_index=True)
        elif found1:
            start = stop = found1[-1] + 4
            while self[stop].strip():
                stop += 1
            atom = self.pandas_dataframe(start, stop, ncol=8)
            atom.drop(list(range(5, 8)), axis='columns', inplace=True)
            atom.columns = ['set', 'symbol', 'x', 'y', 'z']
            for c in ['x', 'y', 'z']:
                atom[c] *= Length['Angstrom', 'au']
            atom['Z'] = atom['symbol'].map(sym2z)
            atom['set'] -= 1
            atom['frame'] = 0
        elif found2:
            #if len(found) > 1:
            #    raise NotImplementedError("We can only parse outputs from a single NMR calculation")
            atom = []
            for idx, val in enumerate(found2):
                start = val + 3
                stop = start
                while self[stop].strip():
                    stop += 1
                # A bit of a hack to guard against a formatting change that depends
                # on the number of atoms: the atom index is right-justified, so with
                # 100 or more atoms it fills the allotted space, which changes the
                # delimiter and therefore the number of columns
                self[start:stop] = map(lambda x: x.replace('(', ''),
                                       self[start:stop])
                df = self.pandas_dataframe(start, stop, ncol=5)
                df.columns = ['symbol', 'set', 'x', 'y', 'z']
                for c in ['x', 'y', 'z']:
                    df[c] *= Length['Angstrom', 'au']
                df['Z'] = df['symbol'].map(sym2z)
                df['frame'] = idx
                # remove the trailing characters from the index
                df['set'] = list(map(lambda x: x.replace('):', ''), df['set']))
                df['set'] = df['set'].astype(int) - 1
                atom.append(df)
            atom = pd.concat(atom)
        else:
            raise NotImplementedError("We could not find the atom table in this output. Please submit "+ \
                                      "an issue ticket so we can add it in.")
        self.atom = atom

    def parse_basis_set(self):
        # Find the basis set
        _re_bas_00 = '(Slater-type)  F U N C T I O N S'
        _re_bas_01 = 'Atom Type'
        start = self.find(_re_bas_00, keys_only=True)[-1] + 3
        starts = self.find(_re_bas_01, start=start, keys_only=True)
        lines = []
        for ext in starts:
            for i in range(4):
                lines.append(start + ext + i)
            stop = start + ext + 4
            while self[stop].strip():
                lines.append(stop)
                stop += 1
        df = pd.read_fwf(StringIO('\n'.join([self[i] for i in lines])),
                         widths=[4, 2, 12, 4],
                         names=['n', 'L', 'alpha', 'symbol'])
        # Where atom types change
        idxs = [0] + df['n'][df['n'] == '---'].index.tolist() + [df.shape[0]]
        sets, shells = [], []
        for i, (start, stop) in enumerate(zip(idxs, idxs[1:])):
            sets.append(np.repeat(i - 1, stop - start))
            shells.append(np.arange(-1, stop - start - 1))
        df['set'] = np.concatenate(sets)
        df['shell'] = np.concatenate(shells)
        # Atom table basis set map
        basmap = df['symbol'].dropna()
        basmap = basmap[basmap.str.endswith(')')].str.strip(')')
        basmap = {
            val: df['set'][key] + 1
            for key, val in basmap.to_dict().items()
        }
        # Discard the garbage
        drop = df['n'].str.strip().str.isnumeric().fillna(False)
        df.drop(drop[drop == False].index, inplace=True)
        df.drop('symbol', axis=1, inplace=True)
        # Clean up the series
        df['alpha'] = df['alpha'].astype(np.float64)
        df['n'] = df['n'].astype(np.int64)
        df['L'] = df['L'].str.lower().map(lmap)
        df['d'] = np.sqrt((2 * df['L'] + 1) / (4 * np.pi))
        df['r'] = df['n'] - (df['L'] + 1)
        df['frame'] = 0
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = False
        self.atom['set'] = self.atom['symbol'].map(basmap)

    def parse_basis_set_order(self):
        # All the columns we need
        data = defaultdict(list)
        sets = self.basis_set.groupby('set')
        # Iterate over atoms
        for center, symbol, seht in zip(self.atom.index, self.atom['symbol'],
                                        self.atom['set']):
            # Per basis set
            bas = sets.get_group(seht).groupby('L')
            for L, grp in bas:
                # Iterate over cartesians
                for l, m, n in enum_cartesian[L]:
                    for shell, r in zip(grp['shell'], grp['r']):
                        data['center'].append(center)
                        data['symbol'].append(symbol)
                        data['shell'].append(shell)
                        data['seht'].append(seht)
                        data['L'].append(L)
                        data['l'].append(l)
                        data['m'].append(m)
                        data['n'].append(n)
                        data['r'].append(r)
        data['set'] = data.pop('seht')
        data['frame'] = 0
        self.basis_set_order = pd.DataFrame.from_dict(data)
        self.basis_set_order['prefac'] = (
            self.basis_set_order['L'].apply(dfac21) /
            (self.basis_set_order['l'].apply(dfac21) *
             self.basis_set_order['m'].apply(dfac21) *
             self.basis_set_order['n'].apply(dfac21))).apply(np.sqrt)

    def parse_orbital(self):
        _re_orb_00 = 'Orbital Energies, both Spins'
        _re_orb_01 = 'Orbital Energies, per Irrep and Spin'
        found = self.find(_re_orb_00, _re_orb_01, keys_only=True)
        # Open shell vs. closed shell
        cols = {
            _re_orb_00:
            ['symmetry', 'vector', 'spin', 'occupation', 'energy', 'eV'],
            _re_orb_01: ['vector', 'occupation', 'energy', 'eV', 'dE']
        }
        key = _re_orb_00 if found[_re_orb_00] else _re_orb_01
        start = stop = found[key][-1] + 5
        while self[stop].strip():
            stop += 1
        df = self.pandas_dataframe(start, stop, cols[key])
        df['vector'] -= 1
        if 'spin' in cols[key]:
            df['spin'] = df.spin.map({'A': 0, 'B': 1})
            df.sort_values(by=['spin', 'energy'], inplace=True)
        else:
            df.sort_values(by='energy', inplace=True)
            df['spin'] = 0
        df.reset_index(drop=True, inplace=True)
        df['frame'] = df['group'] = 0
        self.orbital = df

    def parse_contribution(self):
        _re_con_00 = ('E(eV)  Occ       MO           %     '
                      'SFO (first member)   E(eV)  Occ   Fragment')
        # MO contribution by percentage
        found = self.find(_re_con_00, keys_only=True)
        starts = [i + 3 for i in found]
        widths = [12, 6, 6, 6, 11, 6, 10, 12, 6, 6, 3]
        names = [
            'eV', 'occupation', 'vector', 'sym', '%', 'SFO', 'angmom',
            'eV(sfo)', 'occ(sfo)', 'atom', 'symbol'
        ]
        dfs = []
        # Prints for both spins
        for i, start in enumerate(starts):
            stop = start
            while self[stop].strip():
                stop += 1
            dfs.append(
                pd.read_fwf(StringIO('\n'.join(self[start:stop])),
                            delim_whitespace=True,
                            widths=widths,
                            names=names))
            dfs[-1]['spin'] = i
        dfs = pd.concat(dfs).reset_index(drop=True)
        dfs = dfs.applymap(lambda x: np.nan if (isinstance(
            x, six.string_types) and x.isspace()) else x)
        dfs.fillna(method='ffill', inplace=True)
        # Clean up
        dfs['symbol'] = dfs['symbol'].str.strip()
        dfs['angmom'] = dfs['angmom'].str.strip()
        dfs['angmom'].update(dfs['angmom'].map({'S': 'S:'}))
        dfs[['L', 'ml']] = dfs['angmom'].str.extract('(.*):(.*)', expand=True)
        dfs['%'] = dfs['%'].str.replace('%', '')
        dfs['%'].update(dfs['%'].map({"    ******": np.inf}))
        dfs['%'] = dfs['%'].astype(np.float64)
        dfs['occupation'] = dfs['occupation'].astype(np.float64)
        dfs['vector'] = dfs['vector'].astype(np.int64) - 1
        dfs['eV'] = dfs['eV'].astype(np.float64)
        dfs['atom'] -= 1
        self.contribution = dfs

    def parse_excitation(self):
        # Excitation
        _re_exc_00 = '(sum=1) transition dipole moment'
        _re_exc_01 = ' no.     E/a.u.        E/eV      f           Symmetry'
        found = self.find_next(_re_exc_00, keys_only=True)
        if not found: return
        # First table of interest here
        start = found + 4
        stop = self.find_next(_re_exc_01, keys_only=True) - 3
        os = len(self[start].split()) == 9
        todrop = ['occ:', 'virt:']
        cols = [
            'excitation', 'occ', 'drop', 'virt', 'weight', 'TDMx', 'TDMy',
            'TDMz'
        ]
        if os: cols.insert(1, 'spin')
        if os: todrop = ['occ', 'virt']
        adf = self.pandas_dataframe(start, stop, cols)
        adf.drop('drop', axis=1, inplace=True)
        s1 = set(adf[cols[1]][adf[cols[1]] == 'NTO'].index)
        s2 = set(adf['excitation'][adf['excitation'].isin(todrop)].index)
        adf.drop(s1 | s2, axis=0, inplace=True)
        adf['excitation'] = adf['excitation'].str[:-1].astype(np.int64) - 1
        if os: adf['spin'] = adf['spin'].map({'Alph': 0, 'Beta': 1})
        adf[['occ', 'occsym']] = adf['occ'].str.extract('([0-9]*)(.*)',
                                                        expand=True)
        adf[['virt', 'virtsym']] = adf['virt'].str.extract('([0-9]*)(.*)',
                                                           expand=True)
        adf['occ'] = adf['occ'].astype(np.int64) - 1
        adf['virt'] = adf['virt'].astype(np.int64) - 1
        # Second one here
        start = stop + 5
        stop = start
        while self[stop].strip():
            stop += 1
        cols = _re_exc_01.split()
        df = self.pandas_dataframe(start, stop + 1, cols)
        df.drop(cols[0], axis=1, inplace=True)
        df.columns = ['energy', 'eV', 'osc', 'symmetry']
        # Expand the second table to fit the original
        for col in df.columns:
            adf[col] = adf.excitation.map(df[col])
        adf['frame'] = adf['group'] = 0
        self.excitation = adf

    def parse_momatrix(self):
        _re_mo_00 = 'Eigenvectors .* in BAS representation'
        _re_mo_01 = 'row '
        _re_mo_02 = 'nosym'
        found = self.regex(_re_mo_00,
                           _re_mo_01,
                           _re_mo_02,
                           flags=re.IGNORECASE,
                           keys_only=True)
        if not found[_re_mo_00] or not found[_re_mo_01]: return
        if found[_re_mo_02]:
            thresh = found[_re_mo_00][0]
            rowmajor = 'rows' in self[thresh]
            starts = np.array([i for i in found[_re_mo_01] if i > thresh]) + 1
            nchi = starts[1] - starts[0] - 3
            ncol = len(self[starts[0] + 1].split()) - 1
            if len(starts) % 2: os = False
            else:
                anchor = starts[len(starts) // 2 - 1] + nchi
                sail = starts[len(starts) // 2]
                os = bool(self.find('SPIN 2', start=anchor, stop=sail))
            blocks = [starts] if not os else [
                starts[:len(starts) // 2], starts[len(starts) // 2:]
            ]
            data = pd.DataFrame()
            for i, block in enumerate(blocks):
                stop = block[-1] + nchi
                skips = [
                    k + j for k in list(block[1:] - block[0] - 3)
                    for j in range(3)
                ]
                name = 'coef' if not i else 'coef{}'.format(i)
                col = self.pandas_dataframe(
                    block[0], stop, ncol + 1, skiprows=skips).drop(
                        0,
                        axis=1,
                    ).unstack().dropna().reset_index(drop=True)
                data[name] = col
            norb = len(data.index) // nchi
            data['orbital'] = np.concatenate(
                [np.repeat(range(i, norb, ncol), nchi) for i in range(ncol)])
            data['chi'] = np.tile(range(nchi), norb)
            data['frame'] = 0
            if rowmajor:
                data.rename(columns={
                    'orbital': 'chi',
                    'chi': 'orbital'
                },
                            inplace=True)
                data.sort_values(by=['orbital', 'chi'], inplace=True)
            self.momatrix = data
        else:
            print('Symmetrized calcs not supported yet.')

    def parse_sphr_momatrix(self, verbose=False):
        """
        Parse the localized momatrix (if present).

        If the ``locorb`` keyword is used in ADF, an additional momatrix is
        printed after localization is performed. Parsing this table allows
        for visualization of these orbitals.

        Note:
            The attr :attr:`~exatomic.adf.output._re_loc_mo` is used for parsing this
            section.
        """
        _re_loc_mo = ("Localized MOs expanded in CFs+SFOs",
                      "SFO contributions (%) per Localized Orbital")
        found = self.find(*_re_loc_mo)
        if len(found[_re_loc_mo[0]]) == 0:
            if verbose:
                print("No localization performed.")
            return  # Nothing to parse
        start = found[_re_loc_mo[0]][0][0] + 8
        stop = found[_re_loc_mo[1]][0][0] - 4
        # Parse the localized momatrix as a whole block of text
        df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                         widths=(16, 9, 9, 9, 9, 9, 9, 9, 9),
                         header=None)
        del df[0]
        # Identify the eigenvectors and (un)stack them correctly
        n = df[df[1].isnull()].index[0]  # number of basis functions
        m = np.ceil(df.shape[0] / n).astype(
            int)  # number of printed blocks of text
        # idx - indexes of "lines" (rows) that don't contain coefficients
        idx = [(n + 5) * j + i - 5 for j in range(1, m) for i in range(0, 5)]
        df = df[~df.index.isin(idx)]
        coefs = []
        for i in range(0, df.shape[0] // n + 1):
            d = df.iloc[n * (i - 1):n * i, :]
            coefs.append(d.unstack().dropna().values.astype(float))
        coefs = np.concatenate(coefs)
        m = coefs.shape[0] // n  # Number of localized MOs
        momatrix = pd.DataFrame.from_dict({
            'coef':
            coefs,
            'orbital': [i for i in range(m) for _ in range(n)],
            'chi': [j for _ in range(m) for j in range(n)]
        })
        momatrix['frame'] = self.atom['frame'].unique()[-1]
        self.sphr_momatrix = momatrix

    def parse_gradient(self):
        _regrad = "Energy gradients wrt nuclear displacements"
        found = self.find(_regrad, keys_only=True)
        if not found:
            return
        starts = np.array(found) + 6
        stop = starts[0]
        while '----' not in self[stop]:
            stop += 1
        stops = starts + (stop - starts[0])
        dfs = []
        for i, (start, stop) in enumerate(zip(starts, stops)):
            df = self.pandas_dataframe(start, stop, ncol=5)
            df.columns = ['atom', 'symbol', 'fx', 'fy', 'fz']
            df['frame'] = i
            df['atom'] -= 1
            dfs.append(df)
        grad = pd.concat(dfs, ignore_index=True)
        grad['Z'] = grad['symbol'].map(sym2z)
        grad = grad[['atom', 'Z', 'fx', 'fy', 'fz', 'symbol', 'frame']]
        for u in ['fx', 'fy', 'fz']:
            grad[u] *= 1. / Length['Angstrom', 'au']
        self.gradient = grad

    def parse_frequency(self):
        _renorm = "Vibrations and Normal Modes"
        _refreq = "List of All Frequencies:"
        found = self.find(_refreq, keys_only=True)
        if not found:
            return
        elif len(found) > 1:
            raise NotImplementedError(
                "We cannot parse more than one frequency calculation in a single output"
            )
        found = self.find(_refreq, _renorm, keys_only=True)
        start = found[_refreq][0] + 9
        stop = start
        while self[stop]:
            stop += 1
        df = self.pandas_dataframe(start, stop, ncol=3)
        freqs = df[0].values
        n = int(np.ceil(freqs.shape[0] / 3))
        start = found[_renorm][0] + 9
        stop = start
        while self[stop]:
            stop += 1
        natoms = stop - start
        dfs = []
        fdx = 0
        for i in range(n):
            if i == 0:
                start = found[_renorm][0] + 9
            else:
                start = stop + 4
            stop = start + natoms
            freqs = list(map(lambda x: float(x), self[start - 2].split()))
            ncol = len(freqs)
            df = self.pandas_dataframe(start, stop, ncol=1 + 3 * ncol)
            tmp = list(map(lambda x: x.split('.'), df[0]))
            index, symbol = list(map(list, zip(*tmp)))
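            # Columns 1..3*ncol hold the x, y, z displacements for each of the
            # ncol frequencies in this block; each slice below picks every third
            # column so dx, dy, dz gather the x, y and z components respectively.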
            slices = [list(range(1 + i, 1 + 3 * ncol, 3)) for i in range(3)]
            dx, dy, dz = [df[i].unstack().values for i in slices]
            freqdx = np.repeat(list(range(fdx, ncol + fdx)), natoms)
            zs = pd.Series(symbol).map(sym2z)
            freqs = np.repeat(freqs, natoms)
            stacked = pd.DataFrame.from_dict({
                'Z': np.tile(zs, ncol),
                'label': np.tile(index, ncol),
                'dx': dx,
                'dy': dy,
                'dz': dz,
                'frequency': freqs,
                'freqdx': freqdx
            })
            stacked['ir_int'] = 0.0
            stacked['symbol'] = np.tile(symbol, ncol)
            dfs.append(stacked)
            fdx += ncol
        frequency = pd.concat(dfs, ignore_index=True)
        frequency['frame'] = 0
        # TODO: check units of the normal modes
        self.frequency = frequency

    def parse_nmr_shielding(self):
        _reatom = "N U C L E U S :"
        _reshield = "==== total shielding tensor"
        _renatom = "NUCLEAR COORDINATES (ANGSTROMS)"
        found = self.find(_reatom, keys_only=True)
        if not found:
            #raise NotImplementedError("Could not find {} in output".format(_reatom))
            return
        ncalc = self.find(_renatom, keys_only=True)
        ncalc.append(len(self))
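        # ncalc holds the start line of each NMR calculation (plus end of file);
        # ndx tracks which calculation a given nucleus block falls in and is
        # used as the frame index below.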
        ndx = 0
        dfs = []
        for start in found:
            try:
                ndx = ndx if start > ncalc[ndx] and start < ncalc[
                    ndx + 1] else ndx + 1
            except IndexError:
                raise IndexError(
                    "It seems that there was an issue with determining which NMR calculation we are in"
                )
            start_shield = self.find(_reshield, keys_only=True,
                                     start=start)[0] + start + 2
            end_shield = start_shield + 3
            symbol, index = self[start].split()[-1].split('(')
            index = int(index.replace(')', ''))
            isotropic = float(self[start_shield + 4].split()[-1])
            df = self.pandas_dataframe(start_shield, end_shield, ncol=3)
            cols = ['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz']
            df = pd.DataFrame(df.unstack().values.reshape(1, 9), columns=cols)
            df['isotropic'] = isotropic
            df['atom'] = index - 1
            df['symbol'] = symbol
            df['label'] = 'nmr shielding'
            df['frame'] = ndx
            dfs.append(df)
        shielding = pd.concat(dfs, ignore_index=True)
        self.nmr_shielding = shielding

    def parse_j_coupling(self):
        _recoupl = "total calculated spin-spin coupling:"
        _reatom = "Internal CPL numbering of atoms:"
        found = self.find(_reatom, keys_only=True)
        if not found:
            return
        found = self.find(_reatom, _recoupl, keys_only=True)
        # we grab the tensors inside the principal axis representation
        # for the cartesian axis representation we start the list at 0 and grab every other instance
        start_coupl = found[_recoupl][1::2]
        start_pert = np.array(found[_reatom]) - 3
        dfs = []
        # grab atoms
        cols = ['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz']
        for ln, start in zip(start_pert, start_coupl):
            line = self[ln].split()
            # we just replace all of the () in the strings
            pert_nucl = list(
                map(lambda x: x.replace('(', '').replace(')', ''), line[5:]))
            nucl = list(
                map(lambda x: x.replace('(', '').replace(')', ''), line[1:3]))
            # grab both tensors
            df = self.pandas_dataframe(start + 2, start + 5, ncol=6)
            # this will grab the iso value and tensor elements for the j coupling in hz
            df.drop(range(3), axis='columns', inplace=True)
            df = pd.DataFrame(df.unstack().values.reshape(1, 9), columns=cols)
            iso = self[start + 1].split()[-1]
            # place all of the dataframe columns
            df['isotropic'] = float(iso)
            df['atom'] = int(nucl[0])
            df['symbol'] = nucl[1]
            df['pt_atom'] = int(pert_nucl[0])
            df['pt_symbol'] = pert_nucl[1]
            df['label'] = 'j coupling'
            df['frame'] = 0
            dfs.append(df)
        # put everything together
        j_coupling = pd.concat(dfs, ignore_index=True)
        j_coupling['atom'] -= 1
        j_coupling['pt_atom'] -= 1
        self.j_coupling = j_coupling

    def __init__(self, *args, **kwargs):
        super(Output, self).__init__(*args, **kwargs)
Example #10
0
class Output(six.with_metaclass(OutMeta, Editor)):
    """Editor for NWChem calculation output file (stdout)."""
    def parse_atom(self):
        """Parse the atom dataframe."""
        _reatom01 = 'Geometry "'
        _reatom02 = 'Atomic Mass'
        _reatom03 = 'ECP       "ecp basis"'
        _reatom04 = 'Output coordinates in'
        found = self.find(_reatom01,
                          _reatom02,
                          _reatom03,
                          _reatom04,
                          keys_only=True)
        unit = self[found[_reatom04][0]].split()[3]
        unit = "Angstrom" if unit == "angstroms" else "au"
        starts = np.array(found[_reatom01]) + 7
        stops = np.array(found[_reatom02]) - 1
        ecps = np.array(found[_reatom03]) + 2
        ecps = {self[ln].split()[0]: int(self[ln].split()[3]) for ln in ecps}
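        # ecps maps each atom tag to the number of core electrons replaced by
        # its ECP; Zeff below subtracts that count from Z (zero if no ECP).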
        columns = ['label', 'tag', 'Z', 'x', 'y', 'z']
        atom = pd.concat([
            self.pandas_dataframe(s, e, columns)
            for s, e in zip(starts, stops)
        ])
        atom['symbol'] = atom['tag'].str.extract(
            '([A-z]{1,})([0-9]*)', expand=False)[0].str.lower().str.title()
        atom['Z'] = atom['Z'].astype(np.int64)
        atom['Zeff'] = (atom['Z'] -
                        atom['tag'].map(ecps).fillna(value=0)).astype(np.int64)
        #n = len(atom)
        nf = atom.label.value_counts().max()
        nat = atom.label.max()
        atom['frame'] = [i for i in range(nf) for j in range(nat)]
        atom['label'] -= 1
        atom['x'] *= Length[unit, 'au']
        atom['y'] *= Length[unit, 'au']
        atom['z'] *= Length[unit, 'au']
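        # If several geometries were printed, drop the last frame (assumed to be
        # a repeat of the converged geometry).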
        if atom['frame'].max() > 0:
            li = atom['frame'].max()
            atom = atom[~(atom['frame'] == li)]
            atom.reset_index(drop=True, inplace=True)
        del atom['label']
        self.atom = Atom(atom)

    def parse_orbital(self):
        """Parse the :class:`~exatomic.core.orbital.Orbital` dataframe."""
        orbital = None
        _remo01 = 'Molecular Orbital Analysis'
        _remo02 = 'alpha - beta orbital overlaps'
        _remo03 = 'center of mass'
        check = self.find(_remo01)
        if any(['Alpha' in value for value in check]):
            alpha_starts = np.array(
                [no
                 for no, line in check if 'Alpha' in line], dtype=np.int64) + 2
            alpha_stops = np.array(
                [no
                 for no, line in check if 'Beta' in line], dtype=np.int64) - 1
            beta_starts = alpha_stops + 3
            beta_stops = np.array(self.find(_remo02, keys_only=True),
                                  dtype=np.int64) - 1
            alpha_orbital = self._parse_orbital(alpha_starts, alpha_stops)
            beta_orbital = self._parse_orbital(beta_starts, beta_stops)
            alpha_orbital['spin'] = 0
            beta_orbital['spin'] = 1
            orbital = pd.concat((alpha_orbital, beta_orbital),
                                ignore_index=True)
        else:
            starts = np.array(list(zip(*check))[0], dtype=np.int64) + 2
            stops = np.array(self.find(_remo03, keys_only=True),
                             dtype=np.int64) - 1
            orbital = self._parse_orbital(starts, stops)
            orbital['spin'] = 0
        orbital['group'] = 0
        self.orbital = Orbital(orbital)

    def parse_momatrix(self):
        """
        Parse the :class:`~exatomic.core.orbital.MOMatrix` dataframe.

        Note:
            Must supply 'print "final vectors" "final vectors analysis"' for momatrix
        """
        key0 = "Final MO vectors"
        key1 = "center of mass"
        found = self.find(key0, key1)
        if found[key0]:
            start = found[key0][0][0] + 6
            end = found[key1][0][0] - 1
            c = pd.read_fwf(StringIO("\n".join(self[start:end])),
                            widths=(6, 12, 12, 12, 12, 12, 12),
                            names=list(range(7)))
            self.c = c
            idx = c[c[0].isnull()].index.values
            c = c[~c.index.isin(idx)]
            del c[0]
            nbas = len(self.basis_set_order)
            n = c.shape[0] // nbas
            coefs = []
            # The for loop below is like numpy.array_split(df, n); using numpy.array_split
            # with dataframes seemed to have strange results where splits had wrong sizes?
            for i in range(n):
                coefs.append(c.iloc[i * nbas:(i + 1) *
                                    nbas, :].astype(float).dropna(
                                        axis=1).values.ravel("F"))
            c = np.concatenate(coefs)
            del coefs
            orbital, chi = _square_indices(len(self.basis_set_order))
            self.momatrix = MOMatrix.from_dict({
                'coef': c,
                'chi': chi,
                'orbital': orbital,
                'frame': 0
            })
            # momatrix = pd.DataFrame.from_dict({'coef': c, 'chi': chi, 'orbital': orbital})
            # momatrix['frame'] = 0
            # self.momatrix = momatrix

    def _parse_orbital(self, starts, stops):
        '''
        This function actually performs parsing of :class:`~exatomic.orbital.Orbital`

        See Also:
            :func:`~exnwchem.output.Output.parse_orbital`
        '''
        joined = '\n'.join(
            ['\n'.join(self[s:e]) for s, e in zip(starts, stops)])
        nvec = joined.count('Vector')
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        mapper = self.basis_set.functions(
            self.meta['spherical']).groupby(level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        nbas *= nvec
        # Orbital dataframe -- alternatively one could parse the strings
        # into the DataFrame and then use the pd.Series.str methods to
        # perform all the replacements at the same time, eg. 'D' --> 'E'
        # and 'Occ=' --> '', etc.
        orb_no = np.empty((nvec, ), dtype=np.int64)
        occ = np.empty((nvec, ), dtype=np.float64)
        nrg = np.empty((nvec, ), dtype=np.float64)
        x = np.empty((nvec, ), dtype=np.float64)
        y = np.empty((nvec, ), dtype=np.float64)
        z = np.empty((nvec, ), dtype=np.float64)
        frame = np.empty((nvec, ), dtype=np.int64)
        fc = -1  # Frame counter
        oc = 0  # Orbital counter
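        # Each 'Vector' line carries the orbital number, occupation and energy;
        # the following 'MO Center' line carries the orbital centroid (x, y, z).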
        for s, e in zip(starts, stops):
            fc += 1
            for line in self[s:e]:
                ls = line.split()
                if 'Vector' in line:
                    orb_no[oc] = ls[1]
                    occ[oc] = ls[2].replace('Occ=', '').replace('D', 'E')
                    nrg[oc] = ls[3].replace('E=', '').replace(
                        'D', 'E') if 'E=-' in line else ls[4].replace(
                            'D', 'E')
                    frame[oc] = fc
                elif 'MO Center' in line:
                    x[oc] = ls[2].replace(',', '').replace('D', 'E')
                    y[oc] = ls[3].replace(',', '').replace('D', 'E')
                    z[oc] = ls[4].replace(',', '').replace('D', 'E')
                    oc += 1
        orb_no -= 1
        return pd.DataFrame.from_dict({
            'x': x,
            'y': y,
            'z': z,
            'frame': frame,
            'vector': orb_no,
            'occupation': occ,
            'energy': nrg
        })

    def parse_basis_set(self):
        """
        Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.
        """
        if not hasattr(self, "atom"):
            self.parse_atom()
        _rebas01 = ' Basis "'
        _rebas02 = ' Summary of "'
        _rebas03 = [
            ' s ', ' px ', ' py ', ' pz ', ' d ', ' f ', ' g ', ' h ', ' i ',
            ' j ', ' k ', ' l ', ' m ', ' p '
        ]
        found = self.find(_rebas01, _rebas02)
        spherical = "spherical" in found[_rebas01][0][1]
        start = found[_rebas01][0][0] + 2
        idx = 1 if len(found[_rebas02]) > 1 else -1
        stop = found[_rebas02][idx][0] - 1
        # Read in all of the extra lines that contain ---- and tag names
        df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                         widths=(4, 2, 16, 16),
                         names=("shell", "L", "alpha", "d"))
        df.loc[df['shell'] == "--", "shell"] = np.nan
        tags = df.loc[(df['shell'].str.isdigit() == False), "shell"]
        idxs = tags.index.tolist()
        idxs.append(len(df))
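        # Non-numeric entries in the 'shell' column are element tags heading each
        # basis block; rows between consecutive tags belong to that element's set.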
        df['set'] = ""
        for i, tag in enumerate(tags):
            df.loc[idxs[i]:idxs[i + 1], "set"] = tag
        df = df.dropna().reset_index(drop=True)
        mapper = {v: k for k, v in dict(enumerate(df['set'].unique())).items()}
        df['set'] = df['set'].map(mapper)
        df['L'] = df['L'].str.strip().str.lower().map(lmap)
        df['alpha'] = df['alpha'].astype(float)
        df['d'] = df['d'].astype(float)
        # NO SUPPORT FOR MULTIPLE FRAMES?
        df['frame'] = 0
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = spherical
        self.atom['set'] = self.atom['tag'].map(mapper)

    def parse_basis_set_order(self):
        dtype = [('center', 'i8'), ('shell', 'i8'), ('L', 'i8')]
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        if self.meta['spherical']:
            dtype += [('ml', 'i8')]
        else:
            dtype += [('l', 'i8'), ('m', 'i8'), ('n', 'i8')]
        mapper = self.basis_set.functions(
            self.meta['spherical']).groupby(level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        bso = np.empty((nbas, ), dtype=dtype)
        cnt = 0
        bases = self.basis_set.groupby('set')
        for seht, center in zip(self.atom['set'], self.atom.index):
            bas = bases.get_group(seht).groupby('shell')
            if self.meta['spherical']:
                for shell, grp in bas:
                    l = grp['L'].values[0]
                    for ml in spherical_ordering_function(l):
                        bso[cnt] = (center, shell, l, ml)
                        cnt += 1
            else:
                for shell, grp in bas:
                    l = grp['L'].values[0]
                    for _, ll, m, n in cartesian_ordering_function(l):
                        bso[cnt] = (center, shell, l, ll, m, n)
                        cnt += 1
        bso = pd.DataFrame(bso)
        bso['frame'] = 0
        # New shell definition consistent with basis internals
        shls = []
        grps = bso.groupby(['center', 'L'])
        cache = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
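        # Within each (center, L) group the shell index counts prior occurrences
        # of the same ml, i.e. the n-th contracted shell of that angular momentum.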
        for (cen, L), grp in grps:
            for ml in grp['ml']:
                shls.append(cache[cen][L][ml])
                cache[cen][L][ml] += 1
        bso['shell'] = shls
        self.basis_set_order = bso

    def parse_frame(self):
        """
        Create a minimal :class:`~exatomic.core.frame.Frame` from the (parsed)
        :class:`~exatomic.core.atom.Atom` object.
        """
        _rescfen = 'Total SCF energy'
        _redften = 'Total DFT energy'
        self.frame = compute_frame_from_atom(self.atom)
        found = self.find(_rescfen, _redften)
        scfs = found[_rescfen]
        dfts = found[_redften]
        if scfs and dfts:
            print('Warning: found total energies from scf and dft, using dft')
            dfts = [float(val.split()[-1]) for key, val in dfts]
            self.frame['total_energy'] = dfts
        elif scfs:
            scfs = [float(val.split()[-1]) for key, val in scfs]
            self.frame['total_energy'] = scfs
        elif dfts:
            dfts = [float(val.split()[-1]) for key, val in dfts]
            self.frame['total_energy'] = dfts

    def __init__(self, *args, **kwargs):
        super(Output, self).__init__(*args, **kwargs)
Example #11
0
class Output(six.with_metaclass(GauMeta, Editor)):
    def _parse_triangular_matrix(self, regex, column='coef', values_only=False):
        _rebas01 = r'basis functions,'
        found = self.find_next(_rebas01, keys_only=True)
        nbas = int(self[found].split()[0])
        found = self.find_next(regex, keys_only=True)
        if not found: return
        ncol = len(self[found + 1].split())
        start = found + 2
        rmdr = nbas % ncol
        skips = np.array(list(reversed(range(rmdr, nbas + max(1, rmdr), ncol))))
        skips = np.cumsum(skips) + np.arange(len(skips))
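        # The triangular matrix is printed in column blocks of `ncol`; `skips`
        # holds the offsets (from `start`) of the header line preceding each
        # subsequent block so those lines are skipped when reading the data.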
        stop = start + skips[-1]
        matrix = self.pandas_dataframe(start, stop, ncol + 1,
                                       index_col=0, skiprows=skips,
                                       ).unstack().dropna().apply(
                                       lambda x: x.replace('D', 'E')
                                       ).astype(np.float64).values
        if values_only: return matrix
        idxs = _triangular_indices(ncol, nbas)
        return pd.DataFrame.from_dict({'chi0': idxs[:,0],
                                       'chi1': idxs[:,1],
                                      'frame': idxs[:,2],
                                       column: matrix})

    def parse_atom(self):
        # Atom flags
        _regeom01 = 'Input orientation'
        _regeom02 = 'Standard orientation'
        # Find our data
        found = self.find(_regeom01, _regeom02, keys_only=True)
        # Check if nosymm was specified
        key = _regeom02 if found[_regeom02] else _regeom01
        starts = np.array(found[key]) + 5
        # Prints converged geometry twice but only need it once
        starts = starts[:-1] if len(starts) > 1 else starts
        stop = starts[0]
        # Find where the data stops
        while '-------' not in self[stop]: stop += 1
        # But it should be same sized array each time
        stops = starts + (stop - starts[0])
        dfs = []
        # Iterate over frames
        for i, (start, stop) in enumerate(zip(starts, stops)):
            atom = self.pandas_dataframe(start, stop, 6)
            atom['frame'] = i
            dfs.append(atom)
        atom = pd.concat(dfs).reset_index(drop=True)
        # Drop the column of atomic type (whatever that is)
        atom.drop([2], axis=1, inplace=True)
        # Name the data
        atom.columns = ['set', 'Z', 'x', 'y', 'z', 'frame']
        # Zero-based indexing
        atom['set'] -= 1
        # Convert to atomic units
        atom['x'] *= Length['Angstrom', 'au']
        atom['y'] *= Length['Angstrom', 'au']
        atom['z'] *= Length['Angstrom', 'au']
        # Map atomic symbols onto Z numbers
        atom['symbol'] = atom['Z'].map(z2sym)
        self.atom = atom

    def parse_basis_set(self):
        # Basis flags
        _rebas02 = 'AO basis set in the form of general basis input'
        _rebas03 = ' (Standard|General) basis'
        _basrep = {'D 0': 'D0 ', 'F 0': 'F0 ',
                   'G 0': 'G0 ', 'H 0': 'H0 ', 'I 0': 'I0 '}
        _rebaspat = re.compile('|'.join(_basrep.keys()))
        # Find the basis set
        found = self.regex(_rebas02, _rebas03, keys_only=True)
        if not found[_rebas02]: return
        start = stop = found[_rebas02][0] + 1
        while self[stop].strip(): stop += 1
        # Raw data
        df = self.pandas_dataframe(start, stop, 4)
        def _padx(srs): return [0] + srs.tolist() + [df.shape[0]]
        # Get some indices for appropriate columns
        setdx = _padx(df[0][df[0] == '****'].index)
        shldx = _padx(df[3][~np.isnan(df[3])].index)
        lindx = df[0][df[0].str.lower().isin(lorder + ['sp'])]
        # Populate the df
        df['L'] = lindx.str.lower().map(lmap)
        df['L'] = df['L'].fillna(method='ffill').fillna(
                                 method='bfill').astype(np.int64)
        df['center'] = np.concatenate([np.repeat(i, stop - start)
                       for i, (start, stop) in enumerate(zip(setdx, setdx[1:]))])
        df['shell'] = np.concatenate([np.repeat(i-1, stop - start)
                      for i, (start, stop) in enumerate(zip(shldx, shldx[1:]))])
        # Complicated way to get shells but it is flat
        maxshl = df.groupby('center').apply(lambda x: x.shell.max() + 1)
        maxshl.index += 1
        maxshl[0] = 0
        df['shell'] = df['shell'] - df['center'].map(maxshl)
        # Drop all the garbage
        todrop = setdx[:-1] + [i+1 for i in setdx[:-2]] + lindx.index.tolist()
        df.drop(todrop, inplace=True)
        # Keep cleaning
        if df[0].dtype == 'object':
            df[0] = df[0].str.replace('D', 'E').astype(np.float64)
        if df[1].dtype == 'object':
            df[1] = df[1].str.replace('D', 'E').astype(np.float64)
        try: sp = np.isnan(df[2]).sum() == df.shape[0]
        except TypeError:
            df[2] = df[2].str.replace('D', 'E').astype(np.float64)
            sp = True
        df.rename(columns={0: 'alpha', 1: 'd'}, inplace=True)
        # Deduplicate basis sets and expand 'SP' shells if present
        df, setmap = deduplicate_basis_sets(df, sp=sp)
        spherical = '5D' in self[found[_rebas03][0]]
        if df['L'].max() < 2:
            spherical = True
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = spherical
        self.atom['set'] = self.atom['set'].map(setmap)


    def parse_orbital(self):
        _rebas01 = r'basis functions,'
        # Orbital flags
        _realphaelec = 'alpha electrons'
        _reorb01 = '(?=Alpha|Beta).*(?=occ|virt)'
        _reorb02 = 'Orbital symmetries'
        _orbslice = [slice(10 * i, 10 * i + 9) for i in range(5)]
        _symrep = {'Occupied': '', 'Virtual': '', 'Alpha Orbitals:': '',
                   'Beta  Orbitals:': '', r'\(': '', r'\)': ''}
        _resympat = re.compile('|'.join(_symrep.keys()))
        _symrep['('] = ''
        _symrep[')'] = ''
        # Find where our data is
        found = self.regex(_reorb01, _reorb02, _rebas01, _realphaelec)
        # If no orbital energies, quit
        if not found[_reorb01]: return
        # Check if open shell
        os = any(('Beta' in ln for lno, ln in found[_reorb01]))
        #UNUSED?
        #occ = 1 if os else 2
        # Find number of electrons
        ae, x, x, be, x, x = found[_realphaelec][0][1].split()
        ae, be = int(ae), int(be)
        # Get orbital energies
        ens = '\n'.join([ln.split('-- ')[1] for i, ln in found[_reorb01]])
        ens = pd.read_fwf(six.StringIO(ens), header=None,
                          widths=np.repeat(10, 5)).stack().values
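        # Orbital energies are printed five per line in fixed 10-character
        # fields; stacking the fixed-width frame flattens them into one array.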
        # Other arrays
        orbital = Orbital.from_energies(ens, ae, be, os=os)
        # Symmetry labels
        if found[_reorb02]:
            # Gaussian seems to print out a lot of these blocks
            # maybe a better way to deal with this
            allsyms = []
            match = ['(', 'Orbitals']
            for i, (start, ln) in enumerate(found[_reorb02]):
                # Find the start, stop indices for each block
                while match[0] not in self[start]: start += 1
                stop = start + 1
                while any((i in self[stop] for i in match)): stop += 1
                # Clean up the text block so it is just symmetries
                syms = _resympat.sub(lambda m: _symrep[m.group(0)],
                                     ' '.join([i.strip() for i in
                                     self[start:stop]])).split()
                # cat the syms for each block together
                allsyms += syms
            # Add it to our dataframe
            orbital['symmetry'] = allsyms[-orbital.shape[0]:]
        self.orbital = orbital


    def parse_momatrix(self):
        """
        Parses the MO matrix if asked for in the input.

        Note:
            Requires specification of pop(full) or pop(no) or the like.
        """
        if hasattr(self, '_momatrix'): return
        _rebas01 = r'basis functions,'
        # MOMatrix flags
        _remomat01 = r'pop.*(?=full|no)'
        _remomat02 = 'Orbital Coefficients'
        _basrep = {'D 0': 'D0 ', 'F 0': 'F0 ',
                   'G 0': 'G0 ', 'H 0': 'H0 ', 'I 0': 'I0 '}
        _rebaspat = re.compile('|'.join(_basrep.keys()))
        # Check if a full MO matrix was specified in the input
        check = self.regex(_remomat01, stop=1000, flags=re.IGNORECASE)
        if not check: return
        # Find approximately where our data is
        found = self.find(_remomat02, _rebas01)
        # Get some dimensions
        ndim = len(found[_remomat02])
        # If something goes wrong
        if not ndim: return
        nbas = int(found[_rebas01][0][1].split()[0])
        nblocks = np.int64(np.ceil(nbas / 5))
        # Allocate a big ol' array
        coefs = np.empty((nbas ** 2, ndim), dtype=np.float64)
        # Dynamic column generation hasn't been worked out yet
        colnames = ['coef'] + ['coef' + str(i) for i in range(1, ndim)]
        # Iterate over where the data was found
        # c counts the column in the resulting momatrix table
        _csv_args = {'delim_whitespace': True, 'header': None}
        for c, (lno, ln) in enumerate(found[_remomat02]):
            gap = 0
            while not 'eigenvalues' in self[lno + gap].lower(): gap += 1
            start = lno + gap + 1
            stop = start + nbas
            # The basis set order is printed with every chunk of eigenvectors
            if not c:
                mapr = self.basis_set.groupby(['set', 'L']).apply(
                        lambda x: x['shell'].unique()).to_dict()
                self.basis_set_order = _basis_set_order(self[start:stop], mapr,
                                                        self.atom['set'])
            # Some fudge factors due to extra lines being printed
            space = start - lno - 1
            fnbas = nbas + space
            span = start + fnbas * nblocks
            # Finally get where our chunks are
            starts = np.arange(start, span, fnbas)
            stops = np.arange(stop, span, fnbas)
            stride = 0
            # b counts the blocks of eigenvectors per column in momatrix
            for b, (start, stop) in enumerate(zip(starts, stops)):
                # Number of eigenvectors in this block
                ncol = len(self[start][21:].split())
                step = nbas * ncol
                _csv_args['names'] = range(ncol)
                # Massage the text so that we can read csv
                block = '\n'.join([ln[21:] for ln in self[start:stop]])
                block = _rebaspat.sub(lambda m: _basrep[m.group(0)], block)
                # Insert the resulting unstacked values into the coefficient array
                coefs[stride:stride + nbas * ncol, c] = pd.read_fwf(
                        six.StringIO(block), header=None,
                        widths=np.repeat(10, 5)).unstack().dropna().values
                stride += step
        # Index chi, phi
        chis = np.tile(range(nbas), nbas)
        orbs = np.repeat(range(nbas), nbas)
        momatrix = pd.DataFrame(coefs, columns=colnames)
        momatrix['chi'] = chis
        momatrix['orbital'] = orbs
        # Frame not really implemented for momatrix
        momatrix['frame'] = 0
        self.momatrix = momatrix

    def parse_basis_set_order(self):
        if hasattr(self, '_basis_set_order'): return
        self.parse_momatrix()


    def parse_frame(self):
        # Frame flags
        _retoten = 'SCF Done:'
        _realphaelec = 'alpha electrons'
        _reelecstate = 'The electronic state'
        # Get the default frame from the atom table
        self.frame = compute_frame_from_atom(self.atom)
        # Find our data
        found = self.find(_retoten, _realphaelec, _reelecstate)
        # Extract just the total SCF energies
        ens = [float(ln.split()[4]) for lno, ln in found[_retoten]]
        # If 'SCF Done' prints out more times than frames
        try:
            ens = ens if len(self.frame) == len(ens) else ens[-len(self.frame):]
            self.frame['E_tot'] = ens
        except ValueError:
            pass
        # We will assume number of electrons doesn't change per frame
        ae, x, x, be, x, x = found[_realphaelec][0][1].split()
        self.frame['N_e'] = int(ae) + int(be)
        self.frame['N_a'] = int(ae)
        self.frame['N_b'] = int(be)
        # Try to get the electronic state but don't try too hard
        try:
            states = []
            #for lno, ln in found[_reelecstate]:
            for _, ln in found[_reelecstate]:
                if 'initial' in ln: continue
                states.append(ln.split()[4].replace('.', ''))
            self.frame['state'] = states
        except (IndexError, ValueError):
            pass


    def parse_excitation(self):
        # TDDFT flags
        _retddft = 'TD'
        _reexcst = 'Excited State'
        chk = self.find(_retddft, stop=1000, keys_only=True)
        if not chk: return
        # Find the data
        found = self.find(_reexcst)
        keeps, maps, summ = [], [], []
        for i, (lno, ln) in enumerate(found):
            summ.append(ln)
            lno += 1
            while '->' in self[lno]:
                keeps.append(lno)
                maps.append(i)
                lno += 1
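        # `keeps` holds line numbers of the orbital-contribution ('->') lines and
        # `maps` records which excited state each contribution belongs to.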
        cols = [0, 1, 2, 'kind', 'eV', 3, 'nm', 4, 'osc', 's2']
        summ = pd.read_csv(six.StringIO('\n'.join([ln for lno, ln in found])),
                           delim_whitespace=True, header=None, names=cols,
                           usecols=[c for c in cols if type(c) == str])
        summ['s2'] = summ['s2'].str[7:].astype(np.float64)
        summ['osc'] = summ['osc'].str[2:].astype(np.float64)
        cols = ['occ', 0, 'virt', 'cont']
        conts = pd.read_csv(six.StringIO('\n'.join([self[i] for i in keeps])),
                            delim_whitespace=True, header=None, names=cols,
                            usecols=[c for c in cols if type(c) == str])
        conts['map'] = maps
        for col in summ.columns:
            conts[col] = conts['map'].map(summ[col])
        conts['energy'] = conts['eV'] * Energy['eV', 'Ha']
        conts['frame'] = conts['group'] = 0
        self.excitation = conts


    def parse_frequency(self):
        # Frequency flags
        _refreq = 'Freq'
        found = self.regex(_refreq, stop=1000, flags=re.IGNORECASE)
        # Skip the match in the input deck and the two in the summary at the end
        found = self.find(_refreq)[1:-2]
        if not found: return
        # Total lines per block minus the unnecessary ones
        span = found[1][0] - found[0][0] - 7
        dfs, fdx = [], 0
        # Iterate over what we found
        for lno, ln in found:
            # Get the frequencies first
            freqs = ln[15:].split()
            nfreqs = len(freqs)
            # Get just the atom displacement vectors
            start = lno + 5
            stop = start + span
            cols = range(2 + 3 * nfreqs)
            df = self.pandas_dataframe(start, stop, ncol=cols)
            # Split up the df and unstack it
            slices = [list(range(2 + i, 2 + 3 * nfreqs, 3)) for i in range(3)]
            dx, dy, dz = [df[i].unstack().values for i in slices]
            # Generate the appropriate dimensions of other columns
            labels = np.tile(df[0].values, nfreqs)
            zs = np.tile(df[1].values, nfreqs)
            freqdxs = np.repeat(range(fdx, fdx + nfreqs), df.shape[0])
            freqs = np.repeat(freqs, df.shape[0])
            fdx += nfreqs
            # Put it all together
            stacked = pd.DataFrame.from_dict({'Z': zs, 'label': labels,
                                    'dx': dx, 'dy': dy, 'dz': dz,
                                    'frequency': freqs, 'freqdx': freqdxs})
            stacked['symbol'] = stacked['Z'].map(z2sym)
            dfs.append(stacked)
        # Now put all our frequencies together
        frequency = pd.concat(dfs).reset_index(drop=True)
        # Pretty sure displacements are in cartesian angstroms
        # TODO: verify with an external program that vibrational
        #       modes look the same as the ones generated with
        #       this methodology.
        frequency['dx'] *= Length['Angstrom', 'au']
        frequency['dy'] *= Length['Angstrom', 'au']
        frequency['dz'] *= Length['Angstrom', 'au']
        # Frame not really implemented here either
        frequency['frame'] = 0
        self.frequency = frequency

    # Below are triangular matrices -- One electron integrals

    def parse_overlap(self):
        _reovl01 = '*** Overlap ***'
        overlap = self._parse_triangular_matrix(_reovl01, 'coef')
        if overlap is not None: self.overlap = overlap

    def parse_multipole(self):
        _reixn = 'IX=    {}'
        mltpl = self._parse_triangular_matrix(_reixn.format(1), 'ix1')
        if mltpl is not None:
            mltpl['ix2'] = self._parse_triangular_matrix(_reixn.format(2), 'ix2', True)
            mltpl['ix3'] = self._parse_triangular_matrix(_reixn.format(3), 'ix3', True)
            self.multipole = mltpl

    def __init__(self, *args, **kwargs):
        super(Output, self).__init__(*args, **kwargs)
Example #12
0
class Output(six.with_metaclass(OutMeta, Editor)):
    """Editor for NWChem calculation output file (stdout)."""
    def parse_atom(self):
        """Parse the atom dataframe."""
        _reatom01 = 'Geometry "'
        _reatom02 = 'Atomic Mass'
        _reatom03 = 'ECP       "ecp basis"'
        _reatom04 = 'Output coordinates in'
        found = self.find(_reatom01,
                          _reatom02,
                          _reatom03,
                          _reatom04,
                          keys_only=True)
        unit = self[found[_reatom04][0]].split()[3]
        unit = "Angstrom" if unit == "angstroms" else "au"
        starts = np.array(found[_reatom01]) + 7
        stops = np.array(found[_reatom02]) - 1
        ecps = np.array(found[_reatom03]) + 2
        ecps = {self[ln].split()[0]: int(self[ln].split()[3]) for ln in ecps}
        columns = ['label', 'tag', 'Z', 'x', 'y', 'z']
        atom = pd.concat([
            self.pandas_dataframe(s, e, columns)
            for s, e in zip(starts, stops)
        ])
        atom['symbol'] = atom['tag'].str.extract(
            '([A-z]{1,})([0-9]*)', expand=False)[0].str.lower().str.title()
        atom['Z'] = atom['Z'].astype(np.int64)
        atom['Zeff'] = (atom['Z'] -
                        atom['tag'].map(ecps).fillna(value=0)).astype(np.int64)
        #n = len(atom)
        nf = atom.label.value_counts().max()
        nat = atom.label.max()
        atom['frame'] = [i for i in range(nf) for j in range(nat)]
        atom['label'] -= 1
        atom['x'] *= Length[unit, 'au']
        atom['y'] *= Length[unit, 'au']
        atom['z'] *= Length[unit, 'au']
        if atom['frame'].max() > 0:
            li = atom['frame'].max()
            atom = atom[~(atom['frame'] == li)]
            atom.reset_index(drop=True, inplace=True)
        del atom['label']
        self.atom = Atom(atom)

    def parse_orbital(self):
        """Parse the :class:`~exatomic.core.orbital.Orbital` dataframe."""
        orbital = None
        _remo01 = 'Molecular Orbital Analysis'
        _remo02 = 'alpha - beta orbital overlaps'
        _remo03 = 'center of mass'
        check = self.find(_remo01)
        if any(['Alpha' in value for value in check]):
            alpha_starts = np.array(
                [no
                 for no, line in check if 'Alpha' in line], dtype=np.int64) + 2
            alpha_stops = np.array(
                [no
                 for no, line in check if 'Beta' in line], dtype=np.int64) - 1
            beta_starts = alpha_stops + 3
            beta_stops = np.array(self.find(_remo02, keys_only=True),
                                  dtype=np.int64) - 1
            alpha_orbital = self._parse_orbital(alpha_starts, alpha_stops)
            beta_orbital = self._parse_orbital(beta_starts, beta_stops)
            alpha_orbital['spin'] = 0
            beta_orbital['spin'] = 1
            orbital = pd.concat((alpha_orbital, beta_orbital),
                                ignore_index=True)
        else:
            starts = np.array(list(zip(*check))[0], dtype=np.int64) + 2
            stops = np.array(self.find(_remo03, keys_only=True),
                             dtype=np.int64) - 1
            orbital = self._parse_orbital(starts, stops)
            orbital['spin'] = 0
        orbital['group'] = 0
        self.orbital = Orbital(orbital)

    def parse_momatrix(self):
        """
        Parse the :class:`~exatomic.core.orbital.MOMatrix` dataframe.

        Note:
            Must supply 'print "final vectors" "final vectors analysis"' for momatrix
        """
        key0 = "Final MO vectors"
        key1 = "center of mass"
        found = self.find(key0, key1)
        if found[key0]:
            start = found[key0][0][0] + 6
            end = found[key1][0][0] - 1
            c = pd.read_fwf(StringIO("\n".join(self[start:end])),
                            widths=(6, 12, 12, 12, 12, 12, 12),
                            names=list(range(7)))
            self.c = c
            idx = c[c[0].isnull()].index.values
            c = c[~c.index.isin(idx)]
            del c[0]
            nbas = len(self.basis_set_order)
            n = c.shape[0] // nbas
            coefs = []
            # The for loop below is like numpy.array_split(df, n); using numpy.array_split
            # with dataframes seemed to have strange results where splits had wrong sizes?
            for i in range(n):
                coefs.append(c.iloc[i * nbas:(i + 1) *
                                    nbas, :].astype(float).dropna(
                                        axis=1).values.ravel("F"))
            c = np.concatenate(coefs)
            del coefs
            orbital, chi = _square_indices(len(self.basis_set_order))
            self.momatrix = MOMatrix.from_dict({
                'coef': c,
                'chi': chi,
                'orbital': orbital,
                'frame': 0
            })
            # momatrix = pd.DataFrame.from_dict({'coef': c, 'chi': chi, 'orbital': orbital})
            # momatrix['frame'] = 0
            # self.momatrix = momatrix

    def _parse_orbital(self, starts, stops):
        '''
        This function actually performs parsing of :class:`~exatomic.orbital.Orbital`

        See Also:
            :func:`~exnwchem.output.Output.parse_orbital`
        '''
        joined = '\n'.join(
            ['\n'.join(self[s:e]) for s, e in zip(starts, stops)])
        nvec = joined.count('Vector')
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        mapper = self.basis_set.functions(
            self.meta['spherical']).groupby(level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        nbas *= nvec
        # Orbital dataframe -- alternatively one could parse the strings
        # into the DataFrame and then use the pd.Series.str methods to
        # perform all the replacements at the same time, eg. 'D' --> 'E'
        # and 'Occ=' --> '', etc.
        orb_no = np.empty((nvec, ), dtype=np.int64)
        occ = np.empty((nvec, ), dtype=np.float64)
        nrg = np.empty((nvec, ), dtype=np.float64)
        x = np.empty((nvec, ), dtype=np.float64)
        y = np.empty((nvec, ), dtype=np.float64)
        z = np.empty((nvec, ), dtype=np.float64)
        frame = np.empty((nvec, ), dtype=np.int64)
        fc = -1  # Frame counter
        oc = 0  # Orbital counter
        for s, e in zip(starts, stops):
            fc += 1
            for line in self[s:e]:
                ls = line.split()
                if 'Vector' in line:
                    orb_no[oc] = ls[1]
                    occ[oc] = ls[2].replace('Occ=', '').replace('D', 'E')
                    nrg[oc] = ls[3].replace('E=', '').replace(
                        'D', 'E') if 'E=-' in line else ls[4].replace(
                            'D', 'E')
                    frame[oc] = fc
                elif 'MO Center' in line:
                    x[oc] = ls[2].replace(',', '').replace('D', 'E')
                    y[oc] = ls[3].replace(',', '').replace('D', 'E')
                    z[oc] = ls[4].replace(',', '').replace('D', 'E')
                    oc += 1
        orb_no -= 1
        return pd.DataFrame.from_dict({
            'x': x,
            'y': y,
            'z': z,
            'frame': frame,
            'vector': orb_no,
            'occupation': occ,
            'energy': nrg
        })

    def parse_basis_set(self):
        """
        Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.
        """
        if not hasattr(self, "atom"):
            self.parse_atom()
        _rebas01 = ' Basis "'
        _rebas02 = ' Summary of "'
        _rebas03 = [
            ' s ', ' px ', ' py ', ' pz ', ' d ', ' f ', ' g ', ' h ', ' i ',
            ' j ', ' k ', ' l ', ' m ', ' p '
        ]
        found = self.find(_rebas01, _rebas02)
        spherical = "spherical" in found[_rebas01][0][1]
        start = found[_rebas01][0][0] + 2
        idx = 1 if len(found[_rebas02]) > 1 else -1
        stop = found[_rebas02][idx][0] - 1
        # Read in all of the extra lines that contain ---- and tag names
        df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                         widths=(4, 2, 16, 16),
                         names=("shell", "L", "alpha", "d"))
        df.loc[df['shell'] == "--", "shell"] = np.nan
        tags = df.loc[(df['shell'].str.isdigit() == False), "shell"]
        idxs = tags.index.tolist()
        idxs.append(len(df))
        df['set'] = ""
        for i, tag in enumerate(tags):
            df.loc[idxs[i]:idxs[i + 1], "set"] = tag
        df = df.dropna().reset_index(drop=True)
        mapper = {v: k for k, v in dict(enumerate(df['set'].unique())).items()}
        df['set'] = df['set'].map(mapper)
        df['L'] = df['L'].str.strip().str.lower().map(lmap)
        df['alpha'] = df['alpha'].astype(float)
        df['d'] = df['d'].astype(float)
        # NO SUPPORT FOR MULTIPLE FRAMES?
        df['frame'] = 0
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = spherical
        self.atom['set'] = self.atom['tag'].map(mapper)

    def parse_basis_set_order(self):
        dtype = [('center', 'i8'), ('shell', 'i8'), ('L', 'i8')]
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        if self.meta['spherical']:
            dtype += [('ml', 'i8')]
        else:
            dtype += [('l', 'i8'), ('m', 'i8'), ('n', 'i8')]
        mapper = self.basis_set.functions(
            self.meta['spherical']).groupby(level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        bso = np.empty((nbas, ), dtype=dtype)
        cnt = 0
        bases = self.basis_set.groupby('set')
        for seht, center in zip(self.atom['set'], self.atom.index):
            bas = bases.get_group(seht).groupby('shell')
            if self.meta['spherical']:
                for shell, grp in bas:
                    l = grp['L'].values[0]
                    for ml in spherical_ordering_function(l):
                        bso[cnt] = (center, shell, l, ml)
                        cnt += 1
            else:
                for shell, grp in bas:
                    l = grp['L'].values[0]
                    for _, ll, m, n in cartesian_ordering_function(l):
                        bso[cnt] = (center, shell, l, ll, m, n)
                        cnt += 1
        bso = pd.DataFrame(bso)
        bso['frame'] = 0
        # New shell definition consistent with basis internals
        shls = []
        grps = bso.groupby(['center', 'L'])
        cache = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        for (cen, L), grp in grps:
            for ml in grp['ml']:
                shls.append(cache[cen][L][ml])
                cache[cen][L][ml] += 1
        bso['shell'] = shls
        self.basis_set_order = bso

    def parse_roa(self):
        """
        Parse the :class:`~exatomic.core.tensor.Polarizability` dataframe. This will parse the
        output from the Raman Optical Activity outputs.

        Note:
            The rank-3 tensors are stored using the rank-2 tensor layout, so each
            3D tensor appears as three rows sharing the same label.
        """
        _reroa = 'roa begin'
        _reare = 'alpha real'
        _reaim = 'alpha im'
        #        _reombre = 'beta real'
        #        _reombim = 'beta im'
        _reombre = 'omega beta(real)'
        _reombim = 'omega beta(imag)'
        _redqre = 'dipole-quadrupole real (Cartesian)'
        _redqim = 'dipole-quadrupole imag (Cartesian)'

        if not self.find(_reroa):
            return
        found_2d = self.find(_reare,
                             _reaim,
                             _reombre,
                             _reombim,
                             keys_only=True)
        found_3d = self.find(_redqre, _redqim, keys_only=True)
        data = {}
        start = np.array(list(found_2d.values())).reshape(4, ) + 1
        end = np.array(list(found_2d.values())).reshape(4, ) + 10
        columns = ['x', 'val']
        data = [
            self.pandas_dataframe(s, e, columns) for s, e in zip(start, end)
        ]
        df = pd.concat([dat for dat in data]).reset_index(drop=True)
        df['grp'] = [i for i in range(4) for j in range(9)]
        df = df[['val', 'grp']]
        df = pd.DataFrame(
            df.groupby('grp').apply(
                lambda x: x.unstack().values[:-9]).values.tolist(),
            columns=['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz'])
        # find the electric dipole-quadrupole polarizability
        # NWChem gives this as a list of 18 values assuming the matrix to be symmetric
        # for our implementation we need to extend it to 27 elements
        # TODO: check that NWChem does assume that the 3D tensors are symmetric
        start = np.sort(np.array(list(found_3d.values())).reshape(2, )) + 1
        end = np.sort(np.array(list(found_3d.values())).reshape(2, )) + 19
        data = [
            self.pandas_dataframe(s, e, columns) for s, e in zip(start, end)
        ]
        df3 = pd.concat([dat for dat in data]).reset_index(drop=True)
        vals = df3['val'].values.reshape(2, 3, 6)
        adx = np.triu_indices(3)
        mat = np.zeros((2, 3, 3, 3))
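        # Each set of 6 values fills the upper triangle of a 3x3 matrix; adding
        # the transpose and subtracting the (double-counted) diagonal rebuilds
        # the full symmetric tensor.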
        for i in range(2):
            for j in range(3):
                mat[i][j][adx] = vals[i][j]
                mat[i][j] = mat[i][j] + np.transpose(
                    mat[i][j]) - np.identity(3) * mat[i][j]
        mat = mat.reshape(18, 3)
        df3 = pd.DataFrame(mat, columns=['x', 'y', 'z'])
        df3['grp1'] = [i for i in range(2) for j in range(9)]
        df3['grp2'] = [j for i in range(2) for j in range(3) for n in range(3)]
        df3 = pd.DataFrame(
            df3.groupby([
                'grp1', 'grp2'
            ]).apply(lambda x: x.unstack().values[:-6]).values.tolist(),
            columns=['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz'],
            index=[
                'Ax_real', 'Ay_real', 'Az_real', 'Ax_imag', 'Ay_imag',
                'Az_imag'
            ])
        split_label = np.transpose([i.split('_') for i in df3.index.values])
        label = split_label[0]
        types = split_label[1]
        df['label'] = found_2d.keys()
        df['label'].replace(
            [_reare, _reombre, _reaim, _reombim],
            ['alpha-real', 'g_prime-real', 'alpha-imag', 'g_prime-imag'],
            inplace=True)
        df['type'] = [i.split('-')[-1] for i in df['label'].values]
        df['label'] = [i.split('-')[0] for i in df['label'].values]
        df['frame'] = np.repeat([0], len(df.index))
        df3['label'] = label
        df3['type'] = types
        df3['frame'] = np.repeat([0], len(df3.index))
        self.roa = pd.concat([df, df3], ignore_index=True)

    def parse_frequency(self):
        """
        Parse the :class:`~exatomic.core.atom.Frequency` dataframe.

        Note:
            This code removes all negative frequencies.
        """
        _remeth = "NORMAL MODE EIGENVECTORS IN CARTESIAN COORDINATES"
        _refreq = "Frequency"
        _renat = "Atom information"

        found = self.find(_remeth)
        fnat = self.find(_renat)
        if not found and not fnat:
            return
        # get atom information
        start = fnat[0][0] + 3
        stop = start
        while '----' not in self[stop]:
            stop += 1
        # we assume that there is only one 'Atom information' block in the output
        columns = ['symbol', 'atom', 'x', 'y', 'z', 'mass']
        atom = self.pandas_dataframe(start, stop, columns)
        atom['atom'] -= 1
        nat = len(atom)
        # find bounds where the calculated frequencies are
        start = found[0][0]
        stop = found[1][0]
        # get the data
        found = self.find(_refreq, start=start, stop=stop)
        dfs = []
        fdx = 0
        # get frequencies
        for lno, ln in found:
            # get the frequency values
            tmp = ln.split()[1:]
            freq = np.asarray([float(i) for i in tmp])
            ## TODO: here we remove all negative frequencies
            ##       need to find out if this is ok to do
            # set start and end points for the calculated normal modes
            staf = lno + start + 1
            stof = lno + start + nat * 3 + 2
            nm = self.pandas_dataframe(staf, stof,
                                       ncol=len(freq)).reset_index(drop=True)
            # generate boolean array that shows False for negative frequencies
            neg = [not f < 0 for f in freq]
            # remove negative frequencies
            nm.drop(columns=[idx for idx, val in enumerate(neg) if not val],
                    inplace=True)
            freq = freq[neg]
            # get normal modes in the x, y, z directions
            nm = nm.stack().values
            nfreq = len(freq)
            dx = nm[::3]
            dy = nm[1::3]
            dz = nm[2::3]
            # assemble dataframe
            symbol = np.tile(atom['symbol'], nfreq)
            adx = np.tile(atom['atom'], nfreq)
            freq = np.repeat(freq, nat)
            freqdx = np.repeat([i for i in range(fdx, fdx + nfreq)], nat)
            frames = np.repeat([0], nfreq * nat)
            fdx += nfreq
            stacked = pd.DataFrame.from_dict({
                'symbol': symbol,
                'atom': adx,
                'dx': dx,
                'dy': dy,
                'dz': dz,
                'freq': freq,
                'freqdx': freqdx,
                'frames': frames
            })
            dfs.append(stacked)
        frequency = pd.concat(dfs).reset_index(drop=True)
        self.frequency = frequency

    def parse_gradient(self):
        """
        Parse :class:`exatomic.core.gradient.Gradient` dataframe.
        """
        _regrad = "DFT ENERGY GRADIENTS"

        found = self.find(_regrad)
        if not found:
            return
        found = self.find(_regrad, keys_only=True)
        # find start and stop points
        starts = np.array(found) + 4
        stop = starts[0]
        while '----' not in self[stop]:
            stop += 1
        # backtrack one line as the line after the needed info is empty
        stop -= 1
        stops = starts + (stop - starts[0])
        dfs = []
        # generate dataframe array
        columns = ['atom', 'symbol', 'x', 'y', 'z', 'fx', 'fy', 'fz']
        for i, (start, stop) in enumerate(zip(starts, stops)):
            gradient = self.pandas_dataframe(start, stop, columns)
            gradient['frame'] = i
            dfs.append(gradient[['atom', 'symbol', 'fx', 'fy', 'fz', 'frame']])
        # construct the dataframe
        gradient = pd.concat(dfs).reset_index(drop=True)
        gradient['Z'] = gradient['symbol'].map(sym2z)
        # want to keep more or less the same order across dataframes
        # or at least try
        self.gradient = gradient[[
            'Z', 'atom', 'fx', 'fy', 'fz', 'symbol', 'frame'
        ]]

    def parse_frame(self):
        """
        Create a minimal :class:`~exatomic.core.frame.Frame` from the (parsed)
        :class:`~exatomic.core.atom.Atom` object.
        """
        _rescfen = 'Total SCF energy'
        _redften = 'Total DFT energy'
        self.frame = compute_frame_from_atom(self.atom)
        found = self.find(_rescfen, _redften)
        scfs = found[_rescfen]
        dfts = found[_redften]
        if scfs and dfts:
            print('Warning: found total energies from scf and dft, using dft')
            dfts = [float(val.split()[-1]) for key, val in dfts]
            self.frame['total_energy'] = dfts
        elif scfs:
            scfs = [float(val.split()[-1]) for key, val in scfs]
            self.frame['total_energy'] = scfs
        elif dfts:
            dfts = [float(val.split()[-1]) for key, val in dfts]
            self.frame['total_energy'] = dfts

    def __init__(self, *args, **kwargs):
        super(Output, self).__init__(*args, **kwargs)