Python BasisSet.functions Examples

Programming Language: Python

Namespace/Package Name: exatomic.core.basis

Class/Type: BasisSet

Method/Function: functions

Examples at hotexamples.com: 4

Python BasisSet.functions - 4 examples found. These are the top-rated real-world Python examples of exatomic.core.basis.BasisSet.functions, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

groupby(5)

BasisSet(4)

functions(3)

functions_by_shell(1)

primitives(1)

primitives_by_shell(1)

shells(1)

Example #1

Show file

File: output.py Project: tjduigna/exatomic

class Output(six.with_metaclass(OutMeta, Editor)):
    """Editor for NWChem calculation output file (stdout)."""

    def parse_atom(self):
        """
        Parse the atom dataframe.

        Reads the atom table of every geometry block found in the output,
        derives the element symbol and ``Zeff`` (``Z`` minus the value read
        from the ECP table) of each atom, scales the coordinates by
        ``Length[unit, 'au']`` and stores the result as ``self.atom``.

        Note:
            When more than one geometry block is found, the rows belonging
            to the last block are discarded.
        """
        _reatom01 = 'Geometry "'
        _reatom02 = 'Atomic Mass'
        _reatom03 = 'ECP       "ecp basis"'
        _reatom04 = 'Output coordinates in'
        found = self.find(_reatom01, _reatom02,
                          _reatom03, _reatom04, keys_only=True)
        unit = self[found[_reatom04][0]].split()[3]
        unit = "Angstrom" if unit == "angstroms" else "au"
        starts = np.array(found[_reatom01]) + 7
        stops = np.array(found[_reatom02]) - 1
        # Map the tag found two lines below each ECP header to the integer
        # in the fourth column of that line; it is subtracted from Z below.
        ecps = {}
        for ln in np.array(found[_reatom03]) + 2:
            fields = self[ln].split()
            ecps[fields[0]] = int(fields[3])
        columns = ['label', 'tag', 'Z', 'x', 'y', 'z']
        atom = pd.concat([self.pandas_dataframe(s, e, columns)
                          for s, e in zip(starts, stops)])
        # The letters are matched with an explicit A-Za-z class: the range
        # "A-z" would also accept the punctuation between "Z" and "a"
        # ([, \, ], ^, _ and `), leaking e.g. "_" into the symbol.
        atom['symbol'] = atom['tag'].str.extract(r'([A-Za-z]+)([0-9]*)',
                                                 expand=False)[0].str.lower().str.title()
        atom['Z'] = atom['Z'].astype(np.int64)
        atom['Zeff'] = (atom['Z'] - atom['tag'].map(ecps).fillna(value=0)).astype(np.int64)
        # Number of blocks (how often a label repeats) and atoms per block.
        nf = atom.label.value_counts().max()
        nat = atom.label.max()
        atom['frame'] = [i for i in range(nf) for _ in range(nat)]
        factor = Length[unit, 'au']
        atom['x'] *= factor
        atom['y'] *= factor
        atom['z'] *= factor
        if atom['frame'].max() > 0:
            li = atom['frame'].max()
            atom = atom[atom['frame'] != li].reset_index(drop=True)
        del atom['label']
        self.atom = Atom(atom)

    def parse_orbital(self):
        """
        Parse the :class:`~exatomic.core.orbital.Orbital` dataframe.

        When the analysis headers mention "Alpha", the alpha and beta
        sections are parsed separately and tagged with ``spin`` 0 and 1
        respectively; otherwise every section is parsed with ``spin`` 0.
        The result is stored as ``self.orbital``.
        """
        _remo01 = 'Molecular Orbital Analysis'
        _remo02 = 'alpha - beta orbital overlaps'
        _remo03 = 'center of mass'
        check = self.find(_remo01)
        # ``check`` holds (line number, line text) pairs, so the spin label
        # must be searched for in the text rather than in the pair itself.
        if any('Alpha' in line for _, line in check):
            alpha_starts = np.array([no for no, line in check if 'Alpha' in line], dtype=np.int64) + 2
            alpha_stops = np.array([no for no, line in check if 'Beta' in line], dtype=np.int64) - 1
            beta_starts = alpha_stops + 3
            beta_stops = np.array(self.find(_remo02, keys_only=True), dtype=np.int64) - 1
            alpha_orbital = self._parse_orbital(alpha_starts, alpha_stops)
            beta_orbital = self._parse_orbital(beta_starts, beta_stops)
            alpha_orbital['spin'] = 0
            beta_orbital['spin'] = 1
            orbital = pd.concat((alpha_orbital, beta_orbital), ignore_index=True)
        else:
            starts = np.array(list(zip(*check))[0], dtype=np.int64) + 2
            stops = np.array(self.find(_remo03, keys_only=True), dtype=np.int64) - 1
            orbital = self._parse_orbital(starts, stops)
            orbital['spin'] = 0
        orbital['group'] = 0
        self.orbital = Orbital(orbital)

    def parse_momatrix(self):
        """
        Parse the :class:`~exatomic.core.orbital.MOMatrix` dataframe.

        Nothing is parsed when the output has no "Final MO vectors" line.

        Note:
            Must supply 'print "final vectors" "final vectors analysis"' for momatrix
        """
        vec_key = "Final MO vectors"
        com_key = "center of mass"
        hits = self.find(vec_key, com_key)
        if not hits[vec_key]:
            return
        first = hits[vec_key][0][0] + 6
        last = hits[com_key][0][0] - 1
        text = "\n".join(self[first:last])
        table = pd.read_fwf(StringIO(text), widths=(6, 12, 12, 12, 12, 12, 12),
                            names=list(range(7)))
        self.c = table
        # Discard the rows whose leading column is empty, then that column.
        blank = table[table[0].isnull()].index.values
        table = table[~table.index.isin(blank)]
        del table[0]
        nbas = len(self.basis_set_order)
        nblock = table.shape[0] // nbas
        # Consecutive chunks of nbas rows are flattened column by column.
        # This mirrors numpy.array_split(df, n), which the original author
        # reported as producing splits of the wrong size on dataframes.
        chunks = [
            table.iloc[k * nbas:(k + 1) * nbas, :].astype(float).dropna(axis=1).values.ravel("F")
            for k in range(nblock)
        ]
        coef = np.concatenate(chunks)
        del chunks
        orbital, chi = _square_indices(len(self.basis_set_order))
        self.momatrix = MOMatrix.from_dict({'coef': coef, 'chi': chi,
                                            'orbital': orbital, 'frame': 0})



    def _parse_orbital(self, starts, stops):
        '''
        This function actually performs parsing of :class:`~exatomic.orbital.Orbital`

        Args:
            starts: Line numbers at which each orbital analysis section begins.
            stops: Line numbers at which each section ends (exclusive).

        Returns:
            pandas.DataFrame: One row per orbital with the columns ``x``,
            ``y``, ``z`` (read from the "MO Center" line), ``frame`` (index
            of the section the orbital came from), ``vector`` (zero-based),
            ``occupation`` and ``energy``.

        See Also:
            :func:`~exnwchem.output.Output.parse_orbital`
        '''
        joined = '\n'.join(['\n'.join(self[s:e]) for s, e in zip(starts, stops)])
        nvec = joined.count('Vector')
        # Kept for its side effects: parsing the basis set fills in
        # self.meta['spherical'], self.basis_set and self.atom['set'].
        if 'spherical' not in self.meta:
            self.parse_basis_set()

        def to_float(token):
            # Numbers are printed with Fortran "D" exponents (1.0D+00).
            return float(token.replace('D', 'E'))

        orb_no = np.empty((nvec, ), dtype=np.int64)
        occ = np.empty((nvec, ), dtype=np.float64)
        nrg = np.empty((nvec, ), dtype=np.float64)
        x = np.empty((nvec, ), dtype=np.float64)
        y = np.empty((nvec, ), dtype=np.float64)
        z = np.empty((nvec, ), dtype=np.float64)
        frame = np.empty((nvec, ), dtype=np.int64)
        oc = 0   # Orbital counter, advanced once the MO center is read
        for fc, (s, e) in enumerate(zip(starts, stops)):
            for line in self[s:e]:
                ls = line.split()
                if 'Vector' in line:
                    orb_no[oc] = int(ls[1])
                    occ[oc] = to_float(ls[2].replace('Occ=', ''))
                    # A negative energy is glued to "E=" in one token,
                    # otherwise the value is the following token.
                    if 'E=-' in line:
                        nrg[oc] = to_float(ls[3].replace('E=', ''))
                    else:
                        nrg[oc] = to_float(ls[4])
                    frame[oc] = fc
                elif 'MO Center' in line:
                    x[oc] = to_float(ls[2].replace(',', ''))
                    y[oc] = to_float(ls[3].replace(',', ''))
                    z[oc] = to_float(ls[4].replace(',', ''))
                    oc += 1
        orb_no -= 1
        # 'y' must carry the parsed y values (it previously reused z).
        return pd.DataFrame.from_dict({'x': x, 'y': y, 'z': z, 'frame': frame,
                                       'vector': orb_no, 'occupation': occ, 'energy': nrg})

    def parse_basis_set(self):
        """
        Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.

        Besides ``self.basis_set`` this records ``self.meta['spherical']``
        and adds a ``set`` column to ``self.atom`` that maps every atom tag
        to the integer label of its basis set. The atom table is parsed
        first when it is not available yet.
        """
        if not hasattr(self, "atom"):
            self.parse_atom()
        basis_key = ' Basis "'
        summary_key = ' Summary of "'
        hits = self.find(basis_key, summary_key)
        header_no, header_line = hits[basis_key][0]
        spherical = "spherical" in header_line
        first = header_no + 2
        summaries = hits[summary_key]
        which = 1 if len(summaries) > 1 else -1
        last = summaries[which][0] - 1
        # Read in all of the extra lines that contain ---- and tag names
        block = StringIO("\n".join(self[first:last]))
        df = pd.read_fwf(block, widths=(4, 2, 16, 16),
                         names=("shell", "L", "alpha", "d"))
        df.loc[df['shell'] == "--", "shell"] = np.nan
        # Rows whose first field is not a number carry the tag names
        tags = df.loc[(df['shell'].str.isdigit() == False), "shell"]
        bounds = tags.index.tolist()
        bounds.append(len(df))
        df['set'] = ""
        # Label every row from one tag line up to the next with that tag
        for lo, hi, tag in zip(bounds, bounds[1:], tags):
            df.loc[lo:hi, "set"] = tag
        df = df.dropna().reset_index(drop=True)
        # Tag name --> integer label, in order of first appearance
        mapper = {name: num for num, name in enumerate(df['set'].unique())}
        df['set'] = df['set'].map(mapper)
        df['L'] = df['L'].str.strip().str.lower().map(lmap)
        for col in ('alpha', 'd'):
            df[col] = df[col].astype(float)
        # NO SUPPORT FOR MULTIPLE FRAMES?
        df['frame'] = 0
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = spherical
        self.atom['set'] = self.atom['tag'].map(mapper)

    def parse_basis_set_order(self):
        """
        Build the basis function ordering and store it as
        ``self.basis_set_order``.

        One row is generated per basis function, looping over the atoms
        and, for each atom, over the shells of its basis set. Spherical
        bases carry an ``ml`` column, cartesian bases carry ``l``, ``m``
        and ``n`` columns. The basis set is parsed first when needed.

        The ``shell`` column is finally renumbered: the k-th occurrence
        (counting from zero) of a given function component on a given
        ``center`` and ``L`` receives shell index k.
        """
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        spherical = self.meta['spherical']
        # Columns identifying a single function within a shell
        components = ['ml'] if spherical else ['l', 'm', 'n']
        dtype = [('center', 'i8'), ('shell', 'i8'), ('L', 'i8')]
        dtype += [(name, 'i8') for name in components]
        mapper = self.basis_set.functions(spherical).groupby(level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        bso = np.empty((nbas,), dtype=dtype)
        cnt = 0
        bases = self.basis_set.groupby('set')
        for seht, center in zip(self.atom['set'], self.atom.index):
            for shell, grp in bases.get_group(seht).groupby('shell'):
                ang = grp['L'].values[0]
                if spherical:
                    for ml in spherical_ordering_function(ang):
                        bso[cnt] = (center, shell, ang, ml)
                        cnt += 1
                else:
                    for _, ll, m, n in cartesian_ordering_function(ang):
                        bso[cnt] = (center, shell, ang, ll, m, n)
                        cnt += 1
        bso = pd.DataFrame(bso)
        bso['frame'] = 0
        # New shell definition consistent with basis internals.
        # cumcount() numbers the repeats of each (center, L, component)
        # combination and returns them aligned with the rows of ``bso``,
        # so the result stays correct when shells are not sorted by L and
        # also works for cartesian bases, which have no 'ml' column.
        bso['shell'] = bso.groupby(['center', 'L'] + components).cumcount()
        self.basis_set_order = bso

    def parse_frame(self):
        """
        Create a minimal :class:`~exatomic.core.frame.Frame` from the (parsed)
        :class:`~exatomic.core.atom.Atom` object.

        When total energies are printed in the output they are stored in
        the ``total_energy`` column; DFT energies take precedence over SCF
        energies if both are present.
        """
        scf_key = 'Total SCF energy'
        dft_key = 'Total DFT energy'
        self.frame = compute_frame_from_atom(self.atom)
        hits = self.find(scf_key, dft_key)
        scf_hits = hits[scf_key]
        dft_hits = hits[dft_key]
        if dft_hits:
            if scf_hits:
                print('Warning: found total energies from scf and dft, using dft')
            chosen = dft_hits
        elif scf_hits:
            chosen = scf_hits
        else:
            # No energies printed: leave the frame without the column
            return
        # The energy is the last whitespace separated field of each line
        self.frame['total_energy'] = [float(text.split()[-1]) for _, text in chosen]


    def __init__(self, *args, **kwargs):
        """Forward all positional and keyword arguments to the parent initializer."""
        super(Output, self).__init__(*args, **kwargs)

Example #2

Show file

File: output.py Project: farnoushnouri/exatomic

class Output(six.with_metaclass(OutMeta, Editor)):
    """Editor for NWChem calculation output file (stdout)."""
    def parse_atom(self):
        """
        Parse the atom dataframe.

        Reads the atom table of every geometry block found in the output,
        derives the element symbol and ``Zeff`` (``Z`` minus the value read
        from the ECP table) of each atom, scales the coordinates by
        ``Length[unit, 'au']`` and stores the result as ``self.atom``.

        Note:
            When more than one geometry block is found, the rows belonging
            to the last block are discarded.
        """
        _reatom01 = 'Geometry "'
        _reatom02 = 'Atomic Mass'
        _reatom03 = 'ECP       "ecp basis"'
        _reatom04 = 'Output coordinates in'
        found = self.find(_reatom01,
                          _reatom02,
                          _reatom03,
                          _reatom04,
                          keys_only=True)
        unit = self[found[_reatom04][0]].split()[3]
        unit = "Angstrom" if unit == "angstroms" else "au"
        starts = np.array(found[_reatom01]) + 7
        stops = np.array(found[_reatom02]) - 1
        # Map the tag found two lines below each ECP header to the integer
        # in the fourth column of that line; it is subtracted from Z below.
        ecps = {}
        for ln in np.array(found[_reatom03]) + 2:
            fields = self[ln].split()
            ecps[fields[0]] = int(fields[3])
        columns = ['label', 'tag', 'Z', 'x', 'y', 'z']
        atom = pd.concat([
            self.pandas_dataframe(s, e, columns)
            for s, e in zip(starts, stops)
        ])
        # The letters are matched with an explicit A-Za-z class: the range
        # "A-z" would also accept the punctuation between "Z" and "a"
        # ([, \, ], ^, _ and `), leaking e.g. "_" into the symbol.
        atom['symbol'] = atom['tag'].str.extract(
            r'([A-Za-z]+)([0-9]*)', expand=False)[0].str.lower().str.title()
        atom['Z'] = atom['Z'].astype(np.int64)
        atom['Zeff'] = (atom['Z'] -
                        atom['tag'].map(ecps).fillna(value=0)).astype(np.int64)
        # Number of blocks (how often a label repeats) and atoms per block.
        nf = atom.label.value_counts().max()
        nat = atom.label.max()
        atom['frame'] = [i for i in range(nf) for _ in range(nat)]
        factor = Length[unit, 'au']
        atom['x'] *= factor
        atom['y'] *= factor
        atom['z'] *= factor
        if atom['frame'].max() > 0:
            li = atom['frame'].max()
            atom = atom[atom['frame'] != li].reset_index(drop=True)
        del atom['label']
        self.atom = Atom(atom)

    def parse_orbital(self):
        """
        Parse the :class:`~exatomic.core.orbital.Orbital` dataframe.

        When the analysis headers mention "Alpha", the alpha and beta
        sections are parsed separately and tagged with ``spin`` 0 and 1
        respectively; otherwise every section is parsed with ``spin`` 0.
        The result is stored as ``self.orbital``.
        """
        _remo01 = 'Molecular Orbital Analysis'
        _remo02 = 'alpha - beta orbital overlaps'
        _remo03 = 'center of mass'
        check = self.find(_remo01)
        # ``check`` holds (line number, line text) pairs, so the spin label
        # must be searched for in the text rather than in the pair itself.
        if any('Alpha' in line for _, line in check):
            alpha_starts = np.array(
                [no for no, line in check if 'Alpha' in line],
                dtype=np.int64) + 2
            alpha_stops = np.array(
                [no for no, line in check if 'Beta' in line],
                dtype=np.int64) - 1
            beta_starts = alpha_stops + 3
            beta_stops = np.array(self.find(_remo02, keys_only=True),
                                  dtype=np.int64) - 1
            alpha_orbital = self._parse_orbital(alpha_starts, alpha_stops)
            beta_orbital = self._parse_orbital(beta_starts, beta_stops)
            alpha_orbital['spin'] = 0
            beta_orbital['spin'] = 1
            orbital = pd.concat((alpha_orbital, beta_orbital),
                                ignore_index=True)
        else:
            starts = np.array(list(zip(*check))[0], dtype=np.int64) + 2
            stops = np.array(self.find(_remo03, keys_only=True),
                             dtype=np.int64) - 1
            orbital = self._parse_orbital(starts, stops)
            orbital['spin'] = 0
        orbital['group'] = 0
        self.orbital = Orbital(orbital)

    def parse_momatrix(self):
        """
        Parse the :class:`~exatomic.core.orbital.MOMatrix` dataframe.

        Nothing is parsed when the output has no "Final MO vectors" line.

        Note:
            Must supply 'print "final vectors" "final vectors analysis"' for momatrix
        """
        vec_key = "Final MO vectors"
        com_key = "center of mass"
        hits = self.find(vec_key, com_key)
        if not hits[vec_key]:
            return
        first = hits[vec_key][0][0] + 6
        last = hits[com_key][0][0] - 1
        text = "\n".join(self[first:last])
        table = pd.read_fwf(StringIO(text),
                            widths=(6, 12, 12, 12, 12, 12, 12),
                            names=list(range(7)))
        self.c = table
        # Discard the rows whose leading column is empty, then that column.
        blank = table[table[0].isnull()].index.values
        table = table[~table.index.isin(blank)]
        del table[0]
        nbas = len(self.basis_set_order)
        nblock = table.shape[0] // nbas
        # Consecutive chunks of nbas rows are flattened column by column.
        # This mirrors numpy.array_split(df, n), which the original author
        # reported as producing splits of the wrong size on dataframes.
        chunks = []
        for k in range(nblock):
            rows = table.iloc[k * nbas:(k + 1) * nbas, :]
            chunks.append(rows.astype(float).dropna(axis=1).values.ravel("F"))
        coef = np.concatenate(chunks)
        del chunks
        orbital, chi = _square_indices(len(self.basis_set_order))
        self.momatrix = MOMatrix.from_dict({
            'coef': coef,
            'chi': chi,
            'orbital': orbital,
            'frame': 0
        })

    def _parse_orbital(self, starts, stops):
        '''
        This function actually performs parsing of :class:`~exatomic.orbital.Orbital`

        Args:
            starts: Line numbers at which each orbital analysis section begins.
            stops: Line numbers at which each section ends (exclusive).

        Returns:
            pandas.DataFrame: One row per orbital with the columns ``x``,
            ``y``, ``z`` (read from the "MO Center" line), ``frame`` (index
            of the section the orbital came from), ``vector`` (zero-based),
            ``occupation`` and ``energy``.

        See Also:
            :func:`~exnwchem.output.Output.parse_orbital`
        '''
        joined = '\n'.join(
            ['\n'.join(self[s:e]) for s, e in zip(starts, stops)])
        nvec = joined.count('Vector')
        # Kept for its side effects: parsing the basis set fills in
        # self.meta['spherical'], self.basis_set and self.atom['set'].
        if 'spherical' not in self.meta:
            self.parse_basis_set()

        def to_float(token):
            # Numbers are printed with Fortran "D" exponents (1.0D+00).
            return float(token.replace('D', 'E'))

        orb_no = np.empty((nvec, ), dtype=np.int64)
        occ = np.empty((nvec, ), dtype=np.float64)
        nrg = np.empty((nvec, ), dtype=np.float64)
        x = np.empty((nvec, ), dtype=np.float64)
        y = np.empty((nvec, ), dtype=np.float64)
        z = np.empty((nvec, ), dtype=np.float64)
        frame = np.empty((nvec, ), dtype=np.int64)
        oc = 0  # Orbital counter, advanced once the MO center is read
        for fc, (s, e) in enumerate(zip(starts, stops)):
            for line in self[s:e]:
                ls = line.split()
                if 'Vector' in line:
                    orb_no[oc] = int(ls[1])
                    occ[oc] = to_float(ls[2].replace('Occ=', ''))
                    # A negative energy is glued to "E=" in one token,
                    # otherwise the value is the following token.
                    if 'E=-' in line:
                        nrg[oc] = to_float(ls[3].replace('E=', ''))
                    else:
                        nrg[oc] = to_float(ls[4])
                    frame[oc] = fc
                elif 'MO Center' in line:
                    x[oc] = to_float(ls[2].replace(',', ''))
                    y[oc] = to_float(ls[3].replace(',', ''))
                    z[oc] = to_float(ls[4].replace(',', ''))
                    oc += 1
        orb_no -= 1
        # 'y' must carry the parsed y values (it previously reused z).
        return pd.DataFrame.from_dict({
            'x': x,
            'y': y,
            'z': z,
            'frame': frame,
            'vector': orb_no,
            'occupation': occ,
            'energy': nrg
        })

    def parse_basis_set(self):
        """
        Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.

        Besides ``self.basis_set`` this records ``self.meta['spherical']``
        and adds a ``set`` column to ``self.atom`` that maps every atom tag
        to the integer label of its basis set. The atom table is parsed
        first when it is not available yet.
        """
        if not hasattr(self, "atom"):
            self.parse_atom()
        basis_key = ' Basis "'
        summary_key = ' Summary of "'
        hits = self.find(basis_key, summary_key)
        header_no, header_line = hits[basis_key][0]
        spherical = "spherical" in header_line
        first = header_no + 2
        summaries = hits[summary_key]
        which = 1 if len(summaries) > 1 else -1
        last = summaries[which][0] - 1
        # Read in all of the extra lines that contain ---- and tag names
        block = StringIO("\n".join(self[first:last]))
        df = pd.read_fwf(block,
                         widths=(4, 2, 16, 16),
                         names=("shell", "L", "alpha", "d"))
        df.loc[df['shell'] == "--", "shell"] = np.nan
        # Rows whose first field is not a number carry the tag names
        tags = df.loc[(df['shell'].str.isdigit() == False), "shell"]
        bounds = tags.index.tolist()
        bounds.append(len(df))
        df['set'] = ""
        # Label every row from one tag line up to the next with that tag
        for lo, hi, tag in zip(bounds, bounds[1:], tags):
            df.loc[lo:hi, "set"] = tag
        df = df.dropna().reset_index(drop=True)
        # Tag name --> integer label, in order of first appearance
        mapper = {name: num for num, name in enumerate(df['set'].unique())}
        df['set'] = df['set'].map(mapper)
        df['L'] = df['L'].str.strip().str.lower().map(lmap)
        for col in ('alpha', 'd'):
            df[col] = df[col].astype(float)
        # NO SUPPORT FOR MULTIPLE FRAMES?
        df['frame'] = 0
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = spherical
        self.atom['set'] = self.atom['tag'].map(mapper)

    def parse_basis_set_order(self):
        """
        Build the basis function ordering and store it as
        ``self.basis_set_order``.

        One row is generated per basis function, looping over the atoms
        and, for each atom, over the shells of its basis set. Spherical
        bases carry an ``ml`` column, cartesian bases carry ``l``, ``m``
        and ``n`` columns. The basis set is parsed first when needed.

        The ``shell`` column is finally renumbered: the k-th occurrence
        (counting from zero) of a given function component on a given
        ``center`` and ``L`` receives shell index k.
        """
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        spherical = self.meta['spherical']
        # Columns identifying a single function within a shell
        components = ['ml'] if spherical else ['l', 'm', 'n']
        dtype = [('center', 'i8'), ('shell', 'i8'), ('L', 'i8')]
        dtype += [(name, 'i8') for name in components]
        mapper = self.basis_set.functions(spherical).groupby(
            level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        bso = np.empty((nbas, ), dtype=dtype)
        cnt = 0
        bases = self.basis_set.groupby('set')
        for seht, center in zip(self.atom['set'], self.atom.index):
            for shell, grp in bases.get_group(seht).groupby('shell'):
                ang = grp['L'].values[0]
                if spherical:
                    for ml in spherical_ordering_function(ang):
                        bso[cnt] = (center, shell, ang, ml)
                        cnt += 1
                else:
                    for _, ll, m, n in cartesian_ordering_function(ang):
                        bso[cnt] = (center, shell, ang, ll, m, n)
                        cnt += 1
        bso = pd.DataFrame(bso)
        bso['frame'] = 0
        # New shell definition consistent with basis internals.
        # cumcount() numbers the repeats of each (center, L, component)
        # combination and returns them aligned with the rows of ``bso``,
        # so the result stays correct when shells are not sorted by L and
        # also works for cartesian bases, which have no 'ml' column.
        bso['shell'] = bso.groupby(['center', 'L'] + components).cumcount()
        self.basis_set_order = bso

    def parse_frame(self):
        """
        Create a minimal :class:`~exatomic.core.frame.Frame` from the (parsed)
        :class:`~exatomic.core.atom.Atom` object.

        When total energies are printed in the output they are stored in
        the ``total_energy`` column; DFT energies take precedence over SCF
        energies if both are present.
        """
        scf_key = 'Total SCF energy'
        dft_key = 'Total DFT energy'
        self.frame = compute_frame_from_atom(self.atom)
        hits = self.find(scf_key, dft_key)
        scf_hits = hits[scf_key]
        dft_hits = hits[dft_key]
        if dft_hits:
            if scf_hits:
                print('Warning: found total energies from scf and dft, using dft')
            chosen = dft_hits
        elif scf_hits:
            chosen = scf_hits
        else:
            # No energies printed: leave the frame without the column
            return
        # The energy is the last whitespace separated field of each line
        self.frame['total_energy'] = [
            float(text.split()[-1]) for _, text in chosen
        ]

    def __init__(self, *args, **kwargs):
        """Forward all positional and keyword arguments to the parent initializer."""
        super(Output, self).__init__(*args, **kwargs)

Example #3

Show file

class TestBasisSet(TestCase):
    """Tests for the counting methods of ``BasisSet``."""

    # (set, L) index levels of the medium and large fixtures
    _MEDIUM = ([0, 0, 1], [0, 1, 0])
    _LARGE = ([0, 0, 0, 1, 1], [0, 1, 2, 0, 1])

    def setUp(self):
        """Create a trivial, a medium and a large basis set."""
        # Trivial basis set: a single zero in every column
        trivial = dict((col, [0]) for col in BasisSet._columns)
        trivial['frame'] = 0
        self.bs = BasisSet(trivial)
        for col in ('alpha', 'd'):
            self.bs[col] = self.bs[col].astype(np.float64)
        # Medium basis set
        medium = {
            'frame': 0,
            'alpha': [5., 1., 1.],
            'd': [1., 1., 1.],
            'shell': [0, 1, 0],
            'set': [0, 0, 1],
            'L': [0, 1, 0],
            'n': [1, 2, 1]
        }
        self.mbs = BasisSet(medium)
        # Large basis set
        large = {
            'frame': 0,
            'alpha': [5., 3., 1., 3., 1., 1., 3., 1., 1.],
            'd': [1., 1., 1., 1., 1., 1., 1., 1., 1.],
            'shell': [0, 0, 0, 1, 1, 2, 0, 0, 1],
            'set': [0, 0, 0, 0, 0, 0, 1, 1, 1],
            'L': [0, 0, 0, 1, 1, 2, 0, 0, 1]
        }
        self.lbs = BasisSet(large)

    def _trivial_index(self):
        """Return the one-entry (set, L) index of the trivial fixture."""
        return pd.MultiIndex.from_product([[0], [0]], names=['set', 'L'])

    def _index(self, levels):
        """Return a (set, L) index built from a pair of level arrays."""
        return pd.MultiIndex.from_arrays(list(levels), names=['set', 'L'])

    def _check(self, actual, counts, index):
        """Assert that ``actual`` equals the series of ``counts`` on ``index``."""
        expected = pd.Series(counts, index=index)
        self.assertTrue((actual == expected).all())

    def test_lmax(self):
        for basis, lmax in ((self.bs, 0), (self.mbs, 1), (self.lbs, 2)):
            self.assertEqual(basis.lmax, lmax)

    def test_shells(self):
        # Smoke test only: the calls must not raise
        for basis in (self.bs, self.mbs, self.lbs):
            basis.shells()

    def test_functions_by_shell(self):
        self._check(self.bs.functions_by_shell(), [1], self._trivial_index())
        self._check(self.mbs.functions_by_shell(), [1, 1, 1],
                    self._index(self._MEDIUM))
        self._check(self.lbs.functions_by_shell(), [1, 1, 1, 1, 1],
                    self._index(self._LARGE))

    def test_primitives_by_shell(self):
        self._check(self.bs.primitives_by_shell(), [1], self._trivial_index())
        self._check(self.mbs.primitives_by_shell(), [1, 1, 1],
                    self._index(self._MEDIUM))
        self._check(self.lbs.primitives_by_shell(), [3, 2, 1, 2, 1],
                    self._index(self._LARGE))

    def test_functions(self):
        self._check(self.bs.functions(False), [1], self._trivial_index())
        self._check(self.bs.functions(True), [1], self._trivial_index())
        self._check(self.mbs.functions(False), [1, 3, 1],
                    self._index(self._MEDIUM))
        self._check(self.mbs.functions(True), [1, 3, 1],
                    self._index(self._MEDIUM))
        self._check(self.lbs.functions(False), [1, 3, 6, 1, 3],
                    self._index(self._LARGE))
        self._check(self.lbs.functions(True), [1, 3, 5, 1, 3],
                    self._index(self._LARGE))

    def test_primitives(self):
        self._check(self.bs.primitives(False), [1], self._trivial_index())
        self._check(self.bs.primitives(True), [1], self._trivial_index())
        self._check(self.mbs.primitives(False), [1, 3, 1],
                    self._index(self._MEDIUM))
        self._check(self.mbs.primitives(True), [1, 3, 1],
                    self._index(self._MEDIUM))
        self._check(self.lbs.primitives(False), [3, 6, 6, 2, 3],
                    self._index(self._LARGE))
        self._check(self.lbs.primitives(True), [3, 6, 5, 2, 3],
                    self._index(self._LARGE))

Example #4

Show file

File: output.py Project: chrinide/exatomic

class Output(six.with_metaclass(OutMeta, Editor)):
    """Editor for NWChem calculation output file (stdout)."""
    def parse_atom(self):
        """Parse the atom dataframe.

        Geometry tables are read from the lines between each ``Geometry "``
        marker and the following ``Atomic Mass`` marker.  Coordinates are
        scaled by ``Length[unit, 'au']`` where ``unit`` is taken from the
        first ``Output coordinates in`` line.  When more than one geometry
        is present, the rows of the last one are dropped.  The result is
        stored on ``self.atom``.
        """
        _reatom01 = 'Geometry "'
        _reatom02 = 'Atomic Mass'
        _reatom03 = 'ECP       "ecp basis"'
        _reatom04 = 'Output coordinates in'
        found = self.find(_reatom01,
                          _reatom02,
                          _reatom03,
                          _reatom04,
                          keys_only=True)
        unit = self[found[_reatom04][0]].split()[3]
        unit = "Angstrom" if unit == "angstroms" else "au"
        starts = np.array(found[_reatom01]) + 7
        stops = np.array(found[_reatom02]) - 1
        # Map tag -> integer read from the 4th field, two lines below each
        # ECP header; it is subtracted from Z below to obtain Zeff.
        ecps = np.array(found[_reatom03]) + 2
        ecps = {self[ln].split()[0]: int(self[ln].split()[3]) for ln in ecps}
        columns = ['label', 'tag', 'Z', 'x', 'y', 'z']
        atom = pd.concat([
            self.pandas_dataframe(s, e, columns)
            for s, e in zip(starts, stops)
        ])
        # Leading letters of the tag give the symbol.  The class is spelled
        # [A-Za-z] because the range A-z also matches "[", "\", "]", "^",
        # "_" and "`", which sit between "Z" and "a" in ASCII.
        atom['symbol'] = atom['tag'].str.extract(
            r'([A-Za-z]+)([0-9]*)', expand=False)[0].str.lower().str.title()
        atom['Z'] = atom['Z'].astype(np.int64)
        atom['Zeff'] = (atom['Z'] -
                        atom['tag'].map(ecps).fillna(value=0)).astype(np.int64)
        # Number of geometries (how often the most common label occurs) and
        # number of atoms per geometry (largest 1-based label).
        nf = atom.label.value_counts().max()
        nat = atom.label.max()
        atom['frame'] = np.repeat(np.arange(nf, dtype=np.int64), nat)
        factor = Length[unit, 'au']
        for axis in ('x', 'y', 'z'):
            atom[axis] *= factor
        last = atom['frame'].max()
        if last > 0:
            # Drop the final geometry; build a fresh frame instead of
            # resetting the index of a filtered slice in place.
            atom = atom[atom['frame'] != last].reset_index(drop=True)
        del atom['label']
        self.atom = Atom(atom)

    def parse_orbital(self):
        """Parse the :class:`~exatomic.core.orbital.Orbital` dataframe.

        Orbital blocks are located through the ``Molecular Orbital
        Analysis`` headers.  If any header line mentions ``Alpha``, alpha
        and beta blocks are parsed separately and tagged with ``spin`` 0
        and 1; otherwise a single set of blocks is parsed with ``spin`` 0.
        The result is stored on ``self.orbital``.
        """
        _remo01 = 'Molecular Orbital Analysis'
        _remo02 = 'alpha - beta orbital overlaps'
        _remo03 = 'center of mass'
        check = self.find(_remo01)
        # ``check`` holds (line number, line text) pairs, so the substring
        # test has to look at the text; testing ``'Alpha' in pair`` would be
        # tuple membership and never match.
        if any('Alpha' in line for _, line in check):
            alpha_starts = np.array(
                [no for no, line in check if 'Alpha' in line],
                dtype=np.int64) + 2
            alpha_stops = np.array(
                [no for no, line in check if 'Beta' in line],
                dtype=np.int64) - 1
            beta_starts = alpha_stops + 3
            beta_stops = np.array(self.find(_remo02, keys_only=True),
                                  dtype=np.int64) - 1
            alpha_orbital = self._parse_orbital(alpha_starts, alpha_stops)
            beta_orbital = self._parse_orbital(beta_starts, beta_stops)
            alpha_orbital['spin'] = 0
            beta_orbital['spin'] = 1
            orbital = pd.concat((alpha_orbital, beta_orbital),
                                ignore_index=True)
        else:
            starts = np.array(list(zip(*check))[0], dtype=np.int64) + 2
            stops = np.array(self.find(_remo03, keys_only=True),
                             dtype=np.int64) - 1
            orbital = self._parse_orbital(starts, stops)
            orbital['spin'] = 0
        orbital['group'] = 0
        self.orbital = Orbital(orbital)

    def parse_momatrix(self):
        """
        Parse the :class:`~exatomic.core.orbital.MOMatrix` dataframe.

        Nothing is done when no ``Final MO vectors`` line is found.  The raw
        fixed-width table is also kept on ``self.c``.

        Note:
            Must supply 'print "final vectors" "final vectors analysis"' for momatrix
        """
        header_key = "Final MO vectors"
        footer_key = "center of mass"
        hits = self.find(header_key, footer_key)
        if not hits[header_key]:
            return
        first = hits[header_key][0][0] + 6
        last = hits[footer_key][0][0] - 1
        raw = pd.read_fwf(StringIO("\n".join(self[first:last])),
                          widths=(6, 12, 12, 12, 12, 12, 12),
                          names=list(range(7)))
        self.c = raw
        # Rows whose first field is empty are discarded, then the first
        # column itself is removed.
        empty_rows = raw[raw[0].isnull()].index.values
        table = raw[~raw.index.isin(empty_rows)]
        del table[0]
        nbas = len(self.basis_set_order)
        nblocks = table.shape[0] // nbas
        # Slice the table into consecutive chunks of nbas rows by hand; the
        # original author reported wrong chunk sizes from numpy.array_split
        # on dataframes.  Each chunk is flattened column by column.
        pieces = [
            table.iloc[k * nbas:(k + 1) * nbas, :]
            .astype(float)
            .dropna(axis=1)
            .values.ravel("F")
            for k in range(nblocks)
        ]
        coef = np.concatenate(pieces)
        orbital, chi = _square_indices(len(self.basis_set_order))
        self.momatrix = MOMatrix.from_dict({
            'coef': coef,
            'chi': chi,
            'orbital': orbital,
            'frame': 0
        })

    def _parse_orbital(self, starts, stops):
        '''
        This function actually performs parsing of :class:`~exatomic.orbital.Orbital`

        Args:
            starts: first line number of each orbital block
            stops: line number one past the end of each orbital block

        Returns:
            pandas.DataFrame with columns ``x``, ``y``, ``z``, ``frame``,
            ``vector``, ``occupation`` and ``energy``; one row per
            ``Vector`` entry, with ``frame`` counting the blocks from 0.

        See Also:
            :func:`~exnwchem.output.Output.parse_orbital`
        '''
        def to_float(token):
            # The output uses Fortran-style exponents ("1.0D+00").
            return float(token.replace('D', 'E'))

        nvec = sum(line.count('Vector')
                   for s, e in zip(starts, stops)
                   for line in self[s:e])
        # Kept from the original implementation: make sure the basis set
        # (and with it self.meta['spherical']) has been parsed.
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        orb_no = np.empty((nvec, ), dtype=np.int64)
        occ = np.empty((nvec, ), dtype=np.float64)
        nrg = np.empty((nvec, ), dtype=np.float64)
        x = np.empty((nvec, ), dtype=np.float64)
        y = np.empty((nvec, ), dtype=np.float64)
        z = np.empty((nvec, ), dtype=np.float64)
        frame = np.empty((nvec, ), dtype=np.int64)
        oc = 0  # Orbital counter, advanced on every 'MO Center' line
        for fc, (s, e) in enumerate(zip(starts, stops)):
            for line in self[s:e]:
                ls = line.split()
                if 'Vector' in line:
                    orb_no[oc] = int(ls[1])
                    occ[oc] = to_float(ls[2].replace('Occ=', ''))
                    # A negative energy is glued to its label ("E=-1.0D+00")
                    # and is field 3; otherwise the value is its own field.
                    if 'E=-' in line:
                        nrg[oc] = to_float(ls[3].replace('E=', ''))
                    else:
                        nrg[oc] = to_float(ls[4])
                    frame[oc] = fc
                elif 'MO Center' in line:
                    x[oc] = to_float(ls[2].replace(',', ''))
                    y[oc] = to_float(ls[3].replace(',', ''))
                    z[oc] = to_float(ls[4].replace(',', ''))
                    oc += 1
        orb_no -= 1
        return pd.DataFrame.from_dict({
            'x': x,
            'y': y,  # was filled from z, discarding the parsed y values
            'z': z,
            'frame': frame,
            'vector': orb_no,
            'occupation': occ,
            'energy': nrg
        })

    def parse_basis_set(self):
        """
        Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.

        Besides ``self.basis_set`` this also records
        ``self.meta['spherical']`` and adds a ``set`` column to
        ``self.atom`` (parsing the atoms first if necessary).
        """
        if not hasattr(self, "atom"):
            self.parse_atom()
        basis_key = ' Basis "'
        summary_key = ' Summary of "'
        hits = self.find(basis_key, summary_key)
        basis_hits = hits[basis_key]
        summary_hits = hits[summary_key]
        spherical = "spherical" in basis_hits[0][1]
        first = basis_hits[0][0] + 2
        # Use the second summary line when there are several, else the last.
        which = 1 if len(summary_hits) > 1 else -1
        last = summary_hits[which][0] - 1
        # The block still contains the "----" separators and the tag lines.
        table = pd.read_fwf(StringIO("\n".join(self[first:last])),
                            widths=(4, 2, 16, 16),
                            names=("shell", "L", "alpha", "d"))
        table.loc[table['shell'] == "--", "shell"] = np.nan
        # Non-numeric entries left in the shell column are the tag names.
        tags = table.loc[(table['shell'].str.isdigit() == False), "shell"]
        bounds = tags.index.tolist() + [len(table)]
        table['set'] = ""
        # Label every row from one tag line up to the next with that tag.
        for lo, hi, tag in zip(bounds, bounds[1:], tags):
            table.loc[lo:hi, "set"] = tag
        table = table.dropna().reset_index(drop=True)
        mapper = {tag: num for num, tag in enumerate(table['set'].unique())}
        table['set'] = table['set'].map(mapper)
        table['L'] = table['L'].str.strip().str.lower().map(lmap)
        table['alpha'] = table['alpha'].astype(float)
        table['d'] = table['d'].astype(float)
        # NO SUPPORT FOR MULTIPLE FRAMES?
        table['frame'] = 0
        self.basis_set = BasisSet(table)
        self.meta['spherical'] = spherical
        self.atom['set'] = self.atom['tag'].map(mapper)

    def parse_basis_set_order(self):
        """
        Build the basis function ordering table and store it on
        ``self.basis_set_order``.

        One row is produced per basis function, looping over atoms, then
        over the shells of the atom's basis set, then over the entries
        yielded by ``spherical_ordering_function`` (columns ``center``,
        ``shell``, ``L``, ``ml``) or ``cartesian_ordering_function``
        (columns ``center``, ``shell``, ``L``, ``l``, ``m``, ``n``),
        depending on ``self.meta['spherical']``.  The ``shell`` column is
        finally replaced by a running count of identical functions on the
        same center.
        """
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        spherical = self.meta['spherical']
        dtype = [('center', 'i8'), ('shell', 'i8'), ('L', 'i8')]
        if spherical:
            dtype += [('ml', 'i8')]
        else:
            dtype += [('l', 'i8'), ('m', 'i8'), ('n', 'i8')]
        mapper = self.basis_set.functions(spherical).groupby(level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        bso = np.empty((nbas, ), dtype=dtype)
        cnt = 0
        bases = self.basis_set.groupby('set')
        for seht, center in zip(self.atom['set'], self.atom.index):
            for shell, grp in bases.get_group(seht).groupby('shell'):
                l = grp['L'].values[0]
                if spherical:
                    for ml in spherical_ordering_function(l):
                        bso[cnt] = (center, shell, l, ml)
                        cnt += 1
                else:
                    for _, ll, m, n in cartesian_ordering_function(l):
                        bso[cnt] = (center, shell, l, ll, m, n)
                        cnt += 1
        bso = pd.DataFrame(bso)
        bso['frame'] = 0
        # New shell definition consistent with basis internals: the n-th
        # occurrence of the same function on a center belongs to shell n.
        # cumcount() keeps the numbers aligned with their rows whatever the
        # row order is, and the key falls back to (l, m, n) for cartesian
        # bases, which have no 'ml' column.
        if spherical:
            keys = ['center', 'L', 'ml']
        else:
            keys = ['center', 'L', 'l', 'm', 'n']
        bso['shell'] = bso.groupby(keys).cumcount()
        self.basis_set_order = bso

    def parse_roa(self):
        """
        Parse the :class:`~exatomic.core.tensor.Polarizability` dataframe. This will parse the
        output from the Raman Optical Activity outputs.

        Nothing is done when no ``roa begin`` line is found.  The result,
        four 2D tensors followed by six rows for the dipole-quadrupole
        tensors, is stored on ``self.roa``.

        Note:
            We generate a 3D tensor with the 2D tensor code. 3D tensors will have 3 rows labeled
            with the same name.
        """
        _reroa = 'roa begin'
        _reare = 'alpha real'
        _reaim = 'alpha im'
        _reombre = 'omega beta(real)'
        _reombim = 'omega beta(imag)'
        _redqre = 'dipole-quadrupole real (Cartesian)'
        _redqim = 'dipole-quadrupole imag (Cartesian)'
        components = ['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz']

        if not self.find(_reroa):
            return
        found_2d = self.find(_reare,
                             _reaim,
                             _reombre,
                             _reombim,
                             keys_only=True)
        found_3d = self.find(_redqre, _redqim, keys_only=True)
        columns = ['x', 'val']
        # Each of the four 2D tensors is a 9-line table below its header.
        lines_2d = np.array(list(found_2d.values())).reshape(4, )
        data = [
            self.pandas_dataframe(s, e, columns)
            for s, e in zip(lines_2d + 1, lines_2d + 10)
        ]
        vals_2d = pd.concat(data).reset_index(drop=True)['val'].values
        # One row per tensor, nine consecutive values each.  A plain reshape
        # gives the same table as grouping on a helper column, without
        # depending on how groupby.apply treats grouping columns.
        df = pd.DataFrame(vals_2d.reshape(4, 9), columns=components)
        # find the electric dipole-quadrupole polarizability
        # NWChem gives this as a list of 18 values assuming the matrix to be symmetric
        # for our implementation we need to extend it to 27 elements
        # TODO: check that NWChem does assume that the 3D tensors are symmetric
        lines_3d = np.sort(np.array(list(found_3d.values())).reshape(2, ))
        data = [
            self.pandas_dataframe(s, e, columns)
            for s, e in zip(lines_3d + 1, lines_3d + 19)
        ]
        df3 = pd.concat(data).reset_index(drop=True)
        vals = df3['val'].values.reshape(2, 3, 6)
        adx = np.triu_indices(3)
        mat = np.zeros((2, 3, 3, 3))
        for i in range(2):
            for j in range(3):
                # Fill the upper triangle, then mirror it below the diagonal.
                mat[i][j][adx] = vals[i][j]
                mat[i][j] = mat[i][j] + np.transpose(
                    mat[i][j]) - np.identity(3) * mat[i][j]
        # One row per 3x3 slice, flattened column by column (the order the
        # previous groupby/unstack produced; the slices are symmetric).
        names_3d = [
            'Ax_real', 'Ay_real', 'Az_real', 'Ax_imag', 'Ay_imag', 'Az_imag'
        ]
        df3 = pd.DataFrame(mat.swapaxes(2, 3).reshape(6, 9),
                           columns=components,
                           index=names_3d)
        renamed = {
            _reare: 'alpha-real',
            _reombre: 'g_prime-real',
            _reaim: 'alpha-imag',
            _reombim: 'g_prime-imag',
        }
        # Build the labels directly rather than through an in-place replace
        # on a column selection, which does not reach the parent frame when
        # pandas copy-on-write is active.
        names_2d = [renamed.get(key, key) for key in found_2d.keys()]
        df['label'] = [name.split('-')[0] for name in names_2d]
        df['type'] = [name.split('-')[-1] for name in names_2d]
        df['frame'] = 0
        df3['label'] = [name.split('_')[0] for name in names_3d]
        df3['type'] = [name.split('_')[1] for name in names_3d]
        df3['frame'] = 0
        self.roa = pd.concat([df, df3], ignore_index=True)

    def parse_frequency(self):
        """
        Parse the :class:`~exatomic.core.atom.Frequency` dataframe.

        Nothing is done unless both the normal mode header and the
        ``Atom information`` table are present.  The result has one row per
        (kept frequency, atom) pair and is stored on ``self.frequency``.

        Note:
            This code removes all negative frequencies.
        """
        _remeth = "NORMAL MODE EIGENVECTORS IN CARTESIAN COORDINATES"
        _refreq = "Frequency"
        _renat = "Atom information"

        found = self.find(_remeth)
        fnat = self.find(_renat)
        # Both sections are indexed below, so either one missing means
        # there is nothing to parse.
        if not found or not fnat:
            return
        # get atom information
        # we assume that there is only one instance of where _renat is found
        start = fnat[0][0] + 3
        stop = start
        while '----' not in self[stop]:
            stop += 1
        columns = ['symbol', 'atom', 'x', 'y', 'z', 'mass']
        atom = self.pandas_dataframe(start, stop, columns)
        atom['atom'] -= 1
        nat = len(atom)
        # find bounds where the calculated frequencies are
        start = found[0][0]
        stop = found[1][0]
        # get the data
        found = self.find(_refreq, start=start, stop=stop)
        dfs = []
        fdx = 0  # running index of the first frequency in the next chunk
        for lno, ln in found:
            # get the frequency values
            freq = np.asarray([float(i) for i in ln.split()[1:]])
            ## TODO: here we remove all negative frequencies
            ##       need to find out if this is ok to do
            # set start and end points for the calculated normal modes
            staf = lno + start + 1
            stof = lno + start + nat * 3 + 2
            nm = self.pandas_dataframe(staf, stof,
                                       ncol=len(freq)).reset_index(drop=True)
            # boolean mask that is False for negative frequencies
            keep = [not f < 0 for f in freq]
            # remove the columns of the negative frequencies
            nm.drop(columns=[idx for idx, ok in enumerate(keep) if not ok],
                    inplace=True)
            freq = freq[keep]
            # get normal modes in the x, y, z directions
            # NOTE(review): the ::3 slicing assumes a particular element
            # order in the stacked table -- confirm against the layout
            # returned by pandas_dataframe.
            nm = nm.stack().values
            nfreq = len(freq)
            dx = nm[::3]
            dy = nm[1::3]
            dz = nm[2::3]
            # assemble dataframe: every column has nfreq * nat entries
            symbol = np.tile(atom['symbol'], nfreq)
            adx = np.tile(atom['atom'], nfreq)
            freq = np.repeat(freq, nat)
            # Each frequency index spans nat rows, like ``freq`` above
            # (it was repeated nfreq times, giving a wrong length).
            freqdx = np.repeat(np.arange(fdx, fdx + nfreq), nat)
            frames = np.repeat([0], nfreq * nat)
            fdx += nfreq
            stacked = pd.DataFrame.from_dict({
                'symbol': symbol,
                'atom': adx,
                'dx': dx,
                'dy': dy,
                'dz': dz,
                'freq': freq,
                'freqdx': freqdx,
                'frames': frames
            })
            dfs.append(stacked)
        frequency = pd.concat(dfs).reset_index(drop=True)
        self.frequency = frequency

    def parse_gradient(self):
        """
        Parse :class:`exatomic.core.gradient.Gradient` dataframe.

        Nothing is done when no ``DFT ENERGY GRADIENTS`` line is found.
        Every gradient table is tagged with a ``frame`` counter and the
        combined table is stored on ``self.gradient``.
        """
        header = "DFT ENERGY GRADIENTS"

        if not self.find(header):
            return
        first_rows = np.array(self.find(header, keys_only=True)) + 4
        # Scan the first table for its closing dashed line; all tables are
        # taken to have the same length.
        cursor = first_rows[0]
        while '----' not in self[cursor]:
            cursor += 1
        # The line right before the dashes is empty, hence the extra -1.
        table_len = (cursor - 1) - first_rows[0]
        last_rows = first_rows + table_len
        columns = ['atom', 'symbol', 'x', 'y', 'z', 'fx', 'fy', 'fz']
        wanted = ['atom', 'symbol', 'fx', 'fy', 'fz', 'frame']
        tables = []
        for frame_no, (lo, hi) in enumerate(zip(first_rows, last_rows)):
            table = self.pandas_dataframe(lo, hi, columns)
            table['frame'] = frame_no
            tables.append(table[wanted])
        combined = pd.concat(tables).reset_index(drop=True)
        combined['Z'] = combined['symbol'].map(sym2z)
        # Keep roughly the same column order as the other dataframes.
        self.gradient = combined[[
            'Z', 'atom', 'fx', 'fy', 'fz', 'symbol', 'frame'
        ]]

    def parse_frame(self):
        """
        Create a minimal :class:`~exatomic.core.frame.Frame` from the (parsed)
        :class:`~exatomic.core.atom.Atom` object.

        A ``total_energy`` column is added from the last field of the
        ``Total DFT energy`` lines, or of the ``Total SCF energy`` lines
        when no DFT energies are found.  A warning is printed when both
        kinds are present.
        """
        scf_key = 'Total SCF energy'
        dft_key = 'Total DFT energy'
        self.frame = compute_frame_from_atom(self.atom)
        hits = self.find(scf_key, dft_key)
        scf_hits = hits[scf_key]
        dft_hits = hits[dft_key]
        if scf_hits and dft_hits:
            print('Warning: found total energies from scf and dft, using dft')
        # DFT energies win whenever they exist.
        chosen = dft_hits or scf_hits
        if chosen:
            self.frame['total_energy'] = [
                float(text.split()[-1]) for _, text in chosen
            ]

    def __init__(self, *args, **kwargs):
        super(Output, self).__init__(*args, **kwargs)