class Output(six.with_metaclass(OutMeta, Editor)):
    """Editor for NWChem calculation output file (stdout)."""

    def parse_atom(self):
        """
        Parse the atom dataframe.

        Each geometry block found in the output becomes one frame and the
        coordinates are scaled by ``Length[unit, 'au']``.

        Note:
            When more than one geometry block is found, the last frame
            is dropped from the resulting table.
        """
        _reatom01 = 'Geometry "'
        _reatom02 = 'Atomic Mass'
        _reatom03 = 'ECP "ecp basis"'
        _reatom04 = 'Output coordinates in'
        found = self.find(_reatom01, _reatom02, _reatom03, _reatom04,
                          keys_only=True)
        unit = self[found[_reatom04][0]].split()[3]
        unit = "Angstrom" if unit == "angstroms" else "au"
        starts = np.array(found[_reatom01]) + 7
        stops = np.array(found[_reatom02]) - 1
        # Map each tag to the integer in the fourth column of its ECP line
        # (subtracted from Z below to obtain Zeff).
        ecps = {}
        for ln in np.array(found[_reatom03]) + 2:
            fields = self[ln].split()
            ecps[fields[0]] = int(fields[3])
        columns = ['label', 'tag', 'Z', 'x', 'y', 'z']
        atom = pd.concat([self.pandas_dataframe(s, e, columns)
                          for s, e in zip(starts, stops)])
        # Only ASCII letters make up the symbol; the range 'A-z' would
        # also match '[', '\\', ']', '^', '_' and '`'.
        atom['symbol'] = atom['tag'].str.extract(
            r'([A-Za-z]+)', expand=False).str.lower().str.title()
        atom['Z'] = atom['Z'].astype(np.int64)
        atom['Zeff'] = (atom['Z']
                        - atom['tag'].map(ecps).fillna(value=0)
                        ).astype(np.int64)
        # The most frequent label gives the number of frames and the
        # largest label gives the number of atoms per frame.
        nf = atom.label.value_counts().max()
        nat = atom.label.max()
        atom['frame'] = np.repeat(np.arange(nf, dtype=np.int64), nat)
        atom['label'] -= 1
        factor = Length[unit, 'au']
        for axis in ('x', 'y', 'z'):
            atom[axis] *= factor
        last = atom['frame'].max()
        if last > 0:
            atom = atom[atom['frame'] != last]
        atom = atom.reset_index(drop=True)
        del atom['label']
        self.atom = Atom(atom)

    def parse_orbital(self):
        """
        Parse the :class:`~exatomic.core.orbital.Orbital` dataframe.

        If any 'Molecular Orbital Analysis' header mentions 'Alpha', the
        alpha and beta blocks are parsed separately and tagged with
        ``spin`` 0 and 1; otherwise a single block per frame is parsed
        with ``spin`` 0.
        """
        _remo01 = 'Molecular Orbital Analysis'
        _remo02 = 'alpha - beta orbital overlaps'
        _remo03 = 'center of mass'
        check = self.find(_remo01)
        # ``check`` holds (line number, line text) pairs, so the text has
        # to be inspected; testing the pair itself would never match.
        if any('Alpha' in line for _, line in check):
            alpha_starts = np.array(
                [no for no, line in check if 'Alpha' in line],
                dtype=np.int64) + 2
            alpha_stops = np.array(
                [no for no, line in check if 'Beta' in line],
                dtype=np.int64) - 1
            beta_starts = alpha_stops + 3
            beta_stops = np.array(self.find(_remo02, keys_only=True),
                                  dtype=np.int64) - 1
            alpha_orbital = self._parse_orbital(alpha_starts, alpha_stops)
            beta_orbital = self._parse_orbital(beta_starts, beta_stops)
            alpha_orbital['spin'] = 0
            beta_orbital['spin'] = 1
            orbital = pd.concat((alpha_orbital, beta_orbital),
                                ignore_index=True)
        else:
            starts = np.array(list(zip(*check))[0], dtype=np.int64) + 2
            stops = np.array(self.find(_remo03, keys_only=True),
                             dtype=np.int64) - 1
            orbital = self._parse_orbital(starts, stops)
            orbital['spin'] = 0
        orbital['group'] = 0
        self.orbital = Orbital(orbital)

    def parse_momatrix(self):
        """
        Parse the :class:`~exatomic.core.orbital.MOMatrix` dataframe.

        Nothing is set when 'Final MO vectors' is not found.

        Note:
            Must supply 'print "final vectors" "final vectors analysis"'
            for momatrix
        """
        key0 = "Final MO vectors"
        key1 = "center of mass"
        found = self.find(key0, key1)
        if not found[key0]:
            return
        start = found[key0][0][0] + 6
        end = found[key1][0][0] - 1
        c = pd.read_fwf(StringIO("\n".join(self[start:end])),
                        widths=(6, 12, 12, 12, 12, 12, 12),
                        names=list(range(7)))
        # The raw fixed-width table is kept on the instance.
        self.c = c
        # Rows without a value in the first column are dropped, then the
        # first column itself is discarded.
        c = c[c[0].notnull()]
        del c[0]
        nbas = len(self.basis_set_order)
        n = c.shape[0] // nbas
        # Manual equivalent of numpy.array_split(df, n); array_split on
        # dataframes seemed to give splits of the wrong size.
        coefs = [
            c.iloc[i * nbas:(i + 1) * nbas, :].astype(float)
             .dropna(axis=1).values.ravel("F")
            for i in range(n)
        ]
        c = np.concatenate(coefs)
        orbital, chi = _square_indices(nbas)
        self.momatrix = MOMatrix.from_dict({'coef': c, 'chi': chi,
                                            'orbital': orbital,
                                            'frame': 0})

    def _parse_orbital(self, starts, stops):
        '''
        This function actually performs parsing of
        :class:`~exatomic.orbital.Orbital`

        Args:
            starts: first line number of each orbital block (one per frame)
            stops: line number ending each orbital block (exclusive)

        Returns:
            pandas.DataFrame with columns x, y, z, frame, vector,
            occupation and energy, one row per 'Vector' entry.

        See Also:
            :func:`~exnwchem.output.Output.parse_orbital`
        '''
        # Kept from the original flow: the basis set is parsed here when
        # the 'spherical' flag is not yet present in ``self.meta``.
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        nvec = sum(line.count('Vector')
                   for s, e in zip(starts, stops)
                   for line in self[s:e])
        orb_no = np.empty((nvec, ), dtype=np.int64)
        occ = np.empty((nvec, ), dtype=np.float64)
        nrg = np.empty((nvec, ), dtype=np.float64)
        x = np.empty((nvec, ), dtype=np.float64)
        y = np.empty((nvec, ), dtype=np.float64)
        z = np.empty((nvec, ), dtype=np.float64)
        frame = np.empty((nvec, ), dtype=np.int64)
        oc = 0    # Orbital counter
        for fc, (s, e) in enumerate(zip(starts, stops)):
            for line in self[s:e]:
                ls = line.split()
                if 'Vector' in line:
                    orb_no[oc] = ls[1]
                    occ[oc] = ls[2].replace('Occ=', '').replace('D', 'E')
                    # A negative energy is fused to its 'E=' prefix in one
                    # token; otherwise the value is the following token.
                    if 'E=-' in line:
                        nrg[oc] = ls[3].replace('E=', '').replace('D', 'E')
                    else:
                        nrg[oc] = ls[4].replace('D', 'E')
                    frame[oc] = fc
                elif 'MO Center' in line:
                    x[oc] = ls[2].replace(',', '').replace('D', 'E')
                    y[oc] = ls[3].replace(',', '').replace('D', 'E')
                    z[oc] = ls[4].replace(',', '').replace('D', 'E')
                    # The 'MO Center' line closes the current vector.
                    oc += 1
        orb_no -= 1
        return pd.DataFrame.from_dict({'x': x, 'y': y, 'z': z,
                                       'frame': frame, 'vector': orb_no,
                                       'occupation': occ, 'energy': nrg})

    def parse_basis_set(self):
        """
        Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.

        Also stores the ``spherical`` flag in ``self.meta`` and adds a
        ``set`` column to ``self.atom`` mapping each tag to its basis set
        index.
        """
        if not hasattr(self, "atom"):
            self.parse_atom()
        _rebas01 = ' Basis "'
        _rebas02 = ' Summary of "'
        found = self.find(_rebas01, _rebas02)
        spherical = "spherical" in found[_rebas01][0][1]
        start = found[_rebas01][0][0] + 2
        idx = 1 if len(found[_rebas02]) > 1 else -1
        stop = found[_rebas02][idx][0] - 1
        # Read in all of the extra lines that contain ---- and tag names
        df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                         widths=(4, 2, 16, 16),
                         names=("shell", "L", "alpha", "d"))
        df.loc[df['shell'] == "--", "shell"] = np.nan
        # '== False' is deliberate: missing entries yield NaN from
        # isdigit() and must not be selected as tags.
        tags = df.loc[(df['shell'].str.isdigit() == False), "shell"]  # noqa: E712
        idxs = tags.index.tolist()
        idxs.append(len(df))
        df['set'] = ""
        # Label every row from one tag line up to the next with that tag.
        for tag, lo, hi in zip(tags, idxs[:-1], idxs[1:]):
            df.loc[lo:hi, "set"] = tag
        df = df.dropna().reset_index(drop=True)
        mapper = {tag: i for i, tag in enumerate(df['set'].unique())}
        df['set'] = df['set'].map(mapper)
        df['L'] = df['L'].str.strip().str.lower().map(lmap)
        df['alpha'] = df['alpha'].astype(float)
        df['d'] = df['d'].astype(float)
        # NO SUPPORT FOR MULTIPLE FRAMES?
        df['frame'] = 0
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = spherical
        self.atom['set'] = self.atom['tag'].map(mapper)

    def parse_basis_set_order(self):
        """
        Build the basis set order table, one row per basis function.

        Rows carry (center, shell, L, ml) for spherical basis sets and
        (center, shell, L, l, m, n) for cartesian ones, plus ``frame``.
        """
        dtype = [('center', 'i8'), ('shell', 'i8'), ('L', 'i8')]
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        spherical = self.meta['spherical']
        if spherical:
            dtype += [('ml', 'i8')]
        else:
            dtype += [('l', 'i8'), ('m', 'i8'), ('n', 'i8')]
        mapper = self.basis_set.functions(spherical).groupby(
            level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        bso = np.empty((nbas, ), dtype=dtype)
        cnt = 0
        bases = self.basis_set.groupby('set')
        for seht, center in zip(self.atom['set'], self.atom.index):
            for shell, grp in bases.get_group(seht).groupby('shell'):
                l = grp['L'].values[0]
                if spherical:
                    for ml in spherical_ordering_function(l):
                        bso[cnt] = (center, shell, l, ml)
                        cnt += 1
                else:
                    for _, ll, m, n in cartesian_ordering_function(l):
                        bso[cnt] = (center, shell, l, ll, m, n)
                        cnt += 1
        bso = pd.DataFrame(bso)
        bso['frame'] = 0
        # New shell definition consistent with basis internals: the n-th
        # occurrence of a given (center, L, ml) gets shell index n.
        # cumcount keeps the result aligned with the rows even when they
        # are not already ordered by (center, L).
        # NOTE(review): cartesian tables have no 'ml' column, so this
        # step raises KeyError for them -- confirm intended handling.
        bso['shell'] = bso.groupby(['center', 'L', 'ml']).cumcount()
        self.basis_set_order = bso

    def parse_frame(self):
        """
        Create a minimal :class:`~exatomic.core.frame.Frame` from the (parsed)
        :class:`~exatomic.core.atom.Atom` object.

        A ``total_energy`` column is added from the 'Total DFT energy'
        lines, or from the 'Total SCF energy' lines when no DFT energies
        are present.
        """
        _rescfen = 'Total SCF energy'
        _redften = 'Total DFT energy'
        self.frame = compute_frame_from_atom(self.atom)
        found = self.find(_rescfen, _redften)
        scfs = found[_rescfen]
        dfts = found[_redften]
        if scfs and dfts:
            print('Warning: found total energies from scf and dft, using dft')
        energies = dfts or scfs
        if energies:
            self.frame['total_energy'] = [float(val.split()[-1])
                                          for _, val in energies]

    def __init__(self, *args, **kwargs):
        super(Output, self).__init__(*args, **kwargs)
class Output(six.with_metaclass(OutMeta, Editor)):
    """Editor for NWChem calculation output file (stdout)."""

    def parse_atom(self):
        """
        Parse the atom dataframe.

        Each geometry block found in the output becomes one frame and the
        coordinates are scaled by ``Length[unit, 'au']``.

        Note:
            When more than one geometry block is found, the last frame
            is dropped from the resulting table.
        """
        _reatom01 = 'Geometry "'
        _reatom02 = 'Atomic Mass'
        _reatom03 = 'ECP "ecp basis"'
        _reatom04 = 'Output coordinates in'
        found = self.find(_reatom01, _reatom02, _reatom03, _reatom04,
                          keys_only=True)
        unit = self[found[_reatom04][0]].split()[3]
        unit = "Angstrom" if unit == "angstroms" else "au"
        starts = np.array(found[_reatom01]) + 7
        stops = np.array(found[_reatom02]) - 1
        # Map each tag to the integer in the fourth column of its ECP line
        # (subtracted from Z below to obtain Zeff).
        ecps = {}
        for ln in np.array(found[_reatom03]) + 2:
            fields = self[ln].split()
            ecps[fields[0]] = int(fields[3])
        columns = ['label', 'tag', 'Z', 'x', 'y', 'z']
        atom = pd.concat([self.pandas_dataframe(s, e, columns)
                          for s, e in zip(starts, stops)])
        # Only ASCII letters make up the symbol; the range 'A-z' would
        # also match '[', '\\', ']', '^', '_' and '`'.
        atom['symbol'] = atom['tag'].str.extract(
            r'([A-Za-z]+)', expand=False).str.lower().str.title()
        atom['Z'] = atom['Z'].astype(np.int64)
        atom['Zeff'] = (atom['Z']
                        - atom['tag'].map(ecps).fillna(value=0)
                        ).astype(np.int64)
        # The most frequent label gives the number of frames and the
        # largest label gives the number of atoms per frame.
        nf = atom.label.value_counts().max()
        nat = atom.label.max()
        atom['frame'] = np.repeat(np.arange(nf, dtype=np.int64), nat)
        atom['label'] -= 1
        factor = Length[unit, 'au']
        for axis in ('x', 'y', 'z'):
            atom[axis] *= factor
        last = atom['frame'].max()
        if last > 0:
            atom = atom[atom['frame'] != last]
        atom = atom.reset_index(drop=True)
        del atom['label']
        self.atom = Atom(atom)

    def parse_orbital(self):
        """
        Parse the :class:`~exatomic.core.orbital.Orbital` dataframe.

        If any 'Molecular Orbital Analysis' header mentions 'Alpha', the
        alpha and beta blocks are parsed separately and tagged with
        ``spin`` 0 and 1; otherwise a single block per frame is parsed
        with ``spin`` 0.
        """
        _remo01 = 'Molecular Orbital Analysis'
        _remo02 = 'alpha - beta orbital overlaps'
        _remo03 = 'center of mass'
        check = self.find(_remo01)
        # ``check`` holds (line number, line text) pairs, so the text has
        # to be inspected; testing the pair itself would never match.
        if any('Alpha' in line for _, line in check):
            alpha_starts = np.array(
                [no for no, line in check if 'Alpha' in line],
                dtype=np.int64) + 2
            alpha_stops = np.array(
                [no for no, line in check if 'Beta' in line],
                dtype=np.int64) - 1
            beta_starts = alpha_stops + 3
            beta_stops = np.array(self.find(_remo02, keys_only=True),
                                  dtype=np.int64) - 1
            alpha_orbital = self._parse_orbital(alpha_starts, alpha_stops)
            beta_orbital = self._parse_orbital(beta_starts, beta_stops)
            alpha_orbital['spin'] = 0
            beta_orbital['spin'] = 1
            orbital = pd.concat((alpha_orbital, beta_orbital),
                                ignore_index=True)
        else:
            starts = np.array(list(zip(*check))[0], dtype=np.int64) + 2
            stops = np.array(self.find(_remo03, keys_only=True),
                             dtype=np.int64) - 1
            orbital = self._parse_orbital(starts, stops)
            orbital['spin'] = 0
        orbital['group'] = 0
        self.orbital = Orbital(orbital)

    def parse_momatrix(self):
        """
        Parse the :class:`~exatomic.core.orbital.MOMatrix` dataframe.

        Nothing is set when 'Final MO vectors' is not found.

        Note:
            Must supply 'print "final vectors" "final vectors analysis"'
            for momatrix
        """
        key0 = "Final MO vectors"
        key1 = "center of mass"
        found = self.find(key0, key1)
        if not found[key0]:
            return
        start = found[key0][0][0] + 6
        end = found[key1][0][0] - 1
        c = pd.read_fwf(StringIO("\n".join(self[start:end])),
                        widths=(6, 12, 12, 12, 12, 12, 12),
                        names=list(range(7)))
        # The raw fixed-width table is kept on the instance.
        self.c = c
        # Rows without a value in the first column are dropped, then the
        # first column itself is discarded.
        c = c[c[0].notnull()]
        del c[0]
        nbas = len(self.basis_set_order)
        n = c.shape[0] // nbas
        # Manual equivalent of numpy.array_split(df, n); array_split on
        # dataframes seemed to give splits of the wrong size.
        coefs = [
            c.iloc[i * nbas:(i + 1) * nbas, :].astype(float)
             .dropna(axis=1).values.ravel("F")
            for i in range(n)
        ]
        c = np.concatenate(coefs)
        orbital, chi = _square_indices(nbas)
        self.momatrix = MOMatrix.from_dict({'coef': c, 'chi': chi,
                                            'orbital': orbital,
                                            'frame': 0})

    def _parse_orbital(self, starts, stops):
        '''
        This function actually performs parsing of
        :class:`~exatomic.orbital.Orbital`

        Args:
            starts: first line number of each orbital block (one per frame)
            stops: line number ending each orbital block (exclusive)

        Returns:
            pandas.DataFrame with columns x, y, z, frame, vector,
            occupation and energy, one row per 'Vector' entry.

        See Also:
            :func:`~exnwchem.output.Output.parse_orbital`
        '''
        # Kept from the original flow: the basis set is parsed here when
        # the 'spherical' flag is not yet present in ``self.meta``.
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        nvec = sum(line.count('Vector')
                   for s, e in zip(starts, stops)
                   for line in self[s:e])
        orb_no = np.empty((nvec, ), dtype=np.int64)
        occ = np.empty((nvec, ), dtype=np.float64)
        nrg = np.empty((nvec, ), dtype=np.float64)
        x = np.empty((nvec, ), dtype=np.float64)
        y = np.empty((nvec, ), dtype=np.float64)
        z = np.empty((nvec, ), dtype=np.float64)
        frame = np.empty((nvec, ), dtype=np.int64)
        oc = 0    # Orbital counter
        for fc, (s, e) in enumerate(zip(starts, stops)):
            for line in self[s:e]:
                ls = line.split()
                if 'Vector' in line:
                    orb_no[oc] = ls[1]
                    occ[oc] = ls[2].replace('Occ=', '').replace('D', 'E')
                    # A negative energy is fused to its 'E=' prefix in one
                    # token; otherwise the value is the following token.
                    if 'E=-' in line:
                        nrg[oc] = ls[3].replace('E=', '').replace('D', 'E')
                    else:
                        nrg[oc] = ls[4].replace('D', 'E')
                    frame[oc] = fc
                elif 'MO Center' in line:
                    x[oc] = ls[2].replace(',', '').replace('D', 'E')
                    y[oc] = ls[3].replace(',', '').replace('D', 'E')
                    z[oc] = ls[4].replace(',', '').replace('D', 'E')
                    # The 'MO Center' line closes the current vector.
                    oc += 1
        orb_no -= 1
        return pd.DataFrame.from_dict({'x': x, 'y': y, 'z': z,
                                       'frame': frame, 'vector': orb_no,
                                       'occupation': occ, 'energy': nrg})

    def parse_basis_set(self):
        """
        Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.

        Also stores the ``spherical`` flag in ``self.meta`` and adds a
        ``set`` column to ``self.atom`` mapping each tag to its basis set
        index.
        """
        if not hasattr(self, "atom"):
            self.parse_atom()
        _rebas01 = ' Basis "'
        _rebas02 = ' Summary of "'
        found = self.find(_rebas01, _rebas02)
        spherical = "spherical" in found[_rebas01][0][1]
        start = found[_rebas01][0][0] + 2
        idx = 1 if len(found[_rebas02]) > 1 else -1
        stop = found[_rebas02][idx][0] - 1
        # Read in all of the extra lines that contain ---- and tag names
        df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                         widths=(4, 2, 16, 16),
                         names=("shell", "L", "alpha", "d"))
        df.loc[df['shell'] == "--", "shell"] = np.nan
        # '== False' is deliberate: missing entries yield NaN from
        # isdigit() and must not be selected as tags.
        tags = df.loc[(df['shell'].str.isdigit() == False), "shell"]  # noqa: E712
        idxs = tags.index.tolist()
        idxs.append(len(df))
        df['set'] = ""
        # Label every row from one tag line up to the next with that tag.
        for tag, lo, hi in zip(tags, idxs[:-1], idxs[1:]):
            df.loc[lo:hi, "set"] = tag
        df = df.dropna().reset_index(drop=True)
        mapper = {tag: i for i, tag in enumerate(df['set'].unique())}
        df['set'] = df['set'].map(mapper)
        df['L'] = df['L'].str.strip().str.lower().map(lmap)
        df['alpha'] = df['alpha'].astype(float)
        df['d'] = df['d'].astype(float)
        # NO SUPPORT FOR MULTIPLE FRAMES?
        df['frame'] = 0
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = spherical
        self.atom['set'] = self.atom['tag'].map(mapper)

    def parse_basis_set_order(self):
        """
        Build the basis set order table, one row per basis function.

        Rows carry (center, shell, L, ml) for spherical basis sets and
        (center, shell, L, l, m, n) for cartesian ones, plus ``frame``.
        """
        dtype = [('center', 'i8'), ('shell', 'i8'), ('L', 'i8')]
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        spherical = self.meta['spherical']
        if spherical:
            dtype += [('ml', 'i8')]
        else:
            dtype += [('l', 'i8'), ('m', 'i8'), ('n', 'i8')]
        mapper = self.basis_set.functions(spherical).groupby(
            level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        bso = np.empty((nbas, ), dtype=dtype)
        cnt = 0
        bases = self.basis_set.groupby('set')
        for seht, center in zip(self.atom['set'], self.atom.index):
            for shell, grp in bases.get_group(seht).groupby('shell'):
                l = grp['L'].values[0]
                if spherical:
                    for ml in spherical_ordering_function(l):
                        bso[cnt] = (center, shell, l, ml)
                        cnt += 1
                else:
                    for _, ll, m, n in cartesian_ordering_function(l):
                        bso[cnt] = (center, shell, l, ll, m, n)
                        cnt += 1
        bso = pd.DataFrame(bso)
        bso['frame'] = 0
        # New shell definition consistent with basis internals: the n-th
        # occurrence of a given (center, L, ml) gets shell index n.
        # cumcount keeps the result aligned with the rows even when they
        # are not already ordered by (center, L).
        # NOTE(review): cartesian tables have no 'ml' column, so this
        # step raises KeyError for them -- confirm intended handling.
        bso['shell'] = bso.groupby(['center', 'L', 'ml']).cumcount()
        self.basis_set_order = bso

    def parse_frame(self):
        """
        Create a minimal :class:`~exatomic.core.frame.Frame` from the (parsed)
        :class:`~exatomic.core.atom.Atom` object.

        A ``total_energy`` column is added from the 'Total DFT energy'
        lines, or from the 'Total SCF energy' lines when no DFT energies
        are present.
        """
        _rescfen = 'Total SCF energy'
        _redften = 'Total DFT energy'
        self.frame = compute_frame_from_atom(self.atom)
        found = self.find(_rescfen, _redften)
        scfs = found[_rescfen]
        dfts = found[_redften]
        if scfs and dfts:
            print('Warning: found total energies from scf and dft, using dft')
        energies = dfts or scfs
        if energies:
            self.frame['total_energy'] = [float(val.split()[-1])
                                          for _, val in energies]

    def __init__(self, *args, **kwargs):
        super(Output, self).__init__(*args, **kwargs)
class TestBasisSet(TestCase):
    """Checks BasisSet bookkeeping on a trivial, a medium and a large basis."""

    # Index level names and (set, L) index arrays shared by the expectations.
    _NAMES = ['set', 'L']
    _MEDIUM = [[0, 0, 1], [0, 1, 0]]
    _LARGE = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]

    def setUp(self):
        # Trivial basis set
        trivial = {col: [0] for col in BasisSet._columns}
        trivial['frame'] = 0
        self.bs = BasisSet(trivial)
        for col in ('alpha', 'd'):
            self.bs[col] = self.bs[col].astype(np.float64)
        # Medium basis set
        self.mbs = BasisSet({
            'frame': 0,
            'alpha': [5., 1., 1.],
            'd': [1., 1., 1.],
            'shell': [0, 1, 0],
            'set': [0, 0, 1],
            'L': [0, 1, 0],
            'n': [1, 2, 1],
        })
        # Large basis set
        self.lbs = BasisSet({
            'frame': 0,
            'alpha': [5., 3., 1., 3., 1., 1., 3., 1., 1.],
            'd': [1., 1., 1., 1., 1., 1., 1., 1., 1.],
            'shell': [0, 0, 0, 1, 1, 2, 0, 0, 1],
            'set': [0, 0, 0, 0, 0, 0, 1, 1, 1],
            'L': [0, 0, 0, 1, 1, 2, 0, 0, 1],
        })

    def _trivial(self):
        """Expected series for the trivial basis: a single 1 at (0, 0)."""
        index = pd.MultiIndex.from_product([[0], [0]], names=self._NAMES)
        return pd.Series([1], index=index)

    def _series(self, values, arrays):
        """Expected series holding ``values`` indexed by (set, L) arrays."""
        index = pd.MultiIndex.from_arrays(arrays, names=self._NAMES)
        return pd.Series(values, index=index)

    def _check(self, result, expected):
        """Assert that ``result`` equals ``expected`` element-wise."""
        self.assertTrue((result == expected).all())

    def test_lmax(self):
        for basis, lmax in ((self.bs, 0), (self.mbs, 1), (self.lbs, 2)):
            self.assertEqual(basis.lmax, lmax)

    def test_shells(self):
        # Only exercises the calls; no return value is checked.
        for basis in (self.bs, self.mbs, self.lbs):
            basis.shells()

    def test_functions_by_shell(self):
        self._check(self.bs.functions_by_shell(), self._trivial())
        self._check(self.mbs.functions_by_shell(),
                    self._series([1, 1, 1], self._MEDIUM))
        self._check(self.lbs.functions_by_shell(),
                    self._series([1, 1, 1, 1, 1], self._LARGE))

    def test_primitives_by_shell(self):
        self._check(self.bs.primitives_by_shell(), self._trivial())
        self._check(self.mbs.primitives_by_shell(),
                    self._series([1, 1, 1], self._MEDIUM))
        self._check(self.lbs.primitives_by_shell(),
                    self._series([3, 2, 1, 2, 1], self._LARGE))

    def test_functions(self):
        self._check(self.bs.functions(False), self._trivial())
        self._check(self.bs.functions(True), self._trivial())
        self._check(self.mbs.functions(False),
                    self._series([1, 3, 1], self._MEDIUM))
        self._check(self.mbs.functions(True),
                    self._series([1, 3, 1], self._MEDIUM))
        self._check(self.lbs.functions(False),
                    self._series([1, 3, 6, 1, 3], self._LARGE))
        self._check(self.lbs.functions(True),
                    self._series([1, 3, 5, 1, 3], self._LARGE))

    def test_primitives(self):
        self._check(self.bs.primitives(False), self._trivial())
        self._check(self.bs.primitives(True), self._trivial())
        self._check(self.mbs.primitives(False),
                    self._series([1, 3, 1], self._MEDIUM))
        self._check(self.mbs.primitives(True),
                    self._series([1, 3, 1], self._MEDIUM))
        self._check(self.lbs.primitives(False),
                    self._series([3, 6, 6, 2, 3], self._LARGE))
        self._check(self.lbs.primitives(True),
                    self._series([3, 6, 5, 2, 3], self._LARGE))
class Output(six.with_metaclass(OutMeta, Editor)):
    """Editor for NWChem calculation output file (stdout)."""

    def parse_atom(self):
        """
        Parse the atom dataframe.

        Each geometry block found in the output becomes one frame and the
        coordinates are scaled by ``Length[unit, 'au']``.

        Note:
            When more than one geometry block is found, the last frame
            is dropped from the resulting table.
        """
        _reatom01 = 'Geometry "'
        _reatom02 = 'Atomic Mass'
        _reatom03 = 'ECP "ecp basis"'
        _reatom04 = 'Output coordinates in'
        found = self.find(_reatom01, _reatom02, _reatom03, _reatom04,
                          keys_only=True)
        unit = self[found[_reatom04][0]].split()[3]
        unit = "Angstrom" if unit == "angstroms" else "au"
        starts = np.array(found[_reatom01]) + 7
        stops = np.array(found[_reatom02]) - 1
        # Map each tag to the integer in the fourth column of its ECP line
        # (subtracted from Z below to obtain Zeff).
        ecps = {}
        for ln in np.array(found[_reatom03]) + 2:
            fields = self[ln].split()
            ecps[fields[0]] = int(fields[3])
        columns = ['label', 'tag', 'Z', 'x', 'y', 'z']
        atom = pd.concat([self.pandas_dataframe(s, e, columns)
                          for s, e in zip(starts, stops)])
        # Only ASCII letters make up the symbol; the range 'A-z' would
        # also match '[', '\\', ']', '^', '_' and '`'.
        atom['symbol'] = atom['tag'].str.extract(
            r'([A-Za-z]+)', expand=False).str.lower().str.title()
        atom['Z'] = atom['Z'].astype(np.int64)
        atom['Zeff'] = (atom['Z']
                        - atom['tag'].map(ecps).fillna(value=0)
                        ).astype(np.int64)
        # The most frequent label gives the number of frames and the
        # largest label gives the number of atoms per frame.
        nf = atom.label.value_counts().max()
        nat = atom.label.max()
        atom['frame'] = np.repeat(np.arange(nf, dtype=np.int64), nat)
        atom['label'] -= 1
        factor = Length[unit, 'au']
        for axis in ('x', 'y', 'z'):
            atom[axis] *= factor
        last = atom['frame'].max()
        if last > 0:
            atom = atom[atom['frame'] != last]
        atom = atom.reset_index(drop=True)
        del atom['label']
        self.atom = Atom(atom)

    def parse_orbital(self):
        """
        Parse the :class:`~exatomic.core.orbital.Orbital` dataframe.

        If any 'Molecular Orbital Analysis' header mentions 'Alpha', the
        alpha and beta blocks are parsed separately and tagged with
        ``spin`` 0 and 1; otherwise a single block per frame is parsed
        with ``spin`` 0.
        """
        _remo01 = 'Molecular Orbital Analysis'
        _remo02 = 'alpha - beta orbital overlaps'
        _remo03 = 'center of mass'
        check = self.find(_remo01)
        # ``check`` holds (line number, line text) pairs, so the text has
        # to be inspected; testing the pair itself would never match.
        if any('Alpha' in line for _, line in check):
            alpha_starts = np.array(
                [no for no, line in check if 'Alpha' in line],
                dtype=np.int64) + 2
            alpha_stops = np.array(
                [no for no, line in check if 'Beta' in line],
                dtype=np.int64) - 1
            beta_starts = alpha_stops + 3
            beta_stops = np.array(self.find(_remo02, keys_only=True),
                                  dtype=np.int64) - 1
            alpha_orbital = self._parse_orbital(alpha_starts, alpha_stops)
            beta_orbital = self._parse_orbital(beta_starts, beta_stops)
            alpha_orbital['spin'] = 0
            beta_orbital['spin'] = 1
            orbital = pd.concat((alpha_orbital, beta_orbital),
                                ignore_index=True)
        else:
            starts = np.array(list(zip(*check))[0], dtype=np.int64) + 2
            stops = np.array(self.find(_remo03, keys_only=True),
                             dtype=np.int64) - 1
            orbital = self._parse_orbital(starts, stops)
            orbital['spin'] = 0
        orbital['group'] = 0
        self.orbital = Orbital(orbital)

    def parse_momatrix(self):
        """
        Parse the :class:`~exatomic.core.orbital.MOMatrix` dataframe.

        Nothing is set when 'Final MO vectors' is not found.

        Note:
            Must supply 'print "final vectors" "final vectors analysis"'
            for momatrix
        """
        key0 = "Final MO vectors"
        key1 = "center of mass"
        found = self.find(key0, key1)
        if not found[key0]:
            return
        start = found[key0][0][0] + 6
        end = found[key1][0][0] - 1
        c = pd.read_fwf(StringIO("\n".join(self[start:end])),
                        widths=(6, 12, 12, 12, 12, 12, 12),
                        names=list(range(7)))
        # The raw fixed-width table is kept on the instance.
        self.c = c
        # Rows without a value in the first column are dropped, then the
        # first column itself is discarded.
        c = c[c[0].notnull()]
        del c[0]
        nbas = len(self.basis_set_order)
        n = c.shape[0] // nbas
        # Manual equivalent of numpy.array_split(df, n); array_split on
        # dataframes seemed to give splits of the wrong size.
        coefs = [
            c.iloc[i * nbas:(i + 1) * nbas, :].astype(float)
             .dropna(axis=1).values.ravel("F")
            for i in range(n)
        ]
        c = np.concatenate(coefs)
        orbital, chi = _square_indices(nbas)
        self.momatrix = MOMatrix.from_dict({'coef': c, 'chi': chi,
                                            'orbital': orbital,
                                            'frame': 0})

    def _parse_orbital(self, starts, stops):
        '''
        This function actually performs parsing of
        :class:`~exatomic.orbital.Orbital`

        Args:
            starts: first line number of each orbital block (one per frame)
            stops: line number ending each orbital block (exclusive)

        Returns:
            pandas.DataFrame with columns x, y, z, frame, vector,
            occupation and energy, one row per 'Vector' entry.

        See Also:
            :func:`~exnwchem.output.Output.parse_orbital`
        '''
        # Kept from the original flow: the basis set is parsed here when
        # the 'spherical' flag is not yet present in ``self.meta``.
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        nvec = sum(line.count('Vector')
                   for s, e in zip(starts, stops)
                   for line in self[s:e])
        orb_no = np.empty((nvec, ), dtype=np.int64)
        occ = np.empty((nvec, ), dtype=np.float64)
        nrg = np.empty((nvec, ), dtype=np.float64)
        x = np.empty((nvec, ), dtype=np.float64)
        y = np.empty((nvec, ), dtype=np.float64)
        z = np.empty((nvec, ), dtype=np.float64)
        frame = np.empty((nvec, ), dtype=np.int64)
        oc = 0    # Orbital counter
        for fc, (s, e) in enumerate(zip(starts, stops)):
            for line in self[s:e]:
                ls = line.split()
                if 'Vector' in line:
                    orb_no[oc] = ls[1]
                    occ[oc] = ls[2].replace('Occ=', '').replace('D', 'E')
                    # A negative energy is fused to its 'E=' prefix in one
                    # token; otherwise the value is the following token.
                    if 'E=-' in line:
                        nrg[oc] = ls[3].replace('E=', '').replace('D', 'E')
                    else:
                        nrg[oc] = ls[4].replace('D', 'E')
                    frame[oc] = fc
                elif 'MO Center' in line:
                    x[oc] = ls[2].replace(',', '').replace('D', 'E')
                    y[oc] = ls[3].replace(',', '').replace('D', 'E')
                    z[oc] = ls[4].replace(',', '').replace('D', 'E')
                    # The 'MO Center' line closes the current vector.
                    oc += 1
        orb_no -= 1
        return pd.DataFrame.from_dict({'x': x, 'y': y, 'z': z,
                                       'frame': frame, 'vector': orb_no,
                                       'occupation': occ, 'energy': nrg})

    def parse_basis_set(self):
        """
        Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.

        Also stores the ``spherical`` flag in ``self.meta`` and adds a
        ``set`` column to ``self.atom`` mapping each tag to its basis set
        index.
        """
        if not hasattr(self, "atom"):
            self.parse_atom()
        _rebas01 = ' Basis "'
        _rebas02 = ' Summary of "'
        found = self.find(_rebas01, _rebas02)
        spherical = "spherical" in found[_rebas01][0][1]
        start = found[_rebas01][0][0] + 2
        idx = 1 if len(found[_rebas02]) > 1 else -1
        stop = found[_rebas02][idx][0] - 1
        # Read in all of the extra lines that contain ---- and tag names
        df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                         widths=(4, 2, 16, 16),
                         names=("shell", "L", "alpha", "d"))
        df.loc[df['shell'] == "--", "shell"] = np.nan
        # '== False' is deliberate: missing entries yield NaN from
        # isdigit() and must not be selected as tags.
        tags = df.loc[(df['shell'].str.isdigit() == False), "shell"]  # noqa: E712
        idxs = tags.index.tolist()
        idxs.append(len(df))
        df['set'] = ""
        # Label every row from one tag line up to the next with that tag.
        for tag, lo, hi in zip(tags, idxs[:-1], idxs[1:]):
            df.loc[lo:hi, "set"] = tag
        df = df.dropna().reset_index(drop=True)
        mapper = {tag: i for i, tag in enumerate(df['set'].unique())}
        df['set'] = df['set'].map(mapper)
        df['L'] = df['L'].str.strip().str.lower().map(lmap)
        df['alpha'] = df['alpha'].astype(float)
        df['d'] = df['d'].astype(float)
        # NO SUPPORT FOR MULTIPLE FRAMES?
        df['frame'] = 0
        self.basis_set = BasisSet(df)
        self.meta['spherical'] = spherical
        self.atom['set'] = self.atom['tag'].map(mapper)

    def parse_basis_set_order(self):
        """
        Build the basis set order table, one row per basis function.

        Rows carry (center, shell, L, ml) for spherical basis sets and
        (center, shell, L, l, m, n) for cartesian ones, plus ``frame``.
        """
        dtype = [('center', 'i8'), ('shell', 'i8'), ('L', 'i8')]
        if 'spherical' not in self.meta:
            self.parse_basis_set()
        spherical = self.meta['spherical']
        if spherical:
            dtype += [('ml', 'i8')]
        else:
            dtype += [('l', 'i8'), ('m', 'i8'), ('n', 'i8')]
        mapper = self.basis_set.functions(spherical).groupby(
            level="set").sum()
        nbas = self.atom['set'].map(mapper).sum()
        bso = np.empty((nbas, ), dtype=dtype)
        cnt = 0
        bases = self.basis_set.groupby('set')
        for seht, center in zip(self.atom['set'], self.atom.index):
            for shell, grp in bases.get_group(seht).groupby('shell'):
                l = grp['L'].values[0]
                if spherical:
                    for ml in spherical_ordering_function(l):
                        bso[cnt] = (center, shell, l, ml)
                        cnt += 1
                else:
                    for _, ll, m, n in cartesian_ordering_function(l):
                        bso[cnt] = (center, shell, l, ll, m, n)
                        cnt += 1
        bso = pd.DataFrame(bso)
        bso['frame'] = 0
        # New shell definition consistent with basis internals: the n-th
        # occurrence of a given (center, L, ml) gets shell index n.
        # cumcount keeps the result aligned with the rows even when they
        # are not already ordered by (center, L).
        # NOTE(review): cartesian tables have no 'ml' column, so this
        # step raises KeyError for them -- confirm intended handling.
        bso['shell'] = bso.groupby(['center', 'L', 'ml']).cumcount()
        self.basis_set_order = bso

    def parse_roa(self):
        """
        Parse the :class:`~exatomic.core.tensor.Polarizability` dataframe.
        This will parse the output from the Raman Optical Activity outputs.

        Returns without setting anything when 'roa begin' is not found.

        Note:
            We generate a 3D tensor with the 2D tensor code. 3D tensors will
            have 3 rows labeled with the same name.
        """
        _reroa = 'roa begin'
        _reare = 'alpha real'
        _reaim = 'alpha im'
        _reombre = 'omega beta(real)'
        _reombim = 'omega beta(imag)'
        _redqre = 'dipole-quadrupole real (Cartesian)'
        _redqim = 'dipole-quadrupole imag (Cartesian)'
        if not self.find(_reroa):
            return
        found_2d = self.find(_reare, _reaim, _reombre, _reombim,
                             keys_only=True)
        found_3d = self.find(_redqre, _redqim, keys_only=True)
        columns = ['x', 'val']
        components = ['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz']
        # Each 2D tensor is read from the nine lines after its header.
        heads = np.array(list(found_2d.values())).reshape(4, )
        data = [self.pandas_dataframe(s, e, columns)
                for s, e in zip(heads + 1, heads + 10)]
        df = pd.concat(data).reset_index(drop=True)
        df['grp'] = [i for i in range(4) for j in range(9)]
        df = df[['val', 'grp']]
        # unstack() lays out the nine 'val' entries followed by the nine
        # 'grp' entries; the trailing nine are discarded.
        df = pd.DataFrame(
            df.groupby('grp').apply(
                lambda x: x.unstack().values[:-9]).values.tolist(),
            columns=components)
        # find the electric dipole-quadrupole polarizability
        # NWChem gives this as a list of 18 values assuming the matrix to be
        # symmetric; for our implementation we need to extend it to 27
        # elements
        # TODO: check that NWChem does assume that the 3D tensors are symmetric
        heads = np.sort(np.array(list(found_3d.values())).reshape(2, ))
        data = [self.pandas_dataframe(s, e, columns)
                for s, e in zip(heads + 1, heads + 19)]
        df3 = pd.concat(data).reset_index(drop=True)
        vals = df3['val'].values.reshape(2, 3, 6)
        adx = np.triu_indices(3)
        mat = np.zeros((2, 3, 3, 3))
        for i in range(2):
            for j in range(3):
                # Fill the upper triangle, then mirror it; the diagonal
                # is subtracted once so it is not counted twice.
                mat[i][j][adx] = vals[i][j]
                mat[i][j] = (mat[i][j] + np.transpose(mat[i][j])
                             - np.identity(3) * mat[i][j])
        mat = mat.reshape(18, 3)
        df3 = pd.DataFrame(mat, columns=['x', 'y', 'z'])
        df3['grp1'] = [i for i in range(2) for j in range(9)]
        df3['grp2'] = [j for i in range(2) for j in range(3)
                       for n in range(3)]
        names = ['Ax_real', 'Ay_real', 'Az_real',
                 'Ax_imag', 'Ay_imag', 'Az_imag']
        # Here the trailing six entries are the grp1/grp2 values.
        df3 = pd.DataFrame(
            df3.groupby(['grp1', 'grp2']).apply(
                lambda x: x.unstack().values[:-6]).values.tolist(),
            columns=components, index=names)
        # Translate the search keys into (label, type) pairs. The columns
        # are assigned directly rather than through an in-place replace on
        # a column selection, which is not guaranteed to write back.
        renamed = {_reare: ('alpha', 'real'),
                   _reombre: ('g_prime', 'real'),
                   _reaim: ('alpha', 'imag'),
                   _reombim: ('g_prime', 'imag')}
        pairs = [renamed[key] for key in found_2d]
        df['label'] = [label for label, _ in pairs]
        df['type'] = [kind for _, kind in pairs]
        df['frame'] = 0
        df3['label'] = [name.split('_')[0] for name in names]
        df3['type'] = [name.split('_')[1] for name in names]
        df3['frame'] = 0
        self.roa = pd.concat([df, df3], ignore_index=True)

    def parse_frequency(self):
        """
        Parse the :class:`~exatomic.core.atom.Frequency` dataframe.

        Returns without setting anything when either the normal mode
        section or the atom information section is missing.

        Note:
            This code removes all negative frequencies.
        """
        _remeth = "NORMAL MODE EIGENVECTORS IN CARTESIAN COORDINATES"
        _refreq = "Frequency"
        _renat = "Atom information"
        found = self.find(_remeth)
        fnat = self.find(_renat)
        # Both sections are indexed below, so either one missing means
        # there is nothing to parse.
        if not found or not fnat:
            return
        # get atom information
        # we assume that there is only one instance of where _renat is found
        start = fnat[0][0] + 3
        stop = start
        while '----' not in self[stop]:
            stop += 1
        columns = ['symbol', 'atom', 'x', 'y', 'z', 'mass']
        atom = self.pandas_dataframe(start, stop, columns)
        atom['atom'] -= 1
        nat = len(atom)
        # find bounds where the calculated frequencies are
        start = found[0][0]
        stop = found[1][0]
        # get the data
        found = self.find(_refreq, start=start, stop=stop)
        dfs = []
        fdx = 0
        for lno, ln in found:
            # get the frequency values
            freq = np.asarray([float(i) for i in ln.split()[1:]])
            ## TODO: here we remove all negative frequencies
            ##       need to find out if this is ok to do
            # set start and end points for the calculated normal modes;
            # line numbers from the bounded search are offset by ``start``
            staf = lno + start + 1
            stof = lno + start + nat * 3 + 2
            nm = self.pandas_dataframe(
                staf, stof, ncol=len(freq)).reset_index(drop=True)
            # boolean mask that is False for negative frequencies
            keep = ~(freq < 0)
            # remove negative frequencies
            nm.drop(columns=[idx for idx, ok in enumerate(keep) if not ok],
                    inplace=True)
            freq = freq[keep]
            # get normal modes in the x, y, z directions
            # NOTE(review): assumes the stacked values come in consecutive
            # x, y, z order -- confirm against pandas_dataframe output.
            nm = nm.stack().values
            nfreq = len(freq)
            dx = nm[::3]
            dy = nm[1::3]
            dz = nm[2::3]
            # assemble dataframe: every column holds nfreq * nat entries,
            # so the mode index is repeated once per atom like ``freq``.
            symbol = np.tile(atom['symbol'], nfreq)
            adx = np.tile(atom['atom'], nfreq)
            freq = np.repeat(freq, nat)
            freqdx = np.repeat(np.arange(fdx, fdx + nfreq), nat)
            frames = np.repeat([0], nfreq * nat)
            fdx += nfreq
            dfs.append(pd.DataFrame.from_dict({
                'symbol': symbol, 'atom': adx,
                'dx': dx, 'dy': dy, 'dz': dz,
                'freq': freq, 'freqdx': freqdx, 'frames': frames,
            }))
        self.frequency = pd.concat(dfs).reset_index(drop=True)

    def parse_gradient(self):
        """
        Parse :class:`exatomic.core.gradient.Gradient` dataframe.

        Returns without setting anything when 'DFT ENERGY GRADIENTS' is
        not found. Every gradient block is assumed to have the same
        number of lines as the first one.
        """
        _regrad = "DFT ENERGY GRADIENTS"
        found = self.find(_regrad, keys_only=True)
        if not found:
            return
        # find start and stop points
        starts = np.array(found) + 4
        stop = starts[0]
        while '----' not in self[stop]:
            stop += 1
        # backtrack one line as the line after the needed info is empty
        stop -= 1
        stops = starts + (stop - starts[0])
        # generate dataframe array
        columns = ['atom', 'symbol', 'x', 'y', 'z', 'fx', 'fy', 'fz']
        kept = ['atom', 'symbol', 'fx', 'fy', 'fz', 'frame']
        dfs = []
        for i, (first, last) in enumerate(zip(starts, stops)):
            block = self.pandas_dataframe(first, last, columns)
            block['frame'] = i
            dfs.append(block[kept])
        # construct the dataframe
        gradient = pd.concat(dfs).reset_index(drop=True)
        gradient['Z'] = gradient['symbol'].map(sym2z)
        # want to keep more or less the same order across dataframes
        # or at least try
        self.gradient = gradient[['Z', 'atom', 'fx', 'fy', 'fz',
                                  'symbol', 'frame']]

    def parse_frame(self):
        """
        Create a minimal :class:`~exatomic.core.frame.Frame` from the (parsed)
        :class:`~exatomic.core.atom.Atom` object.

        A ``total_energy`` column is added from the 'Total DFT energy'
        lines, or from the 'Total SCF energy' lines when no DFT energies
        are present.
        """
        _rescfen = 'Total SCF energy'
        _redften = 'Total DFT energy'
        self.frame = compute_frame_from_atom(self.atom)
        found = self.find(_rescfen, _redften)
        scfs = found[_rescfen]
        dfts = found[_redften]
        if scfs and dfts:
            print('Warning: found total energies from scf and dft, using dft')
        energies = dfts or scfs
        if energies:
            self.frame['total_energy'] = [float(val.split()[-1])
                                          for _, val in energies]

    def __init__(self, *args, **kwargs):
        super(Output, self).__init__(*args, **kwargs)