def setUp(self):
    """Create the trivial, medium and large ``BasisSet`` fixtures."""
    # Trivial basis set: a single zero-valued row for every declared column,
    # with the exponent and coefficient columns cast to float64.
    trivial = dict((name, [0]) for name in BasisSet._columns)
    trivial['frame'] = 0
    self.bs = BasisSet(trivial)
    for name in ('alpha', 'd'):
        self.bs[name] = self.bs[name].astype(np.float64)

    # Medium basis set: three primitives spread over two sets.
    medium = {
        'frame': 0,
        'alpha': [5., 1., 1.],
        'd': [1.] * 3,
        'shell': [0, 1, 0],
        'set': [0, 0, 1],
        'L': [0, 1, 0],
        'n': [1, 2, 1],
    }
    self.mbs = BasisSet(medium)

    # Large basis set: nine primitives spread over two sets (no 'n' column).
    large = {
        'frame': 0,
        'alpha': [5., 3., 1., 3., 1., 1., 3., 1., 1.],
        'd': [1.] * 9,
        'shell': [0, 0, 0, 1, 1, 2, 0, 0, 1],
        'set': [0, 0, 0, 0, 0, 0, 1, 1, 1],
        'L': [0, 0, 0, 1, 1, 2, 0, 0, 1],
    }
    self.lbs = BasisSet(large)
def parse_basis_set(self):
    """
    Parse the printed general-basis-input section into ``self.basis_set``.

    The section following the 'AO basis set in the form of general basis
    input' header is read up to the first blank line as a four-column
    table. Rows are labelled with a center, a shell and an angular
    momentum ``L``, separator and header rows are discarded, and the
    remaining numeric columns are converted to floats.

    On success this sets ``self.basis_set``, ``self.meta['spherical']`` and
    remaps ``self.atom['set']`` through the mapping returned by
    ``deduplicate_basis_sets``.

    Returns:
        None. Returns early, without setting anything, when the section
        header is not found.
    """
    # Basis flags
    _rebas02 = 'AO basis set in the form of general basis input'
    _rebas03 = ' (Standard|General) basis'
    # Find the basis set
    found = self.regex(_rebas02, _rebas03, keys_only=True)
    if not found[_rebas02]:
        return
    start = stop = found[_rebas02][0] + 1
    # The section runs until the first blank line.
    while self[stop].strip():
        stop += 1
    # Raw data
    df = self.pandas_dataframe(start, stop, 4)

    def _padx(srs):
        # Bracket the given row labels with the first and one-past-last row.
        return [0] + srs.tolist() + [df.shape[0]]

    # Get some indices for appropriate columns
    setdx = _padx(df[0][df[0] == '****'].index)
    shldx = _padx(df[3][~np.isnan(df[3])].index)
    lindx = df[0][df[0].str.lower().isin(lorder + ['sp'])]
    # Populate the df: only the shell header rows carry an L label, so
    # propagate it forward (then backward for any leading rows).
    df['L'] = lindx.str.lower().map(lmap)
    df['L'] = df['L'].ffill().bfill().astype(np.int64)
    df['center'] = np.concatenate([
        np.repeat(i, hi - lo)
        for i, (lo, hi) in enumerate(zip(setdx, setdx[1:]))
    ])
    df['shell'] = np.concatenate([
        np.repeat(i - 1, hi - lo)
        for i, (lo, hi) in enumerate(zip(shldx, shldx[1:]))
    ])
    # Complicated way to get shells but it is flat: subtract from each row
    # the (max shell + 1) reached by the preceding center so that shell
    # numbering restarts for every center.
    maxshl = df.groupby('center')['shell'].max() + 1
    maxshl.index = maxshl.index + 1
    maxshl[0] = 0
    df['shell'] = df['shell'] - df['center'].map(maxshl)
    # Drop all the garbage
    todrop = setdx[:-1] + [i + 1 for i in setdx[:-2]] + lindx.index.tolist()
    df.drop(todrop, inplace=True)
    # Keep cleaning: columns still holding strings have their 'D'
    # characters swapped for 'E' before conversion to float.
    if df[0].dtype == 'object':
        df[0] = df[0].str.replace('D', 'E').astype(np.float64)
    if df[1].dtype == 'object':
        df[1] = df[1].str.replace('D', 'E').astype(np.float64)
    try:
        # Column 2 entirely NaN means no third numeric column was present.
        sp = np.isnan(df[2]).sum() == df.shape[0]
    except TypeError:
        # np.isnan rejects a string column; convert it and flag 'SP' shells.
        df[2] = df[2].str.replace('D', 'E').astype(np.float64)
        sp = True
    df.rename(columns={0: 'alpha', 1: 'd'}, inplace=True)
    # Deduplicate basis sets and expand 'SP' shells if present
    df, setmap = deduplicate_basis_sets(df, sp=sp)
    spherical = '5D' in self[found[_rebas03][0]]
    # With no L >= 2 present the flag is forced to True.
    if df['L'].max() < 2:
        spherical = True
    self.basis_set = BasisSet(df)
    self.meta['spherical'] = spherical
    self.atom['set'] = self.atom['set'].map(setmap)
def parse_basis_set(self):
    """
    Parse the Slater-type functions section into ``self.basis_set``.

    For every 'Atom Type' entry found after the section header, the four
    lines starting at that entry plus all following lines up to the next
    blank line are collected and read as a fixed-width table with columns
    ``n``, ``L``, ``alpha`` and ``symbol``.

    On success this sets ``self.basis_set``, sets
    ``self.meta['spherical']`` to ``False`` and fills ``self.atom['set']``
    by mapping ``self.atom['symbol']`` onto the set indices.

    Returns:
        None. Returns early, without setting anything, when the section
        header or the 'Atom Type' entries are not found.
    """
    # Find the basis set
    _re_bas_00 = '(Slater-type) F U N C T I O N S'
    _re_bas_01 = 'Atom Type'
    found = self.find(_re_bas_00, keys_only=True)
    if not found:
        return
    start = found[-1] + 3
    starts = self.find(_re_bas_01, start=start, keys_only=True)
    if not starts:
        return
    lines = []
    for ext in starts:
        first = start + ext
        # Four header lines, then everything up to the next blank line.
        lines.extend(range(first, first + 4))
        stop = first + 4
        while self[stop].strip():
            lines.append(stop)
            stop += 1
    df = pd.read_fwf(StringIO('\n'.join([self[i] for i in lines])),
                     widths=[4, 2, 12, 4],
                     names=['n', 'L', 'alpha', 'symbol'])
    # Where atom types change
    idxs = [0] + df['n'][df['n'] == '---'].index.tolist() + [df.shape[0]]
    sizes = [hi - lo for lo, hi in zip(idxs, idxs[1:])]
    # Both counters start at -1 within a chunk; header rows are dropped below.
    df['set'] = np.concatenate(
        [np.repeat(i - 1, size) for i, size in enumerate(sizes)])
    df['shell'] = np.concatenate(
        [np.arange(-1, size - 1) for size in sizes])
    # Atom table basis set map
    basmap = df['symbol'].dropna()
    basmap = basmap[basmap.str.endswith(')')].str.strip(')')
    basmap = {
        val: df['set'][key] + 1
        for key, val in basmap.to_dict().items()
    }
    # Discard the garbage: keep only rows whose 'n' entry is numeric.
    # ``eq(True)`` maps missing values to False and yields a boolean mask.
    is_data = df['n'].str.strip().str.isnumeric().eq(True)
    df.drop(df.index[~is_data], inplace=True)
    df.drop('symbol', axis=1, inplace=True)
    # Clean up the series
    df['alpha'] = df['alpha'].astype(np.float64)
    df['n'] = df['n'].astype(np.int64)
    df['L'] = df['L'].str.lower().map(lmap)
    df['d'] = np.sqrt((2 * df['L'] + 1) / (4 * np.pi))
    df['r'] = df['n'] - (df['L'] + 1)
    df['frame'] = 0
    self.basis_set = BasisSet(df)
    self.meta['spherical'] = False
    self.atom['set'] = self.atom['symbol'].map(basmap)
def parse_basis_set(self):
    """
    Parse the :class:`~exatomic.core.basis.BasisSet` dataframe.

    The lines between the first ' Basis "' marker and a ' Summary of "'
    marker are read as a fixed-width table with columns ``shell``, ``L``,
    ``alpha`` and ``d``. Rows whose ``shell`` entry is not a digit string
    are treated as tags naming the set that the following rows belong to.

    On success this sets ``self.basis_set``, ``self.meta['spherical']`` and
    fills ``self.atom['set']`` by mapping ``self.atom['tag']`` onto the
    set indices. ``self.parse_atom()`` is called first when ``self`` has
    no ``atom`` attribute.

    Returns:
        None. Returns early, without setting the basis set, when either
        marker is not found.
    """
    if not hasattr(self, "atom"):
        self.parse_atom()
    _rebas01 = ' Basis "'
    _rebas02 = ' Summary of "'
    found = self.find(_rebas01, _rebas02)
    if not found[_rebas01] or not found[_rebas02]:
        return
    spherical = "spherical" in found[_rebas01][0][1]
    start = found[_rebas01][0][0] + 2
    idx = 1 if len(found[_rebas02]) > 1 else -1
    stop = found[_rebas02][idx][0] - 1
    # Read in all of the extra lines that contain ---- and tag names
    df = pd.read_fwf(StringIO("\n".join(self[start:stop])),
                     widths=(4, 2, 16, 16),
                     names=("shell", "L", "alpha", "d"))
    df.loc[df['shell'] == "--", "shell"] = np.nan
    # Comparing with False (rather than negating) keeps missing values out
    # of the selection, so only the tag rows remain.
    tags = df.loc[df['shell'].str.isdigit().eq(False), "shell"]
    idxs = tags.index.tolist()
    idxs.append(len(df))
    df['set'] = ""
    # Label slices are inclusive; the row shared with the next tag is
    # overwritten on the following iteration.
    for tag, lo, hi in zip(tags, idxs, idxs[1:]):
        df.loc[lo:hi, "set"] = tag
    # Tag and separator rows have missing values and are dropped here.
    df = df.dropna().reset_index(drop=True)
    mapper = {tag: i for i, tag in enumerate(df['set'].unique())}
    df['set'] = df['set'].map(mapper)
    df['L'] = df['L'].str.strip().str.lower().map(lmap)
    df['alpha'] = df['alpha'].astype(float)
    df['d'] = df['d'].astype(float)
    # NO SUPPORT FOR MULTIPLE FRAMES?
    df['frame'] = 0
    self.basis_set = BasisSet(df)
    self.meta['spherical'] = spherical
    self.atom['set'] = self.atom['tag'].map(mapper)