def pyiron_potentials(pot_lst, potdb):
    """
    Collect potential metadata into a single pandas.DataFrame.

    Entries of ``pot_lst`` whose model (as reported by ``get_model``) equals
    "NISTiprpy" are added first; afterwards one row is added for every item
    yielded by ``get_openkim_potential_lst``.

    Args:
        pot_lst: iterable of potential records understood by the ``get_*``
            helpers of this module.
        potdb: database handle forwarded to ``get_citations``.

    Returns:
        pandas.DataFrame with the columns "Config", "Filename", "Model",
        "Name", "Species" and "Citations".
    """
    columns = {
        "Config": [],
        "Filename": [],
        "Model": [],
        "Name": [],
        "Species": [],
        "Citations": [],
    }

    def add_row(config, file_names, model, name, species, citations):
        # Keep all columns the same length by always appending a full row.
        columns["Config"].append(config)
        columns["Filename"].append(file_names)
        columns["Model"].append(model)
        columns["Name"].append(name)
        columns["Species"].append(species)
        columns["Citations"].append(citations)

    for pot in pot_lst:
        pot_model = get_model(pot=pot)
        if pot_model != "NISTiprpy":
            continue
        add_row(
            get_lammps_config(pot=pot),
            get_file_names(pot=pot),
            pot_model,
            get_name(pot=pot),
            get_species(pot=pot),
            get_citations(pot=pot, potdb=potdb),
        )

    # The second value returned by create() is not used here.
    col, _error = kimpy.collections.create()
    element_lst = get_table('elements').symbol.tolist()
    for it, p in get_openkim_potential_lst(col=col):
        el_lst, pot_str = get_openkim_lammps_parameter(p=p, element_lst=element_lst)
        add_row(
            pot_str,
            [],
            'OPENKIM',
            p,
            el_lst,
            get_openkim_citation(p=p, it=it, col=col),
        )

    return pandas.DataFrame(columns)
def read_atom_prop():
    """
    Build a lookup from atomic number to a standardized property vector.

    The 'elements' table is loaded, the columns listed in the module-level
    ``sel_prop_names`` are extracted as float32, NaNs are replaced via
    ``np.nan_to_num`` and the matrix is passed through ``util.zscore``.

    Returns:
        dict mapping each atomic number to its row of the processed matrix.
    """
    tb_atomic_props = get_table('elements')
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24; the builtin gives the same dtype and works on all versions.
    arr_atomic_nums = np.array(tb_atomic_props['atomic_number'], dtype=int)
    arr_atomic_props = np.nan_to_num(
        np.array(tb_atomic_props[sel_prop_names], dtype=np.float32))
    arr_atomic_props = util.zscore(arr_atomic_props)

    # Row i of the property matrix belongs to the i-th atomic number.
    return dict(zip(arr_atomic_nums, arr_atomic_props))
def __init__(self, formula=None):
    """
    Set up the instance and optionally parse a formula.

    Stores ``formula``, creates an ``XrayDB`` handle and builds
    ``self.covalent_radius``, a dict keyed by element symbol holding the
    'covalent_radius_cordero' column of the 'elements' table.  When
    ``formula`` is given, ``self.parse`` is called on it.
    """
    self.formula = formula
    self.xdb = XrayDB()

    elements = get_table('elements')
    radii = elements[['symbol', 'covalent_radius_cordero']].set_index('symbol')
    # After transposing, the single row is labelled by the column name and
    # maps symbol -> value.
    by_property = radii.T.to_dict('index')
    self.covalent_radius = by_property['covalent_radius_cordero']

    if formula is None:
        return
    self.parse(self.formula)
def read_atom_prop():
    """
    Build a lookup from atomic number to a scaled property vector.

    The 'elements' table is loaded, the columns listed in the module-level
    ``atm_prop_names`` are extracted as floats, NaNs are replaced via
    ``numpy.nan_to_num`` and the matrix is passed through ``scale``.

    Returns:
        dict mapping each atomic number to its row of the processed matrix.
    """
    tb_atm_props = get_table('elements')
    # ``numpy.int`` / ``numpy.float`` were plain aliases of the builtins and
    # were removed in NumPy 1.24; the builtins yield the same dtypes.
    arr_atm_nums = numpy.array(tb_atm_props['atomic_number'], dtype=int)
    arr_atm_props = numpy.nan_to_num(
        numpy.array(tb_atm_props[atm_prop_names], dtype=float))
    arr_atm_props = scale(arr_atm_props)

    # Row i of the property matrix belongs to the i-th atomic number.
    return dict(zip(arr_atm_nums, arr_atm_props))
def clielement():
    '''
    Command line entry point: print the properties of one element.

    The single positional argument is converted to ``int`` when possible and
    otherwise passed on unchanged to ``element``.
    '''

    def show_section(heading, text):
        # Print a blue heading followed by the text wrapped at 70 columns.
        if text is None:
            return
        print(colorama.Fore.BLUE + heading)
        print('\n'.join([' ' + s for s in textwrap.wrap(text, 70)]))

    colorama.init(autoreset=True)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'element', help='Element identifier: symbol, name or atomic number')
    args = parser.parse_args()

    identifier = args.element
    try:
        identifier = int(identifier)
    except ValueError:
        # Not a number: keep the string (symbol or name).
        pass

    e = element(identifier)

    banner = Figlet('dotmatrix', justify='center').renderText(e.symbol)

    table = get_table('elements')
    et = table[table['symbol'] == e.symbol].transpose()
    # The long text fields are printed as separate sections below.
    et.drop(['description', 'sources', 'uses'], inplace=True)
    et.index = et.index.str.replace('_', ' ').str.capitalize()
    et.sort_index(inplace=True)

    # print the data
    print(colorama.Fore.RED + banner)

    show_section('Description\n===========\n', e.description)
    show_section('\nSources\n=======\n', e.sources)
    show_section('\nUses\n====\n', e.uses)

    print(colorama.Fore.GREEN + '\nProperties\n==========\n')
    print(et.to_string(justify='left', header=False))
def clielement():
    """
    Command line entry point: print the properties of one element.

    The single positional argument is converted to ``int`` when possible and
    otherwise passed on unchanged to ``element``.
    """
    colorama.init(autoreset=True)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "element", help="Element identifier: symbol, name or atomic number")
    args = parser.parse_args()

    try:
        identifier = int(args.element)
    except ValueError:
        # Not a number: keep the string (symbol or name).
        identifier = args.element

    e = element(identifier)

    figlet = Figlet("dotmatrix", justify="center")
    banner = figlet.renderText(e.symbol)

    table = get_table("elements")
    et = table[table["symbol"] == e.symbol].transpose()
    # The long text fields are printed as separate sections below.
    et.drop(["description", "sources", "uses"], inplace=True)
    et.index = et.index.str.replace("_", " ").str.capitalize()
    et.sort_index(inplace=True)

    # print the data
    print(colorama.Fore.RED + banner)

    sections = (
        ("Description\n===========\n", "description"),
        ("\nSources\n=======\n", "sources"),
        ("\nUses\n====\n", "uses"),
    )
    for heading, attr in sections:
        text = getattr(e, attr)
        if text is None:
            continue
        print(colorama.Fore.BLUE + heading)
        wrapped = [" " + s for s in textwrap.wrap(text, 70)]
        print("\n".join(wrapped))

    print(colorama.Fore.GREEN + "\nProperties\n==========\n")
    print(et.to_string(justify="left", header=False))
def load_mat_atom_feats():
    """
    Populate the module globals ``mat_atom_feats`` and ``num_atom_feats``.

    The columns named in ``atom_feat_names`` are taken from the 'elements'
    table (NaNs replaced via ``numpy.nan_to_num``, first 96 rows kept), one
    extra column is appended holding entry ``1`` of each element's
    ``ionenergies`` (0 when that key is absent), and the result is passed
    through ``preprocessing.scale``.
    """
    global mat_atom_feats, num_atom_feats

    elem_table = get_table('elements')
    feats = numpy.nan_to_num(numpy.array(elem_table[atom_feat_names]))[:96, :]

    n_rows = feats.shape[0]
    first_ion = numpy.zeros((n_rows, 1))
    for row in range(n_rows):
        # Row index is zero-based, element() is called with row + 1.
        energies = element(row + 1).ionenergies
        first_ion[row, 0] = energies[1] if 1 in energies else 0

    stacked = numpy.hstack((feats, first_ion))
    mat_atom_feats = preprocessing.scale(stacked)
    num_atom_feats = mat_atom_feats.shape[1]
def _elements(*drop_list):
    """
    Return the non-object columns of mendeleev's 'elements' table.

    Args:
        *drop_list: names of columns to remove.  When none are given a
            built-in default list is used.

    Returns:
        pandas.DataFrame with the remaining columns sorted by name and the
        row index replaced by the element symbols.

    Raises:
        KeyError: if a requested column is not among the non-object columns.
    """
    from mendeleev import get_table

    table = get_table('elements')
    numeric = table.select_dtypes(exclude=[object])

    # ``*drop_list`` arrives as a tuple, and pandas interprets a tuple passed
    # to ``drop`` as ONE (multi-level) label, so it must be turned into a list.
    to_drop = list(drop_list) if drop_list else [
        'group_id', 'series_id',
        'fusion_heat',
        'abundance_crust', 'abundance_sea',
        # 'is_monoisotopic', 'is_radioactive',
        # NOTE(review): the line above was a comment in the original; confirm
        # that both names were meant to stay commented out.
    ]

    # Keyword form: the positional ``axis`` argument is rejected by pandas 2.x.
    numeric = numeric.drop(columns=to_drop)
    # ``reindex_axis`` no longer exists in pandas; ``reindex`` is its replacement.
    numeric = numeric.reindex(columns=sorted(numeric.columns))
    # Relabel the rows with the element symbol found at the same index label.
    numeric = numeric.rename(index=lambda i: table.loc[i, 'symbol'])
    return numeric
print('Writing df out...') if os.path.splitext(outfile)[1] == '.csv': df_ij.to_csv(outfile, sep='\t', encoding = 'utf-8') elif os.path.splitext(outfile)[1] == '.p': pickle.dump(df_ij, open(outfile, "wb" )) # elif os.path.splitext(outfile)[1] == '.feather': # df_ij.to_feather(outfile) else: raise ValueError('{} already exists and overwrite==False, ' 'either set overwrite==True or change outfile'.format(outfile)) #----------------------------------------------------------------------------------------------------------------------- # SELECT SITES THAT RECORD CHEMICAL CONCENTRATIONS #----------------------------------------------------------------------------------------------------------------------- #Create list of elements elems = list(itertools.chain.from_iterable(mendeleev.get_table('elements')[['name', 'symbol']].values)) elems_out = ['monoxide', 'dioxide', 'H', 'C', 'O', 'N', 'S', 'Hydrogen', 'Carbon', 'Oxygen', 'Nitrogen', 'Sulfur', '.*ene'] elems_regex = re.compile('|'.join(['\\b{}\\b'.format(e) for e in elems]), re.IGNORECASE) elemsout_regex = re.compile('|'.join(['\\b{}\\b'.format(e) for e in elems_out]), re.IGNORECASE) #Create list of parameter codes based on https://aqs.epa.gov/aqsweb/documents/codetables/methods_all.html #paramsel = ['11','12','14','22','65','82','85','86','89'] #param_regex = re.compile('|'.join(['(^{}.*)'.format(p) for p in paramsel])) #(monitors['Parameter Code'].astype(str).str.contains(param_regex)) & monitors_chem = monitors.loc[(monitors['Parameter Name'].str.contains(elems_regex)) & ~(monitors['Parameter Name'].str.contains(elemsout_regex)),:] #Pad monitors' codes with 0s to match sites' codes pd.unique(monitors_chem['County Code']) monitors_chem.loc[:, 'State Code'] = monitors_chem.loc[:, 'State Code'].astype(str).str.pad(2, 'left', '0')
def get_neutral_data():
    """
    Assemble one wide pandas.DataFrame from several database tables.

    The 'elements' table is left-joined with 'series' and 'groups', then
    extended with additional electronegativity scales, hardness, softness,
    a mass string, two effective nuclear charge columns and the ionization
    energy of degree 1.
    """
    elements = get_table("elements")
    series = get_table("series")
    groups = get_table("groups")

    elements = elements.merge(
        series,
        left_on="series_id",
        right_on="id",
        how="left",
        suffixes=("", "_series"),
    )
    elements = elements.merge(
        groups,
        left_on="group_id",
        right_on="group_id",
        how="left",
        suffixes=("", "_group"),
    )
    elements = elements.rename(columns={"color": "series_colors"})

    # One extra column per electronegativity scale.
    for scale in (
        "allred-rochow",
        "cottrell-sutton",
        "gordy",
        "martynov-batsanov",
        "mulliken",
        "nagle",
        "sanderson",
    ):
        elements["en_" + scale] = [
            element(symbol).electronegativity(scale=scale)
            for symbol in elements["symbol"]
        ]

    # hardness() and softness() are looked up by name and called per element.
    for attr in ("hardness", "softness"):
        elements[attr] = [
            getattr(element(symbol), attr)() for symbol in elements["symbol"]
        ]

    elements["mass"] = [
        element(symbol).mass_str() for symbol in elements["symbol"]
    ]

    for column, method in (("zeff_slater", "slater"),
                           ("zeff_clementi", "clementi")):
        elements.loc[:, column] = elements.apply(
            lambda row, method=method: get_zeff(row["atomic_number"],
                                                method=method),
            axis=1,
        )

    session = get_session()
    engine = get_engine()

    # Ionization energies of degree 1 for atomic numbers 1..118.
    query = session.query(IonizationEnergy)
    query = query.filter(IonizationEnergy.degree == 1)
    query = query.filter(
        IonizationEnergy.atomic_number.in_(list(range(1, 119))))

    compiled = query.statement.compile(dialect=sqlite.dialect())
    out = pd.read_sql_query(compiled, engine)
    out = out[["atomic_number", "energy"]].rename(
        columns={"energy": "ionization_energy"})

    return pd.merge(elements, out, on="atomic_number", how="left")
def test_get_table(table_name, nrows):
    """Check that the named table loads with the expected number of rows."""
    loaded = get_table(table_name)
    row_count, *_ = loaded.shape
    assert row_count == nrows
import pandas as pd
import pickle as pk
from mendeleev import get_table
import os

ptable = get_table('elements')

set_of_molecules = set()
set_of_molecules_with_mulliken = set()

# Lookup table: element symbol -> atomic number.
ptable_access = dict(zip(ptable["symbol"], ptable["atomic_number"]))

data_path_prefix = "../data/"
path_to_structures = data_path_prefix + "/structures/"

molecule_structures = {}
#with open(data_path_prefix + "molecule_structures.pkl",'rb') as f:
#    molecule_structures = pk.load(f)

print("Processing Structures and atom properties")
for i, molecule in enumerate(os.listdir(path_to_structures)):
    # Each structure file is space separated; its first line is skipped.
    df = pd.read_csv(
        path_to_structures + molecule,
        sep=' ',
        skiprows=[0],
        names=["atom", "x", "y", "z"],
    )
    for a_index, atom in enumerate(df["atom"].tolist()):
        atomic_number = ptable_access[atom]
        # ptable rows are addressed by label (atomic number - 1).
        ptable_row = atomic_number - 1
        df.at[a_index, "electron_affinity"] = ptable.at[ptable_row, "electron_affinity"]
        df.at[a_index, "electronegativity"] = ptable.at[ptable_row, "en_pauling"]
        df.at[a_index, "num_protons"] = atomic_number
        df.at[a_index, "dipole_polarizability"] = ptable.at[ptable_row, "dipole_polarizability"]
from mendeleev import get_table
import pandas as pd

# Full 'elements' table; kept intact so the merge below can select `cols`.
ptable = get_table('elements')
cols = ['atomic_number', 'symbol', 'atomic_radius', 'en_pauling', 'block',
        'vdw_radius_mm3']

# The preview and the summary get their own names.  Previously both were
# assigned back to `ptable`; since describe() keeps only the numeric columns
# by default, the later `ptable[cols]` raised KeyError for 'symbol' and
# 'block'.
ptable_head = ptable[cols].head()
ptable_summary = ptable[cols].describe()

isotopes = get_table('isotopes', index_col='id')

# Outer join of the selected element columns with the isotopes table.
merged = pd.merge(ptable[cols], isotopes, how='outer', on='atomic_number')
ltm.tm_hour, ltm.tm_min, ltm.tm_sec) print(mssg_head + message) with open(path2files / 'parameters/standard_topology.json') as f: PDB_PROTEIN_TOPOLOGY = json.load(f) with open(path2files / 'parameters/standard_labels.json') as f: PDB_PROTEIN_TYPES = json.load(f) with open(path2files / 'parameters/standard_charges.json') as f: PDB_PROTEIN_CHARGES = json.load(f) with open(path2files / 'parameters/standard_charges_by_atom.json') as f: PDB_PROTEIN_TYPE_CHARGES = json.load(f) with open(path2files / 'parameters/babel2standard.json') as f: BABEL2STANDARD = json.load(f) PERIODIC_TABLE = get_table('elements') MAP_AN2SYMBOL = PERIODIC_TABLE[['atomic_number', 'symbol']].set_index('atomic_number') MAP_SYMBOL2AN = PERIODIC_TABLE[['atomic_number', 'symbol']].set_index('symbol') with open(path2files / 'parameters/wfn_symmetry_index.json') as f: WFN_SYMMETRY_INDEX = json.load(f) def get_atomic_number(symbol): an = MAP_SYMBOL2AN.loc[symbol]['atomic_number'] return an def get_symbol(atomic_number): atomic_number = int(atomic_number)
def gather_ptable_dataframe():
    """
    Gather element properties from several sources into one DataFrame.

    Sources, in the order they are combined:
      * every ``*.csv`` file one directory level below ``HERE`` (one column
        per file, named ``<parent directory>_<file stem>``),
      * ``ase_data.ground_state_magnetic_moments``,
      * mendeleev's 'elements' table,
      * ``atomic_radius`` and ``X`` of ``Element`` for every symbol,
      * the degree-1 energy from mendeleev's 'ionizationenergies' table.

    Returns:
        pandas.DataFrame indexed by element symbol.
    """
    # Properties for which 0 is a legitimate value; for every other property
    # a 0 in the csv is replaced by None.
    valid_0_list = [
        "valence", "valence_s", "valence_p", "valence_d", "valence_f",
        "unfilled", "unfilled_f", "unfilled_d",
        "electron_affinity", "electronegativity", "magnetic_moment",
    ]

    df_list = []

    ## get properties in csv (retrieved from magpie project, imat project, and wikipedia)
    all_files = glob.glob(str(HERE / "*" / "*.csv"))
    for filename in all_files:
        prop = str(Path(filename).stem)
        source = str(Path(filename).parent.stem)
        name = source + "_" + prop
        tmp_df = pd.read_csv(filename, names=[name])
        if prop not in valid_0_list:
            tmp_df = tmp_df[name].apply(lambda x: None if x == 0 else x)
        df_list.append(tmp_df)

    ## get ase magnetic moments
    magmom_list = ase_data.ground_state_magnetic_moments
    tmp_df = pd.DataFrame(magmom_list, columns=["ase_magnetic_moment"])
    df_list.append(tmp_df)

    # concat in a single dataframe and drop the 0th entry (up to here,
    # properties were saved with element 0 as dummy so the index corresponded
    # to the atomic number)
    external_props = pd.concat(df_list, axis=1).drop(0)

    # concat with mendeleev's ptable (need reindexing with atomic number)
    ptable = get_table("elements")
    ptable = ptable.set_index('atomic_number', drop=False)
    ptable = pd.concat([ptable, external_props], axis=1)

    # add pymatgen properties
    ptable["pymg_atomic_radius"] = [Element(x).atomic_radius for x in ptable['symbol']]
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        ptable["pymg_electronegativity"] = [Element(x).X for x in ptable['symbol']]

    # add the first ionization energy from mendeleev
    tmp_df = get_table("ionizationenergies")
    ptable["ionization_energy"] = tmp_df.loc[tmp_df["degree"] == 1].set_index('atomic_number')['energy']

    # drop useless columns (keyword form: the positional ``axis`` argument
    # is rejected by pandas 2.x)
    ptable = ptable.drop(columns=[
        'annotation', 'description', 'discoverers', 'discovery_location',
        'geochemical_class', 'goldschmidt_class', 'uses', 'sources',
        'name_origin',
    ])

    # reindex by symbol
    ptable = ptable.set_index('symbol')
    return ptable
def get_neutral_data():
    '''
    Assemble one wide pandas.DataFrame from several database tables.

    The 'elements' table is left-joined with 'series' and 'groups', then
    extended with additional electronegativity scales, hardness, softness,
    a mass string, two effective nuclear charge columns and the ionization
    energy of degree 1.
    '''
    elements = get_table('elements')
    series = get_table('series')
    groups = get_table('groups')

    elements = pd.merge(
        elements, series,
        left_on='series_id', right_on='id',
        how='left', suffixes=('', '_series'))
    elements = pd.merge(
        elements, groups,
        left_on='group_id', right_on='group_id',
        how='left', suffixes=('', '_group'))
    elements.rename(columns={'color': 'series_colors'}, inplace=True)

    def per_element(compute):
        # Evaluate ``compute`` on the element object of every row, in order.
        return [compute(element(symbol)) for symbol in elements['symbol']]

    en_scales = (
        'allred-rochow', 'cottrell-sutton', 'gordy', 'martynov-batsanov',
        'mulliken', 'nagle', 'sanderson',
    )
    for scale in en_scales:
        elements['en_' + scale] = per_element(
            lambda el, scale=scale: el.electronegativity(scale=scale))

    # hardness() and softness() are looked up by name and called per element.
    for attr in ('hardness', 'softness'):
        elements[attr] = per_element(
            lambda el, attr=attr: getattr(el, attr)())

    elements['mass'] = per_element(lambda el: el.mass_str())

    def zeff_column(method):
        # Row-wise effective nuclear charge for the given method.
        return elements.apply(
            lambda x: get_zeff(x['atomic_number'], method=method), axis=1)

    elements.loc[:, 'zeff_slater'] = zeff_column('slater')
    elements.loc[:, 'zeff_clementi'] = zeff_column('clementi')

    session = get_session()
    engine = get_engine()

    # Ionization energies of degree 1 for atomic numbers 1..118.
    atomic_numbers = list(range(1, 119))
    query = (
        session.query(IonizationEnergy)
        .filter(IonizationEnergy.degree == 1)
        .filter(IonizationEnergy.atomic_number.in_(atomic_numbers))
    )

    statement = query.statement.compile(dialect=sqlite.dialect())
    out = pd.read_sql_query(statement, engine)
    out = out[['atomic_number', 'energy']]
    out.columns = ['atomic_number', 'ionization_energy']

    elements = pd.merge(elements, out, on='atomic_number', how='left')
    return elements
def get_ptable():
    """Return whatever ``get_table`` yields for the 'elements' table."""
    elements_table = get_table('elements')
    return elements_table