def __init__(self):
    """
    Load every Magpie elemental property table into memory.

    Each ``*.table`` file found in the ``magpie_elementdata`` data directory
    is parsed into ``self.all_elemental_props``, a mapping of
    ``{property name: {element symbol: value}}``. Line ``Z`` of a table is
    used as the value of the element with atomic number ``Z``. Values that
    cannot be parsed, or that are missing because a table has fewer lines
    than there are elements, are stored as NaN.
    """
    self.all_elemental_props = dict()
    self.data_dir = os.path.join(module_dir, "data_files",
                                 'magpie_elementdata')

    # Make a list of available properties (file names minus the extension)
    available_props = [
        os.path.basename(datafile).replace('.table', '')
        for datafile in glob(os.path.join(self.data_dir, "*.table"))
    ]

    # parse and store elemental properties
    for descriptor_name in available_props:
        table_path = os.path.join(self.data_dir,
                                  '{}.table'.format(descriptor_name))
        with open(table_path, 'r') as f:
            lines = f.readlines()

        self.all_elemental_props[descriptor_name] = dict()
        for atomic_no in range(1, len(_pt_data) + 1):  # max Z=103
            try:
                if descriptor_name in ["OxidationStates"]:
                    # this table holds several numbers per line
                    prop_value = [float(i)
                                  for i in lines[atomic_no - 1].split()]
                else:
                    prop_value = float(lines[atomic_no - 1])
            except (ValueError, IndexError):
                # ValueError: unparsable entry; IndexError: the table is
                # shorter than the list of elements. Both mean "no data".
                prop_value = float("NaN")
            self.all_elemental_props[descriptor_name][
                Element.from_Z(atomic_no).symbol] = prop_value
def get_magpie_descriptor(comp, descriptor_name):
    """
    Look up a Magpie elemental property for every atom of a composition.

    All data files are read from ``data/magpie_elementdata``, resolved
    relative to the current working directory.

    Args:
        comp: (str) compound composition, eg: "NaCl"
        descriptor_name: name of Magpie descriptor needed. Find the entire
            list at
            https://bitbucket.org/wolverton/magpie/src/6ecf8d3b79e03e06ef55c141c350a08fbc8da849/Lookup%20Data/?at=master

    Returns:
        (list) of descriptor values for each atom in the composition

    Raises:
        ValueError: if no data file matches ``descriptor_name``.
    """
    record = collections.namedtuple(
        'magpiedata_tup', 'element propname propvalue propunit amt')

    # Every file name in the data directory, minus '.table', is a candidate
    available_props = [
        fname.replace('.table', '')
        for fname in os.listdir('data/magpie_elementdata')
    ]
    if descriptor_name not in available_props:
        raise ValueError(
            "This descriptor is not available from the Magpie repository. Choose from {}"
            .format(available_props))

    el_amt = Composition(comp).get_el_amt_dict()

    # Pick the unit out of the README: find the line naming the table and
    # inspect the line at index ``lineno + 1`` (``lineno`` is 1-based).
    unit = None
    with open('data/magpie_elementdata/README.txt', 'r') as readme_file:
        readme_lines = readme_file.readlines()
    table_name = descriptor_name + '.table'
    for lineno, line in enumerate(readme_lines, 1):
        if table_name in line:
            candidate = readme_lines[lineno + 1]
            if 'Units: ' in candidate:
                unit = candidate.split(':')[1].strip('\n')

    # Extract from data file; line Z holds the value of element Z
    with open('data/magpie_elementdata/' + descriptor_name + '.table', 'r') as descp_file:
        lines = descp_file.readlines()

    magpiedata = []
    records = []
    for el, amount in el_amt.items():
        raw_value = lines[Element(el).Z - 1]
        records.append(
            record(element=el, propname=descriptor_name,
                   propvalue=float(raw_value), propunit=unit, amt=amount))
        # One copy of the value per atom of this element in the compound
        magpiedata.extend([float(raw_value)] * int(amount))

    return magpiedata
def sulfide_type(structure):
    """
    Classify the sulfur environment of a structure.

    Args:
        structure (Structure): Input structure. It is copied, and oxidation
            states are removed from the copy before analysis.

    Returns:
        (str) "sulfide" or "polysulfide". None is returned when the
        structure is a single element, contains no S, or when any
        symmetry-distinct S site is classified as sulfate.
    """
    structure = structure.copy()
    structure.remove_oxidation_states()

    sulfur = Element("S")
    composition = structure.composition
    if composition.is_element or sulfur not in composition:
        return None

    symmetrized = SpacegroupAnalyzer(
        structure, symprec=0.1).get_symmetrized_structure()
    # One representative of each set of equivalent S sites is enough
    sulfur_sites = [
        group[0] for group in symmetrized.equivalent_sites
        if group[0].specie == sulfur
    ]

    def classify(site):
        # in an exceptionally rare number of structures, the search
        # radius needs to be increased to find a neighbor atom
        radius = 4
        found = []
        while len(found) == 0:
            found = structure.get_neighbors(site, radius)
            radius *= 2
            if radius > max(structure.lattice.abc) * 2:
                break

        found = sorted(found, key=lambda n: n[1])
        # Keep at most four neighbors within 0.4 of the closest one
        cutoff = found[0].distance + 0.4
        nearest_species = [
            nn.site.specie for nn in found if nn.distance < cutoff
        ][:4]

        mean_x = np.mean([sp.X for sp in nearest_species])
        if mean_x > sulfur.X:
            return "sulfate"
        if mean_x == sulfur.X and sulfur in nearest_species:
            return "polysulfide"
        return "sulfide"

    site_types = {classify(site) for site in sulfur_sites}
    if "sulfate" in site_types:
        return None
    if "polysulfide" in site_types:
        return "polysulfide"
    return "sulfide"
def readLammps(desired_return):
    """
    Collect results of a LAMMPS run from ``log.lammps`` and ``dump.atoms``.

    Both files are read from the current working directory.

    Args:
        desired_return: iterable of result names to return. The names
            produced here are "energies", "structures", "forces" and
            "stresses".

    Returns:
        (dict) mapping every requested name to its value. When indexing the
        last parsed log block raises IndexError, every requested name maps
        to None instead.
    """
    from pymatgen.io.lammps.outputs import parse_lammps_dumps, parse_lammps_log
    from pymatgen import Structure, Element
    from pymatgen.analysis.elasticity.stress import Stress
    from numpy import unique, array, argmin

    try:
        log = parse_lammps_log(filename="log.lammps")[-1]
    except IndexError:
        return {ret: None for ret in desired_return}

    result_dict = {"energies": list(log['PotEng'])[-1]}

    for dump in parse_lammps_dumps("dump.atoms"):
        atoms = dump.data
        natoms = dump.natoms
        coords = [''] * natoms
        forces = [''] * natoms
        masses = [''] * natoms
        # Rows are stored at position id - 1, i.e. ordered by atom id
        for row in range(natoms):
            slot = atoms["id"][row] - 1
            coords[slot] = [atoms["x"][row], atoms["y"][row],
                            atoms["z"][row]]
            forces[slot] = [atoms["fx"][row], atoms["fy"][row],
                            atoms["fz"][row]]
            masses[slot] = atoms["mass"][row]
        box = dump.box

        # Identify each distinct mass with the element of closest atomic mass
        unique_masses = unique(masses)
        ref_masses = [el.atomic_mass.real for el in Element]
        diff = abs(array(ref_masses) - unique_masses[:, None])
        atomic_numbers = argmin(diff, axis=1) + 1
        symbols = [Element.from_Z(an).symbol for an in atomic_numbers]
        species_map = dict(zip(unique_masses, symbols))
        atom_species = [species_map[mass] for mass in masses]

        result_dict["structures"] = Structure(box.to_lattice(),
                                              atom_species,
                                              coords,
                                              coords_are_cartesian=True)
        result_dict["forces"] = forces

    # NOTE(review): the 1e-1 factor looks like a unit conversion - confirm
    pressure = [
        1e-1 * list(log['c_press[{}]'.format(i)])[-1] for i in range(1, 7)
    ]
    p_xx, p_yy, p_zz, p_xy, p_xz, p_yz = pressure
    result_dict["stresses"] = Stress([[p_xx, p_xy, p_xz],
                                      [p_xy, p_yy, p_yz],
                                      [p_xz, p_yz, p_zz]])

    return {ret: result_dict[ret] for ret in desired_return}
def test_ozonide(self):
    """Check the corrected energy of a LiO3 entry built from one Li and three O."""
    li, o = Element("Li"), Element("O")
    lattice = Lattice.from_parameters(3.999911, 3.999911, 3.999911,
                                      133.847504, 102.228244, 95.477342)
    frac_coords = [[0.513004, 0.513004, 1.000000],
                   [0.017616, 0.017616, 0.000000],
                   [0.649993, 0.874790, 0.775203],
                   [0.099587, 0.874790, 0.224797]]
    structure = Structure(lattice, [li, o, o, o], frac_coords)

    run_parameters = {
        'is_hubbard': False,
        'hubbards': None,
        'run_type': 'GGA',
        'potcar_symbols': ['PAW_PBE Fe 06Sep2000', 'PAW_PBE O 08Apr2002'],
    }
    entry = ComputedStructureEntry(structure, -3, parameters=run_parameters)

    corrected = self.compat.process_entry(entry)
    # Uncorrected energy is -3; 0.66975 is subtracted three times
    self.assertAlmostEqual(corrected.energy, -3.0 - 3 * 0.66975)
def test_disordered(self):
    """Compare CifWriter output for a two-site structure whose second site is half Si, half N."""
    si = Element("Si")
    n = Element("N")
    coords = list()
    coords.append(np.array([0, 0, 0]))
    coords.append(np.array([0.75, 0.5, 0.75]))
    lattice = Lattice(np.array([[3.8401979337, 0.00, 0.00],
                                [1.9200989668, 3.3257101909, 0.00],
                                [0.00, -2.2171384943, 3.1355090603]]))
    # The second site is given as a species -> occupancy mapping
    struct = Structure(lattice, [si, {si: 0.5, n: 0.5}], coords)
    writer = CifWriter(struct)
    # Expected CIF text. Lines are compared after strip(), so only leading
    # and trailing whitespace is ignored; spacing inside a line must match.
    ans = """# generated using pymatgen
data_Si1.5N0.5
_symmetry_space_group_name_H-M   'P 1'
_cell_length_a   3.84019793
_cell_length_b   3.84019899
_cell_length_c   3.84019793
_cell_angle_alpha   119.99999086
_cell_angle_beta   90.00000000
_cell_angle_gamma   60.00000914
_symmetry_Int_Tables_number   1
_chemical_formula_structural   Si1.5N0.5
_chemical_formula_sum   'Si1.5 N0.5'
_cell_volume   40.04479464
_cell_formula_units_Z   1
loop_
 _symmetry_equiv_pos_site_id
 _symmetry_equiv_pos_as_xyz
  1  'x, y, z'
loop_
 _atom_site_type_symbol
 _atom_site_label
 _atom_site_symmetry_multiplicity
 _atom_site_fract_x
 _atom_site_fract_y
 _atom_site_fract_z
 _atom_site_occupancy
  Si  Si0  1  0.00000000  0.00000000  0.00000000  1
  Si  Si1  1  0.75000000  0.50000000  0.75000000  0.5
  N  N2  1  0.75000000  0.50000000  0.75000000  0.5"""

    # zip() stops at the shorter sequence, so surplus lines on either side
    # are not compared.
    for l1, l2 in zip(str(writer).split("\n"), ans.split("\n")):
        self.assertEqual(l1.strip(), l2.strip())
def test_oxide_energy_corr(self):
    """Check the corrected energy of a Li2O entry described via 'potcar_symbols'."""
    li, o = Element("Li"), Element("O")
    lattice = Lattice.from_parameters(3.278, 3.278, 3.278, 60, 60, 60)
    frac_coords = [[0.25, 0.25, 0.25],
                   [0.75, 0.75, 0.75],
                   [0.0, 0.0, 0.0]]
    structure = Structure(lattice, [li, li, o], frac_coords)

    run_parameters = {
        'is_hubbard': False,
        'hubbards': None,
        'run_type': 'GGA',
        'potcar_symbols': ['PAW_PBE Fe 06Sep2000', 'PAW_PBE O 08Apr2002'],
    }
    entry = ComputedStructureEntry(structure, -3, parameters=run_parameters)

    corrected = self.compat.process_entry(entry)
    # Uncorrected energy is -3; 0.66975 is subtracted once (4 decimal places)
    self.assertAlmostEqual(corrected.energy, -3.0 - 0.66975, 4)
def write_param(self):
    """
    Write parameter and coefficient file to perform lammps calculation.

    Two files named after ``self.name`` are written to the current working
    directory: ``<name>.snapcoeff`` (per-element header line followed by
    that element's coefficients) and ``<name>.snapparam`` (describer
    settings).

    Returns:
        (list) ``[self.pair_style, pair_coeff]`` where ``pair_coeff`` is
        ``self.pair_coeff`` formatted with the element list, specie name
        and the two file names.

    Raises:
        ValueError: if ``self.specie`` is not set.
    """
    if not self.specie:
        raise ValueError("No specie given!")

    param_file = '{}.snapparam'.format(self.name)
    coeff_file = '{}.snapcoeff'.format(self.name)

    model = self.model
    describer = self.model.describer
    profile = describer.element_profile
    elements = [el.symbol for el in sorted(Element(e) for e in profile)]
    ne = len(elements)

    nbc = len(describer.subscripts)
    if describer.quadratic:
        # quadratic terms add nbc * (nbc + 1) / 2 further coefficients
        nbc += int((1 + nbc) * nbc / 2)

    # Read here only for the (currently disabled) consistency check that
    # len(model.coef) == ne * (nbc + 1) for this twojmax / diagonalstyle.
    tjm = describer.twojmax
    diag = describer.diagonalstyle

    # Coefficient file: "<ne> <nbc + 1>", then one section per element
    coeff_lines = ['{} {}'.format(ne, nbc + 1)]
    for element, coeff in zip(elements, np.split(model.coef, ne)):
        coeff_lines.append('{} {} {}'.format(element,
                                             profile[element]['r'],
                                             profile[element]['w']))
        coeff_lines.extend(str(c) for c in coeff)
    with open(coeff_file, 'w') as f:
        f.write('\n'.join(coeff_lines))

    # Parameter file: one "<key> <value>" line per describer setting
    keys = ['rcutfac', 'twojmax', 'rfac0', 'rmin0', 'diagonalstyle']
    param_lines = ['{} {}'.format(k, getattr(describer, k)) for k in keys]
    param_lines.append('quadraticflag {}'.format(int(describer.quadratic)))
    param_lines.append('bzeroflag 0')
    with open(param_file, 'w') as f:
        f.write('\n'.join(param_lines))

    pair_coeff = self.pair_coeff.format(elements=' '.join(elements),
                                        specie=self.specie.name,
                                        coeff_file=coeff_file,
                                        param_file=param_file)
    return [self.pair_style, pair_coeff]
def cell_to_structure(
        cell: Tuple[List[List[float]], List[List[float]], List[int]]) -> Structure:
    """
    Build a Structure from a (lattice, positions, atomic numbers) cell.

    cell:
        (Lattice parameters
            [[a_x, a_y, a_z], [b_x, b_y, b_z], [c_x, c_y, c_z]],
         Fractional atomic coordinates in an Nx3 array,
         Z numbers of species in a length N array)
    """
    # Index rather than unpack so that only the first three items are used
    lattice = cell[0]
    frac_coords = cell[1]
    species = [Element.from_Z(z) for z in cell[2]]
    return Structure(lattice=lattice, coords=frac_coords, species=species)
def num_atom_differences(structure: IStructure,
                         ref_structure: IStructure) -> Dict[Element, int]:
    """
    Count, per element, how many atoms ``structure`` has relative to
    ``ref_structure``.

    Args:
        structure: structure whose composition is examined.
        ref_structure: structure used as the reference.

    Returns:
        Mapping of Element to the integer difference
        (``structure`` minus ``ref_structure``). Elements whose difference
        truncates to zero are omitted.
    """
    target_composition = structure.composition.as_dict()
    reference_composition = ref_structure.composition.as_dict()

    result = {}
    for k in set(target_composition) | set(reference_composition):
        # An element may be present in only one of the two structures.
        # Use .get() so the lookup does not depend on as_dict() returning a
        # defaultdict, and does not insert keys into the mappings.
        n_atom_diff = int(target_composition.get(k, 0)
                          - reference_composition.get(k, 0))
        if n_atom_diff:
            result[Element(k)] = n_atom_diff

    return result
def setUp(self):
    """Build a Ni SNAPotential on a linear model and store it as ``self.ff_settings``."""
    describer = BispectrumCoefficients(
        rcutfac=4.1,
        twojmax=8,
        element_profile={'Ni': {'r': 0.5, 'w': 1}},
        pot_fit=True)

    model = LinearModel(describer=describer)
    # ``coeff`` and ``intercept`` are defined outside this method
    model.model.coef_ = coeff
    model.model.intercept_ = intercept

    potential = SNAPotential(model=model)
    potential.specie = Element('Ni')
    self.ff_settings = potential
def from_config(param_file, coeff_file, **kwargs):
    """
    Initialize potentials with parameters file and coefficient file.

    Only the first element section of the coefficient file is used: the
    second non-comment line supplies the specie, radius and weight, and
    all following non-comment lines are taken as its coefficients.

    Args:
        param_file (str): The file storing the configuration of potentials.
        coeff_file (str): The file storing the coefficients of potentials.
        **kwargs: forwarded to ``LinearModel``.

    Return:
        SNAPotential.
    """
    with open(coeff_file) as f:
        coeff_lines = [line for line in f if not line.startswith('#')]
    specie, r, w = coeff_lines[1].split()
    r, w = float(r), int(w)
    element_profile = {specie: {'r': r, 'w': w}}

    # Raw strings: "\d" in a normal string literal is an invalid escape.
    rcut_pattern = re.compile(r'rcutfac (.*?)\n', re.S)
    twojmax_pattern = re.compile(r'twojmax (\d*)\n', re.S)
    rfac_pattern = re.compile(r'rfac0 (.*?)\n', re.S)
    rmin_pattern = re.compile(r'rmin0 (.*?)\n', re.S)
    diagonalstyle_pattern = re.compile(r'diagonalstyle (.*?)\n', re.S)
    quadratic_pattern = re.compile(r'quadraticflag (.*?)(?=\n|$)', re.S)

    with zopen(param_file, 'rt') as f:
        param_lines = f.read()

    # When a key occurs several times the last occurrence wins
    rcut = float(rcut_pattern.findall(param_lines)[-1])
    twojmax = int(twojmax_pattern.findall(param_lines)[-1])
    rfac = float(rfac_pattern.findall(param_lines)[-1])
    # rmin0 may be written as "0" or "0.0"; int() rejects the latter.
    # Integral values are still returned as int, as before.
    rmin = float(rmin_pattern.findall(param_lines)[-1])
    if rmin.is_integer():
        rmin = int(rmin)
    diagonal = int(diagonalstyle_pattern.findall(param_lines)[-1])
    quadratic_flags = quadratic_pattern.findall(param_lines)
    if quadratic_flags:
        quadratic = bool(int(quadratic_flags[-1]))
    else:
        # quadraticflag is optional in the parameter file
        quadratic = False

    describer = BispectrumCoefficients(rcutfac=rcut,
                                       twojmax=twojmax,
                                       rfac0=rfac,
                                       element_profile=element_profile,
                                       rmin0=rmin,
                                       diagonalstyle=diagonal,
                                       quadratic=quadratic,
                                       pot_fit=True)
    model = LinearModel(describer=describer, **kwargs)
    # The ``np.float`` alias no longer exists in NumPy; the builtin float is
    # the same dtype.
    model.model.coef_ = np.array(coeff_lines[2:], dtype=float)
    model.model.intercept_ = 0
    snap = SNAPotential(model=model)
    snap.specie = Element(specie)
    return snap
def from_options(
        cls,
        xc: Xc,
        symbol_list: list,
        factor: int,
        aexx: Optional[float] = 0.25,
        hubbard_u: Optional[bool] = None,
        ldauu: Optional[dict] = None,
        ldaul: Optional[dict] = None,
        ldaul_set_name: Optional[str] = "default") -> "XcIncarSettings":
    """
    Construct incar settings related to xc with some options.

    Args: See ViseInputSet docstrings

    Return:
        XcIncarSettings class object
    """
    settings = load_default_incar_settings(
        yaml_filename="xc_incar_set.yaml",
        required_flags=XC_REQUIRED_FLAGS,
        optional_flags=XC_OPTIONAL_FLAGS,
        key_name=str(xc))

    # By default Hubbard U is set for LDA or GGA.
    if hubbard_u is None:
        hubbard_u = xc in LDA_OR_GGA
    ldauu = ldauu or {}
    ldaul = ldaul or {}

    if xc == Xc.pbesol:
        settings["GGA"] = "PS"
    elif xc == Xc.scan:
        settings["METAGGA"] = "SCAN"

    if hubbard_u:
        u_set = loadfn(SET_DIR / "u_parameter_set.yaml")
        # Values passed by the caller override those of the named set
        u_values = u_set["LDAUU"][ldaul_set_name]
        u_values.update(ldauu)
        ldauu = [u_values.get(el, 0) for el in symbol_list]

        # The LDAU tags are only written when some U value is non-zero
        if sum(ldauu) > 0:
            settings["LDAUU"] = ldauu
            settings.update({"LDAU": True, "LDAUTYPE": 2, "LDAUPRINT": 1})

            l_values = u_set["LDAUL"][ldaul_set_name]
            l_values.update(ldaul)
            settings["LDAUL"] = [l_values.get(el, -1) for el in symbol_list]

            has_z_above_56 = any([Element(el).Z > 56 for el in symbol_list])
            settings["LMAXMIX"] = 6 if has_z_above_56 else 4

    if xc in HYBRID_FUNCTIONAL:
        settings["AEXX"] = aexx

    if factor > 1:
        settings["NKRED"] = factor

    return cls(settings=settings)
def test_get_property(self):
    """Spot-check elemental and charge-dependent property lookups on ``self.data_source``."""
    elemental = self.data_source.get_elemental_property
    charge_dependent = \
        self.data_source.get_charge_dependent_property_from_specie

    # Properties looked up by element
    self.assertAlmostEqual(-4.3853, elemental(Element("Bi"), "mus_fere"), 4)
    self.assertEqual(59600, elemental(Element("Li"), "electron_affin"))
    self.assertAlmostEqual(2372300, elemental(Element("He"), "first_ioniz"))

    # Properties looked up by specie (element plus oxidation state)
    self.assertAlmostEqual(
        sum([2372300, 5250500]),
        charge_dependent(Specie("He", 2), "total_ioniz"))
    self.assertAlmostEqual(
        18.6,
        charge_dependent(Specie("V", 3), "xtal_field_split"))
def contains_element(entries, element_symbol):
    """Return the entries whose composition contains ``element_symbol``.

    Parameters:
    -----------
    entries: list
        Entries to filter; each must expose ``composition.elements``.
    element_symbol:
        Symbol passed to ``Element`` to build the element searched for.

    Returns:
    --------
    list of the matching entries, in their original order.
    """
    target = Element(element_symbol)
    matching = []
    for entry in entries:
        if target in entry.composition.elements:
            matching.append(entry)
    return matching
def test_oxide_energy_corr(self):
    """Check the corrected energy of a Li2O entry described via 'potcar_spec'."""
    li, o = Element("Li"), Element("O")
    lattice = Lattice.from_parameters(3.278, 3.278, 3.278, 60, 60, 60)
    frac_coords = [[0.25, 0.25, 0.25],
                   [0.75, 0.75, 0.75],
                   [0.0, 0.0, 0.0]]
    structure = Structure(lattice, [li, li, o], frac_coords)

    potcar_spec = [
        {'titel': 'PAW_PBE Li 17Jan2003',
         'hash': '65e83282d1707ec078c1012afbd05be8'},
        {'titel': 'PAW_PBE O 08Apr2002',
         'hash': '7a25bc5b9a5393f46600a4939d357982'},
    ]
    run_parameters = {
        'is_hubbard': False,
        'hubbards': None,
        'run_type': 'GGA',
        'potcar_spec': potcar_spec,
    }
    entry = ComputedStructureEntry(structure, -3, parameters=run_parameters)

    corrected = self.compat.process_entry(entry)
    # Uncorrected energy is -3; 0.66975 is subtracted once (4 decimal places)
    self.assertAlmostEqual(corrected.energy, -3.0 - 0.66975, 4)
def setUpClass(cls):
    """Create the two ForceField fixtures shared by the tests: ``virus`` and ``ethane``."""
    # Masses are given in three forms: symbol string, Element and number
    mass_info = [
        ("A", "H"),
        ("B", Element("C")),
        ("C", Element("O")),
        ("D", 1.00794),
    ]
    nonbond_coeffs = [
        [1, 1, 1.1225],
        [1, 1.175, 1.31894],
        [1, 1.55, 1.73988],
        [1, 1, 1.1225],
        [1, 1.35, 4],
        [1, 1.725, 1.93631],
        [1, 1.175, 1.31894],
        [1, 2.1, 4],
        [1, 1.55, 1.73988],
        [1, 1, 1.1225],
    ]
    bond_coeffs = [
        {"coeffs": [50, 0.659469], "types": [("A", "B"), ("C", "D")]},
        {"coeffs": [50, 0.855906], "types": [("B", "C")]},
    ]
    topo_coeffs = {"Bond Coeffs": bond_coeffs}

    cls.virus = ForceField(mass_info=mass_info,
                           nonbond_coeffs=nonbond_coeffs,
                           topo_coeffs=topo_coeffs)
    ethane_file = os.path.join(test_dir, "ff_ethane.yaml")
    cls.ethane = ForceField.from_file(ethane_file)
def DOI(self, calc_state):
    """
    Return the fraction of mapped sites occupied by ``self.Bspecie``.

    The site indices come from ``self.site_matcher.get_mapping`` applied to
    the structure of ``calc_state.config`` and ``self.Asite_struct``.
    """
    structure = calc_state.config.structure
    asites = self.site_matcher.get_mapping(structure, self.Asite_struct)
    n_matching = sum(
        1 for i in asites
        if structure.species[i] == Element(self.Bspecie))
    return n_matching / float(len(asites))
def test_apply_transformation(self):
    """Exercise DiscretizeOccupanciesTransformation with loose and tight tolerances."""
    lattice = Lattice.cubic(4)
    frac_coords = [[0, 0, 0], [0.5, 0.5, 0.5]]
    li, na, k = Element("Li"), Element("Na"), Element("K")

    # Case 1: occupancies 0.19 / 0.19 / 0.62 with max_denominator=5
    s_orig = Structure(lattice,
                       [{"Li": 0.19, "Na": 0.19, "K": 0.62}, {"O": 1}],
                       frac_coords)
    loose = DiscretizeOccupanciesTransformation(max_denominator=5, tol=0.5)
    discretized = loose.apply_transformation(s_orig)
    self.assertEqual(dict(discretized[0].species),
                     {li: 0.2, na: 0.2, k: 0.6})

    # With tol=0.01 the same input is expected to be rejected
    tight = DiscretizeOccupanciesTransformation(max_denominator=5, tol=0.01)
    self.assertRaises(RuntimeError, tight.apply_transformation, s_orig)

    # Case 2: occupancies 0.5 / 0.25 / 0.25 with max_denominator=9
    s_orig_2 = Structure(lattice,
                         [{"Li": 0.5, "Na": 0.25, "K": 0.25}, {"O": 1}],
                         frac_coords)
    free_denominator = DiscretizeOccupanciesTransformation(
        max_denominator=9, tol=0.25, fix_denominator=False)
    discretized = free_denominator.apply_transformation(s_orig_2)
    self.assertEqual(dict(discretized[0].species),
                     {li: Fraction(1/2),
                      na: Fraction(1/4),
                      k: Fraction(1/4)})

    # With fix_denominator=True and tol=0.05 the input is expected to be rejected
    fixed_denominator = DiscretizeOccupanciesTransformation(
        max_denominator=9, tol=0.05, fix_denominator=True)
    self.assertRaises(RuntimeError,
                      fixed_denominator.apply_transformation, s_orig_2)
def test_get_data(self):
    """Mixing enthalpy lookup gives the same value for both argument orders, NaN for He-H."""
    mixing_enthalpy = self.data.get_mixing_enthalpy
    hydrogen = Element('H')
    palladium = Element('Pd')

    # Both argument orders give -27
    self.assertEqual(-27, mixing_enthalpy(hydrogen, palladium))
    self.assertEqual(-27, mixing_enthalpy(palladium, hydrogen))

    # The He-H pair yields NaN
    self.assertTrue(isnan(mixing_enthalpy(Element('He'), hydrogen)))
def VoronoiInfo(poscar, vasprun, elements):
    """
    Summarise Voronoi neighbors of the site returned by ``get_center_i``.

    Args:
        poscar: object exposing the structure as ``poscar.structure``.
        vasprun: not used by this function.
        elements: element symbols used as VoronoiNN targets; the first two
            are reported as "A" and "B".

    Returns:
        (dict) with keys 'A_Bonds', 'A_length', 'B_Bonds', 'B_length' and
        'avg_length': the summed rounded weights per element and the
        corresponding mean distances from the start site.
    """
    base_s = poscar.structure
    start_i = get_center_i(base_s, Element('O'))
    print(start_i)

    vnn = VoronoiNN(targets=[Element(x) for x in elements])

    weight = {}
    total_length = {}
    for pt in vnn.get_nn_info(base_s, start_i):
        species = pt['site'].species_string
        # Adding 0.45 before rounding: weights of 0.05 or less round to 0
        temp_weight = round(pt['weight'] + 0.45)
        weight[species] = weight.get(species, 0) + temp_weight
        if temp_weight >= 0.99:
            bond_length = base_s.get_distance(start_i, pt['site_index'])
            total_length[species] = \
                total_length.get(species, 0) + bond_length

    first, second = elements[0], elements[1]
    return {
        'A_Bonds': weight[first],
        'A_length': total_length[first] / weight[first],
        'B_Bonds': weight[second],
        'B_length': total_length[second] / weight[second],
        'avg_length': (total_length[first] + total_length[second]) /
                      (weight[first] + weight[second]),
    }
def get_vc_plot_data(self, open_el, valence=None, entries=None, allowpmu=True):
    """
    Build content and voltage lists from the phase evolution profile.

    Args:
        open_el: the working-ion element.
        valence: charge of the working ion. When falsy, the charge is
            looked up for Li, Na, K (1), Mg, Ca, Zn (2) and Al (3).
        entries: forwarded to ``get_phase_evolution_profile``.
        allowpmu: forwarded to ``get_phase_evolution_profile``.

    Returns:
        (oe_list, v_list): the 'evolution' values of the profile steps in
        reverse order, and the matching 'chempot' values expressed relative
        to the last of them, negated and divided by the ion charge.

    Raises:
        ValueError: if ``valence`` is not given and ``open_el`` is not one
            of the supported working ions.
    """
    common_working_ion = {Element('Li'): 1, Element('Na'): 1,
                          Element('K'): 1, Element('Mg'): 2,
                          Element('Ca'): 2, Element('Zn'): 2,
                          Element('Al'): 3}
    if valence:
        ioncharge = valence
    elif open_el in common_working_ion:
        ioncharge = common_working_ion[open_el]
    else:
        raise ValueError('Working ion {} not supported. You can provide charge manually'.format(open_el.symbol))

    evolution_profile = self.get_phase_evolution_profile(
        open_el, entries=entries, allowpmu=allowpmu)

    # Walk the profile from its last step to its first
    oe_list = []
    chempots = []
    for step in reversed(evolution_profile):
        oe_list.append(step['evolution'])
        chempots.append(step['chempot'])

    v_ref = chempots[-1]
    v_list = [(-mu + v_ref) / ioncharge for mu in chempots]
    return oe_list, v_list
def read_poscar(layers, atom_dict):
    """
    Interactively load a POSCAR file as one layer, then add layers/adsorbate.

    Every site of the file is wrapped in an ``atom`` object, stored in
    ``atom_dict`` under the next free integer key and the whole content of
    ``atom_dict`` is appended to ``layers`` as a single layer. The user is
    then asked repeatedly whether to add a layer ('l'); answering 'a' adds
    an adsorbate on the last layer and ends the dialogue, any other answer
    ends it directly.

    Args:
        layers: list of layers; modified in place.
        atom_dict: dict of atoms keyed by integer index; modified in place.
    """
    from pymatgen import Structure, Element
    from objects import atom

    name = (input('Enter Name of POSCAR file in the same directory: '))
    poscar = Structure.from_file(name)
    for i in poscar:
        mg_atom = Element(str(i.specie))
        atom_dict[len(atom_dict)] = atom(mg_atom, list(i.coords),
                                         num=len(atom_dict), c_tag='b')
    layers.append([atom_dict[i] for i in atom_dict])
    print('Read POSCAR as a single layer')

    # Symbol of the most recently deposited atom; stays None until a layer
    # has been added in this session.
    sym = None
    cmd = input('add layer <l> or add adsorbate <a>? ')
    while cmd == 'l':
        sym = input("Which atom to deposit? ")
        add_layer(Element(sym))
        cmd = input('add layer <l> or add adsorbate <a>? ')
    if cmd == 'a':
        if sym is None:
            # No layer was added first, so no symbol has been entered yet.
            # Ask for it instead of failing on an unbound variable.
            sym = input("Which atom to deposit? ")
        add_ads(layers[-1], Element(sym))
def test_connectivity_array(self):
    """Check selected entries of the LiFePO4 connectivity array and one image site."""
    vc = VoronoiConnectivity(self.get_structure("LiFePO4"))
    ca = vc.connectivity_array

    # First four entries of row 15 in the middle slice of the last axis
    middle = ca.shape[2] // 2
    self.assertTrue(np.allclose(ca[15, :4, middle],
                                np.array([0, 1.96338392, 0, 0.04594495])))

    # Last three entries of row 1 in slice 51 are all zero
    self.assertTrue(np.allclose(ca[1, -3:, 51], np.array([0, 0, 0])))

    # Site 27 in image 51 is an oxygen at the given fractional coordinates
    site = vc.get_sitej(27, 51)
    self.assertEqual(site.specie, Element('O'))
    self.assertTrue(np.allclose(site.frac_coords,
                                np.array([-0.29158, 0.74889, 0.95684])))
def test_process_entry_peroxide(self):
    """Check the corrected energy of a Li2O2 entry with four Li and four O sites."""
    lattice = Lattice.from_parameters(3.159597, 3.159572, 7.685205,
                                      89.999884, 89.999674, 60.000510)
    li, o = Element("Li"), Element("O")
    species = [li] * 4 + [o] * 4
    frac_coords = [[0.666656, 0.666705, 0.750001],
                   [0.333342, 0.333378, 0.250001],
                   [0.000001, 0.000041, 0.500001],
                   [0.000001, 0.000021, 0.000001],
                   [0.333347, 0.333332, 0.649191],
                   [0.333322, 0.333353, 0.850803],
                   [0.666666, 0.666686, 0.350813],
                   [0.666665, 0.666684, 0.149189]]
    structure = Structure(lattice, species, frac_coords)

    run_parameters = {
        'is_hubbard': False,
        'hubbards': None,
        'run_type': 'GGA',
        'potcar_symbols': ['PAW_PBE Fe 06Sep2000', 'PAW_PBE O 08Apr2002'],
    }
    entry = ComputedStructureEntry(structure, -3, parameters=run_parameters)

    corrected = self.compat.process_entry(entry)
    # Uncorrected energy is -3; 0.44317 is subtracted four times
    self.assertAlmostEqual(corrected.energy, -3 - 0.44317 * 4, 4)
def test_ozonide(self):
    """Check the corrected energy of a LiO3 entry described via 'potcar_spec'."""
    li, o = Element("Li"), Element("O")
    lattice = Lattice.from_parameters(3.999911, 3.999911, 3.999911,
                                      133.847504, 102.228244, 95.477342)
    frac_coords = [[0.513004, 0.513004, 1.000000],
                   [0.017616, 0.017616, 0.000000],
                   [0.649993, 0.874790, 0.775203],
                   [0.099587, 0.874790, 0.224797]]
    structure = Structure(lattice, [li, o, o, o], frac_coords)

    potcar_spec = [
        {'titel': 'PAW_PBE Li 17Jan2003',
         'hash': '65e83282d1707ec078c1012afbd05be8'},
        {'titel': 'PAW_PBE O 08Apr2002',
         'hash': '7a25bc5b9a5393f46600a4939d357982'},
    ]
    run_parameters = {
        'is_hubbard': False,
        'hubbards': None,
        'run_type': 'GGA',
        'potcar_spec': potcar_spec,
    }
    entry = ComputedStructureEntry(structure, -3, parameters=run_parameters)

    corrected = self.compat.process_entry(entry)
    # Uncorrected energy is -3; 0.66975 is subtracted three times
    self.assertAlmostEqual(corrected.energy, -3.0 - 3 * 0.66975)
def find_seekpath_data(self) -> None:
    """Get full information of seekpath band path.

    The raw seekpath result is stored in ``self._seekpath_data`` and the
    primitive cell it reports is stored as a Structure in
    ``self._band_primitive``.
    """
    self._seekpath_data = seekpath.get_explicit_k_path(
        structure=self.cell,
        symprec=self.symprec,
        angle_tolerance=self.angle_tolerance,
        with_time_reversal=self.time_reversal,
        reference_distance=self.ref_distance)

    data = self._seekpath_data
    lattice = data["primitive_lattice"]
    # "primitive_types" holds atomic numbers; convert them to Elements
    species = [Element.from_Z(z) for z in data["primitive_types"]]
    positions = data["primitive_positions"]

    self._band_primitive = Structure(lattice, species, positions)
def get_composition_from_string(comp_str):
    """Validate `comp_str` and return its integer formula as a string.

    Amounts that are not greater than 1 are omitted from the output.
    """
    from pymatgen import Composition, Element

    comp = Composition(comp_str)
    # Constructing an Element from every entry validates it
    for element in comp.elements:
        Element(element)

    integer_formula = comp.get_integer_formula_and_factor()[0]
    integer_comp = Composition(integer_formula)

    parts = []
    for key, value in integer_comp.as_dict().items():
        amount = int(value) if value > 1 else ''
        parts.append('{}{}'.format(key, amount))
    return ''.join(parts)
def insert_g3testset(coll):
    """
    Parse every ``g*.txt`` file in the working directory into ``coll``.

    Each molecule yielded by ``parse_file`` is reduced to the sites with a
    valid element symbol, converted through OpenBabel to obtain its string
    representations, and upserted into ``coll`` keyed on InChI, charge and
    spin multiplicity. A failure on one molecule is printed (with a short
    traceback) and does not stop processing.

    Args:
        coll: collection object providing ``update(query, update, upsert)``.
    """
    for f in glob.glob("g*.txt"):
        print("Parsing " + f)
        for (m, charge, spin) in parse_file(f):
            try:
                clean_sites = [
                    site for site in m
                    if Element.is_valid_symbol(site.specie.symbol)
                ]
                clean_mol = Molecule.from_sites(clean_sites,
                                                charge=charge,
                                                spin_multiplicity=spin)
                xyz = XYZ(clean_mol)
                bb = BabelMolAdaptor.from_string(str(xyz), "xyz")
                pbmol = pb.Molecule(bb.openbabel_mol)
                smiles = pbmol.write("smi").split()[0]
                can = pbmol.write("can").split()[0]
                inchi = pbmol.write("inchi")
                svg = pbmol.write("svg")

                mol_dict = clean_mol.as_dict()
                comp = clean_mol.composition
                d = {
                    "molecule": mol_dict,
                    "pretty_formula": comp.reduced_formula,
                    "formula": comp.formula,
                    "composition": comp.as_dict(),
                    "elements": list(comp.as_dict().keys()),
                    "nelements": len(comp),
                    "charge": charge,
                    "spin_multiplicity": spin,
                    "smiles": smiles,
                    "can": can,
                    "inchi": inchi,
                    # "names": get_nih_names(smiles) is deliberately left out
                    "svg": svg,
                    "xyz": str(xyz),
                    "tags": ["G305 test set"],
                }
                query = {
                    "inchi": inchi,
                    "charge": charge,
                    "spin_multiplicity": spin,
                }
                coll.update(query, {"$set": d}, upsert=True)
            except Exception:
                # Best effort: report the failure and go on with the next
                # molecule of the file
                print("Error in {}".format(f))
                traceback.print_exc(limit=2, file=sys.stdout)
        print("{} parsed!".format(f))
def get_pymatgen_descriptor(comp, prop):
    """
    Get descriptor data for elements in a compound from pymatgen.

    Args:
        comp: (str) compound composition, eg: "NaCl"
        prop: (str) pymatgen element attribute, as defined in the Element
            class at
            http://pymatgen.org/_modules/pymatgen/core/periodic_table.html

    Returns:
        (list) of namedtuples containing element name, property name,
        property value, units, and amount of element. Value and units are
        None when pymatgen has no value for the element.

    Raises:
        ValueError: if ``prop`` is not an attribute of Element.
    """
    eldata = collections.namedtuple(
        'eldata', 'element propname propvalue propunit amt')
    # Attributes for which no unit is looked up
    unitless_props = ['X', 'Z', 'ionic_radii', 'group', 'row', 'number']

    eldata_lst = []
    el_amt_dict = Composition(comp).get_el_amt_dict()
    for el, amt in el_amt_dict.items():
        element = Element(el)
        # The previous guard, ``callable(...) is None``, could never be
        # true, so the intended ValueError was never raised. Detect an
        # unknown attribute directly instead.
        try:
            value = getattr(element, prop)
        except AttributeError:
            raise ValueError('Invalid pymatgen Element attribute(property)')

        if value is not None:
            units = None if prop in unitless_props else value.unit
            propvalue = float(value)
        else:
            units = None
            propvalue = None

        eldata_lst.append(
            eldata(element=el, propname=prop, propvalue=propvalue,
                   propunit=units, amt=amt))
    return eldata_lst
def test_get_nn(self):
    """Check coordination numbers in LiFePO4 for the default and the overridden finder."""
    s = self.get_structure('LiFePO4')

    # Test the default near-neighbor finder.
    expected_cn = [(Element("Li"), 0), (Element("Fe"), 6),
                   (Element("P"), 4)]
    nsites_checked = 0
    for site_idx, site in enumerate(s):
        for element, coordination in expected_cn:
            if site.specie == element:
                self.assertEqual(self.jmol.get_cn(s, site_idx),
                                 coordination)
                nsites_checked += 1
                break
    self.assertEqual(nsites_checked, 12)

    # Test a user override that would cause Li to show up as 6-coordinated
    self.assertEqual(self.jmol_update.get_cn(s, 0), 6)

    # Verify get_nn function works
    self.assertEqual(len(self.jmol_update.get_nn(s, 0)), 6)
def __init__(self):
    """
    Load the MEGNet elemental embeddings from the bundled JSON file.

    ``self.all_element_data`` maps "Dummy" (entry 0 of the file) and the
    element symbols for Z = 1..94 (entries 1..94) to a dict of the 16
    values named "embedding 1" .. "embedding 16" (``self.prop_names``).
    """
    dfile = os.path.join(module_dir,
                         "data_files/megnet_elemental_embedding.json")
    self._dummy = "Dummy"
    with open(dfile, "r") as fp:
        embeddings = json.load(fp)

    self.prop_names = ["embedding {}".format(i) for i in range(1, 17)]

    # Entry 0 does not belong to an element; it is stored under the dummy key
    self.all_element_data = {
        self._dummy: dict(zip(self.prop_names, embeddings[0]))
    }
    for z in range(1, 95):
        symbol = str(Element.from_Z(z))
        self.all_element_data[symbol] = dict(
            zip(self.prop_names, embeddings[z]))
def gen_iupac_ordering():
    """
    Assign an 'IUPAC ordering' rank to every entry of ``periodic_table.json``.

    The rank of an element is its position in the (group, row) sequence
    listed below. Only the mapping loaded in memory is modified; this
    function neither writes it back nor returns it.
    """
    periodic_table = loadfn("periodic_table.json")

    # Each item is (groups, rows); items are expanded in the order given
    blocks = [
        ([18], range(6, 0, -1)),  # noble gasses
        ([1], range(7, 1, -1)),  # alkali metals
        ([2], range(7, 1, -1)),  # alkali earth metals
        (range(17, 2, -1), [9]),  # actinides
        (range(17, 2, -1), [8]),  # lanthanides
        ([3], (5, 4)),  # Y, Sc
        ([4], (6, 5, 4)),  # Hf -> Ti
        ([5], (6, 5, 4)),  # Ta -> V
        ([6], (6, 5, 4)),  # W -> Cr
        ([7], (6, 5, 4)),  # Re -> Mn
        ([8], (6, 5, 4)),  # Os -> Fe
        ([9], (6, 5, 4)),  # Ir -> Co
        ([10], (6, 5, 4)),  # Pt -> Ni
        ([11], (6, 5, 4)),  # Au -> Cu
        ([12], (6, 5, 4)),  # Hg -> Zn
        ([13], range(6, 1, -1)),  # Tl -> B
        ([14], range(6, 1, -1)),  # Pb -> C
        ([15], range(6, 1, -1)),  # Bi -> N
        ([1], [1]),  # Hydrogen
        ([16], range(6, 1, -1)),  # Po -> O
        ([17], range(6, 1, -1)),  # At -> F
    ]
    order = [
        pair for groups, rows in blocks for pair in product(groups, rows)
    ]

    iupac_ordering_dict = {
        Element.from_row_and_group(row, group): rank
        for rank, (group, row) in enumerate(order)
    }

    # first clean periodic table of any IUPAC ordering
    for el in periodic_table:
        periodic_table[el].pop('IUPAC ordering', None)

    # now add iupac ordering
    for el, el_data in periodic_table.items():
        if 'IUPAC ordering' in el_data:
            # sanity check that we don't cover the same element twice
            raise KeyError("IUPAC ordering already exists for {}".format(el))
        el_data['IUPAC ordering'] = iupac_ordering_dict[get_el_sp(el)]
def _parse(self, filename):
    """
    Parse a Gaussian output file and store the extracted data on ``self``.

    The file is read line by line through three stages:

    * stage 0: collect Link 0 entries (``%key=value``) and the route line;
    * stage 1: read the total charge and the spin multiplicity;
    * stage 2: read everything else (geometries, energies, forces,
      orbital eigenvalues and coefficients, frequencies, Mulliken
      charges, thermochemistry corrections, errors and termination).

    Args:
        filename: Path of the output file; it is opened with ``zopen``.

    Attributes always set: properly_terminated, is_pcm, stationary_type,
    structures, corrections, energies, pcm, errors, Mulliken_charges,
    link0, cart_forces, frequencies, eigenvalues, is_spin.

    Attributes set only when the matching section is found: functional,
    basis_set, route, dieze_tag, charge, spin_mult, num_basis_func,
    electrons, eigenvectors, atom_basis_labels, molecular_orbital.

    A warning (not an exception) is emitted when no "Normal termination"
    line is found.
    """
    # Raw strings keep the regex escapes (\s, \d, \( ...) out of Python's
    # own string-escape processing.
    start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
    route_patt = re.compile(r" #[pPnNtT]*.*")
    link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
    charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                 r"Multiplicity\s+=\s*(\d+)")
    num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
    num_elec_patt = re.compile(
        r"(\d+)\s+alpha electrons\s+(\d+)\s+beta electrons")
    pcm_patt = re.compile(r"Polarizable Continuum Model")
    stat_type_patt = re.compile(r"imaginary frequencies")
    scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
    mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
    oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
    termination_patt = re.compile(r"(Normal|Error) termination")
    error_patt = re.compile(
        r"(! Non-Optimized Parameters !|Convergence failure)")
    mulliken_patt = re.compile(
        r"^\s*(Mulliken charges|Mulliken atomic charges)")
    mulliken_charge_patt = re.compile(
        r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
    end_mulliken_patt = re.compile(
        r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
    std_orientation_patt = re.compile(r"Standard orientation")
    end_patt = re.compile(r"--+")
    orbital_patt = re.compile(r"(Alpha|Beta)\s*\S+\s*eigenvalues --(.*)")
    thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                             r"\s+([\d\.-]+)")
    forces_on_patt = re.compile(
        r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
    forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
    forces_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")
    freq_on_patt = re.compile(
        r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
    freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
    normal_mode_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")
    mo_coeff_patt = re.compile(r"Molecular Orbital Coefficients:")
    mo_coeff_name_patt = re.compile(
        r"\d+\s((\d+|\s+)\s+([a-zA-Z]{1,2}|\s+))\s+(\d+\S+)")

    self.properly_terminated = False
    self.is_pcm = False
    self.stationary_type = "Minimum"
    self.structures = []
    self.corrections = {}
    self.energies = []
    self.pcm = None
    self.errors = []
    self.Mulliken_charges = {}
    self.link0 = {}
    self.cart_forces = []
    self.frequencies = []
    self.eigenvalues = []
    self.is_spin = False

    coord_txt = []
    # Counts the dashed separator lines seen in a "Standard orientation"
    # table; it wraps back to 0 when the table is complete.
    read_coord = 0
    read_mulliken = False
    read_eigen = False
    eigen_txt = []
    parse_stage = 0
    num_basis_found = False
    terminated = False
    parse_forces = False
    forces = []
    parse_freq = False
    frequencies = []
    read_mo = False

    with zopen(filename) as f:
        for line in f:
            if parse_stage == 0:
                if start_patt.search(line):
                    parse_stage = 1
                elif link0_patt.match(line):
                    m = link0_patt.match(line)
                    self.link0[m.group(1)] = m.group(2)
                elif route_patt.search(line):
                    params = read_route_line(line)
                    self.functional = params[0]
                    self.basis_set = params[1]
                    self.route = params[2]
                    self.dieze_tag = params[3]
                    parse_stage = 1
            elif parse_stage == 1:
                if charge_mul_patt.search(line):
                    m = charge_mul_patt.search(line)
                    self.charge = int(m.group(1))
                    self.spin_mult = int(m.group(2))
                    parse_stage = 2
            elif parse_stage == 2:

                if self.is_pcm:
                    self._check_pcm(line)

                if "FREQ" in self.route and thermo_patt.search(line):
                    m = thermo_patt.search(line)
                    if m.group(1) == "Zero-point":
                        self.corrections["Zero-point"] = float(m.group(3))
                    else:
                        # NOTE: str.strip removes the *characters* ' ',
                        # 't' and 'o' from both ends, not the substring
                        # " to "; kept as is because the resulting keys
                        # are part of the public data.
                        key = m.group(2).strip(" to ")
                        self.corrections[key] = float(m.group(3))

                if read_coord:
                    if not end_patt.search(line):
                        coord_txt.append(line)
                    else:
                        read_coord = (read_coord + 1) % 4
                        if not read_coord:
                            sp = []
                            coords = []
                            # The first two collected lines are skipped.
                            for coord_line in coord_txt[2:]:
                                toks = coord_line.split()
                                sp.append(Element.from_Z(int(toks[1])))
                                coords.append(
                                    [float(i) for i in toks[3:6]])
                            self.structures.append(Molecule(sp, coords))

                if parse_forces:
                    m = forces_patt.search(line)
                    if m:
                        forces.extend(
                            [float(_v) for _v in m.groups()[2:5]])
                    elif forces_off_patt.search(line):
                        self.cart_forces.append(forces)
                        forces = []
                        parse_forces = False

                # read molecular orbital eigenvalues
                if read_eigen:
                    m = orbital_patt.search(line)
                    if m:
                        eigen_txt.append(line)
                    else:
                        read_eigen = False
                        self.eigenvalues = {Spin.up: []}
                        for eigenline in eigen_txt:
                            if "Alpha" in eigenline:
                                self.eigenvalues[Spin.up] += [
                                    float(e) for e in
                                    float_patt.findall(eigenline)]
                            elif "Beta" in eigenline:
                                if Spin.down not in self.eigenvalues:
                                    self.eigenvalues[Spin.down] = []
                                self.eigenvalues[Spin.down] += [
                                    float(e) for e in
                                    float_patt.findall(eigenline)]
                        eigen_txt = []

                # read molecular orbital coefficients
                if read_mo:
                    # build a matrix with all coefficients
                    all_spin = [Spin.up]
                    if self.is_spin:
                        all_spin.append(Spin.down)

                    mat_mo = {}
                    for spin in all_spin:
                        mat_mo[spin] = np.zeros((self.num_basis_func,
                                                 self.num_basis_func))
                        nMO = 0
                        end_mo = False
                        while nMO < self.num_basis_func and not end_mo:
                            # Two lines are consumed before each block
                            # of coefficient rows.
                            f.readline()
                            f.readline()
                            self.atom_basis_labels = []
                            for i in range(self.num_basis_func):
                                line = f.readline()

                                # identify atom and OA labels
                                m = mo_coeff_name_patt.search(line)
                                if m.group(1).strip() != "":
                                    iat = int(m.group(2)) - 1
                                    # atname = m.group(3)
                                    self.atom_basis_labels.append(
                                        [m.group(4)])
                                else:
                                    self.atom_basis_labels[iat].append(
                                        m.group(4))

                                # MO coefficients
                                coeffs = [float(c) for c in
                                          float_patt.findall(line)]
                                for j, coeff in enumerate(coeffs):
                                    mat_mo[spin][i, nMO + j] = coeff

                            nMO += len(coeffs)
                            line = f.readline()
                            # manage pop=regular case (not all MO)
                            if nMO < self.num_basis_func and \
                                    ("Density Matrix:" in line or
                                     mo_coeff_patt.search(line)):
                                end_mo = True
                                warnings.warn(
                                    "POP=regular case, matrix "
                                    "coefficients not complete")
                        f.readline()

                    self.eigenvectors = mat_mo
                    read_mo = False

                    # build a more convenient array dict with MO
                    # coefficient of each atom in each MO.
                    # mo[Spin][OM j][atom i] = {AO_k: coeff, AO_k: coeff ... }
                    mo = {}
                    for spin in all_spin:
                        mo[spin] = [
                            [{} for iat in
                             range(len(self.atom_basis_labels))]
                            for j in range(self.num_basis_func)]
                        for j in range(self.num_basis_func):
                            i = 0
                            for iat in range(len(self.atom_basis_labels)):
                                for label in self.atom_basis_labels[iat]:
                                    mo[spin][j][iat][label] = \
                                        self.eigenvectors[spin][i, j]
                                    i += 1

                    self.molecular_orbital = mo

                elif parse_freq:
                    m = freq_patt.search(line)
                    if m:
                        values = [float(_v)
                                  for _v in m.groups()[0].split()]
                        for value in values:
                            frequencies.append([value, []])
                    elif normal_mode_patt.search(line):
                        values = [float(_v) for _v in line.split()[2:]]
                        # Each displacement triplet belongs to one of the
                        # last n frequencies appended above.
                        n = len(values) // 3
                        for i in range(0, len(values), 3):
                            j = -n + i // 3
                            frequencies[j][1].extend(values[i:i + 3])
                    elif line.find("-------------------") != -1:
                        parse_freq = False
                        self.frequencies.append(frequencies)
                        frequencies = []

                elif termination_patt.search(line):
                    m = termination_patt.search(line)
                    # Only a normal termination counts; an "Error
                    # termination" line still triggers the final warning.
                    if m.group(1) == "Normal":
                        self.properly_terminated = True
                        terminated = True
                elif error_patt.search(line):
                    error_defs = {
                        "! Non-Optimized Parameters !": "Optimization "
                                                        "error",
                        "Convergence failure": "SCF convergence error"
                    }
                    m = error_patt.search(line)
                    self.errors.append(error_defs[m.group(1)])
                elif (not num_basis_found) and \
                        num_basis_func_patt.search(line):
                    m = num_basis_func_patt.search(line)
                    self.num_basis_func = int(m.group(1))
                    num_basis_found = True
                elif num_elec_patt.search(line):
                    m = num_elec_patt.search(line)
                    self.electrons = (int(m.group(1)), int(m.group(2)))
                elif (not self.is_pcm) and pcm_patt.search(line):
                    self.is_pcm = True
                    self.pcm = {}
                elif "FREQ" in self.route and "OPT" in self.route and \
                        stat_type_patt.search(line):
                    self.stationary_type = "Saddle"
                elif mp2_patt.search(line):
                    m = mp2_patt.search(line)
                    # Fortran "D" exponents are rewritten for float().
                    self.energies.append(
                        float(m.group(1).replace("D", "E")))
                elif oniom_patt.search(line):
                    # Compiled patterns have no ``matcher`` method; the
                    # match must be obtained with ``search``.
                    m = oniom_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif scf_patt.search(line):
                    m = scf_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif std_orientation_patt.search(line):
                    coord_txt = []
                    read_coord = 1
                elif not read_eigen and orbital_patt.search(line):
                    eigen_txt.append(line)
                    read_eigen = True
                elif mulliken_patt.search(line):
                    mulliken_txt = []
                    read_mulliken = True
                elif not parse_forces and forces_on_patt.search(line):
                    parse_forces = True
                elif freq_on_patt.search(line):
                    parse_freq = True
                elif mo_coeff_patt.search(line):
                    if "Alpha" in line:
                        self.is_spin = True
                    read_mo = True

                if read_mulliken:
                    if not end_mulliken_patt.search(line):
                        mulliken_txt.append(line)
                    else:
                        mulliken_charges = {}
                        for charge_line in mulliken_txt:
                            m = mulliken_charge_patt.search(charge_line)
                            if m:
                                mulliken_charges[int(m.group(1))] = [
                                    m.group(2), float(m.group(3))]
                        read_mulliken = False
                        self.Mulliken_charges = mulliken_charges

    if not terminated:
        # Deliberately a warning rather than an exception, so that
        # partially written outputs can still be inspected.
        warnings.warn("\n" + self.filename +
                      ": Termination error or bad Gaussian output file !")
# Report the absolute value of the mean of `scores`, rounded to three
# decimals, as the model's MAE in eV.
print("The MAE of the linear ridge regression band gap model using the naive feature set is: "\
    + str(round(abs(mean(scores)), 3)) + " eV")

##############################################################################################################

# Let's see which features are most important for the linear model

print("Below are the fitted linear ridge regression coefficients for each feature (i.e., element) in our naive feature set")

linear.fit(naiveFeatures, bandgaps) # fit to the whole data set; we're not doing CV here

print("element: coefficient")

# Coefficient i is printed next to the element of atomic number i + 1.
for i in range(MAX_Z):
    element = Element.from_Z(i + 1)
    print(element.symbol + ': ' + str(linear.coef_[i]))

##############################################################################################################

# Create alternative feature set that is more physically-motivated

physicalFeatures = []

for material in materials:
    # Fresh per-material lists; presumably filled by the part of the loop
    # body that lies beyond this excerpt -- TODO confirm.
    theseFeatures = []
    fraction = []
    atomicNo = []
    eneg = []
    group = []
def disassemble(self, atom_labels=None, guess_element=True,
                ff_label="ff_map"):
    """
    Breaks down LammpsData to building blocks
    (LammpsBox, ForceField and a series of Topology).
    RESTRICTIONS APPLIED:

    1. No complex force field defined not just on atom
        types, where the same type or equivalent types
        of topology may have more than one set of coefficients.
    2. No intermolecular topologies (with atoms from different
        molecule-ID) since a Topology object includes data for
        ONE molecule or structure only.

    Args:
        atom_labels ([str]): List of strings (must be different
            from one another) for labelling each atom type found in
            Masses section. Default to None, where the labels are
            automatically added based on either element guess or
            dummy specie assignment.
        guess_element (bool): Whether to guess the element based on
            its atomic mass. Default to True, otherwise dummy
            species "Qa", "Qb", ... will be assigned to various
            atom types. The guessed or assigned elements will be
            reflected on atom labels if atom_labels is None, as
            well as on the species of molecule in each Topology.
        ff_label (str): Site property key for labeling atoms of
            different types. Default to "ff_map".

    Returns:
        LammpsBox, ForceField, [Topology]

    Raises:
        AssertionError: If the atom labels are not unique, or if a
            topology entry involves atoms with different molecule-IDs.

    """
    atoms_df = self.atoms.copy()
    if "nx" in atoms_df.columns:
        # Shift the coordinates by the box shift computed from the
        # image flags.
        atoms_df[["x", "y", "z"]] += \
            self.box.get_box_shift(atoms_df[["nx", "ny", "nz"]].values)
    atoms_df = pd.concat([atoms_df, self.velocities], axis=1)

    # Group the atoms by molecule-ID; without that column everything is
    # treated as a single molecule with ID 1.
    mids = atoms_df.get("molecule-ID")
    if mids is None:
        unique_mids = [1]
        data_by_mols = {1: {"Atoms": atoms_df}}
    else:
        unique_mids = np.unique(mids)
        data_by_mols = {}
        for k in unique_mids:
            df = atoms_df[atoms_df["molecule-ID"] == k]
            data_by_mols[k] = {"Atoms": df}

    masses = self.masses.copy()
    masses["label"] = atom_labels
    unique_masses = np.unique(masses["mass"])
    if guess_element:
        # Pick, for each distinct mass, the element with the closest
        # reference atomic mass.
        ref_masses = [el.atomic_mass.real for el in Element]
        diff = np.abs(np.array(ref_masses) - unique_masses[:, None])
        atomic_numbers = np.argmin(diff, axis=1) + 1
        symbols = [Element.from_Z(an).symbol for an in atomic_numbers]
    else:
        # Dummy species "Qa", "Qb", ... (chr(97) == "a").
        symbols = ["Q%s" % a for a in
                   map(chr, range(97, 97 + len(unique_masses)))]
    for um, s in zip(unique_masses, symbols):
        masses.loc[masses["mass"] == um, "element"] = s
    if atom_labels is None:  # add unique labels based on elements
        # Series.items() replaces Series.iteritems(), which was removed
        # in pandas 2.0.
        for el, vc in masses["element"].value_counts().items():
            masses.loc[masses["element"] == el, "label"] = \
                ["%s%d" % (el, c) for c in range(1, vc + 1)]
    assert masses["label"].nunique(dropna=False) == len(masses), \
        "Expecting unique atom label for each type"
    mass_info = [tuple([r["label"], r["mass"]])
                 for _, r in masses.iterrows()]

    nonbond_coeffs, topo_coeffs = None, None
    if self.force_field:
        if "PairIJ Coeffs" in self.force_field:
            nbc = self.force_field["PairIJ Coeffs"]
            nbc = nbc.sort_values(["id1", "id2"]) \
                .drop(["id1", "id2"], axis=1)
            nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]
        elif "Pair Coeffs" in self.force_field:
            nbc = self.force_field["Pair Coeffs"].sort_index()
            nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]

        topo_coeffs = {k: [] for k in SECTION_KEYWORDS["ff"][2:]
                       if k in self.force_field}
        for kw in topo_coeffs.keys():
            class2_coeffs = {k: list(v.itertuples(False, None))
                             for k, v in self.force_field.items()
                             if k in CLASS2_KEYWORDS.get(kw, [])}
            ff_df = self.force_field[kw]
            # t[0] is the row index, t[1:] the coefficients.
            for t in ff_df.itertuples(True, None):
                d = {"coeffs": list(t[1:]), "types": []}
                if class2_coeffs:
                    d.update({k: list(v[t[0] - 1])
                              for k, v in class2_coeffs.items()})
                topo_coeffs[kw].append(d)

    if self.topology:
        def label_topo(atom_ids):
            """Return the tuple of atom-type labels for the given atoms."""
            return tuple(masses.loc[atoms_df.loc[atom_ids, "type"],
                                    "label"])

        for k, v in self.topology.items():
            # Section key minus its last character, plus " Coeffs".
            ff_kw = k[:-1] + " Coeffs"
            for topo in v.itertuples(False, None):
                topo_idx = topo[0] - 1
                indices = topo[1:]
                # Row labels are passed to .loc as a list, the documented
                # list-like form, rather than as a tuple.
                atom_ids = list(indices)
                topo_mids = atoms_df.loc[atom_ids, "molecule-ID"].unique()
                assert len(topo_mids) == 1, \
                    "Do not support intermolecular topology formed " \
                    "by atoms with different molecule-IDs"
                label = label_topo(atom_ids)
                topo_coeffs[ff_kw][topo_idx]["types"].append(label)
                mol_data = data_by_mols[topo_mids[0]]
                if mol_data.get(k):
                    mol_data[k].append(indices)
                else:
                    mol_data[k] = [indices]

    if topo_coeffs:
        # Remove duplicate type tuples collected above.
        for v in topo_coeffs.values():
            for d in v:
                d["types"] = list(set(d["types"]))

    ff = ForceField(mass_info=mass_info, nonbond_coeffs=nonbond_coeffs,
                    topo_coeffs=topo_coeffs)

    topo_list = []
    for mid in unique_mids:
        data = data_by_mols[mid]
        atoms = data["Atoms"]
        # Smallest atom index of the molecule; subtracted so that the
        # topology indices start at 0 within the molecule.
        shift = min(atoms.index)
        type_ids = atoms["type"]
        species = masses.loc[type_ids, "element"]
        labels = masses.loc[type_ids, "label"]
        coords = atoms[["x", "y", "z"]]
        m = Molecule(species.values, coords.values,
                     site_properties={ff_label: labels.values})
        charges = atoms.get("q")
        velocities = atoms[["vx", "vy", "vz"]] if "vx" in atoms.columns \
            else None
        topologies = {}
        for kw in SECTION_KEYWORDS["topology"]:
            if data.get(kw):
                topologies[kw] = (np.array(data[kw]) - shift).tolist()
        topologies = None if not topologies else topologies
        topo_list.append(Topology(sites=m, ff_label=ff_label,
                                  charges=charges, velocities=velocities,
                                  topologies=topologies))

    return self.box, ff, topo_list