def print_high_Tc(X_test, y_pred):
    """Write every prediction with Tc above 100 to ``test1_tc.txt``.

    Args:
        X_test: Indexable collection of rows. Each row is read as
            (atomic number of atom 0, atomic number of atom 1,
            count of atom 0, count of atom 1, pressure).
        y_pred: Indexable collection of predicted Tc values, aligned by
            index with ``X_test``.

    The output file is always (re)created, even when no row qualifies.
    """
    path_w = 'test1_tc.txt'
    row_format = '{:>2}{}{}{} P = {:>3} GPa Tc = {} K \n'
    with open(path_w, mode='w') as out:
        for idx in range(len(X_test)):
            # Skip anything that is not strictly above the 100 threshold.
            if not (y_pred[idx] > 100):
                continue
            row = X_test[idx]
            symbol_a = Element.from_Z(int(row[0])).symbol.lstrip()
            symbol_b = Element.from_Z(int(row[1])).symbol.lstrip()
            out.write(row_format.format(
                symbol_a, int(row[2]), symbol_b, int(row[3]),
                int(row[4]), int(y_pred[idx])))
    print('Predicted Tc is written in file {}'.format(path_w))
def fp_oganov(self, delta=0.01, sigma=0.01):
    """Build a normalised fingerprint from ``self.structure_distances``.

    For every species pair, each entry of the distance distribution is
    multiplied by ``volume / (delta * N0 * N1)`` and then reduced by one,
    where ``N0`` and ``N1`` are the composition amounts of the two species
    of the pair in ``self.structure``.

    Args:
        delta: Forwarded to ``structure_distances``; also used in the
            normalisation factor.
        sigma: Forwarded to ``structure_distances``.

    Returns:
        Tuple ``(struc_dist_x, fp_oganov)``: the first value returned by
        ``structure_distances`` unchanged, and the normalised mapping
        keyed by species pair.
    """
    struc_dist_x, struc_dist = self.structure_distances(delta=delta,
                                                        sigma=sigma)
    # NOTE(review): ``copy()`` looks like a shallow dict copy, in which
    # case the per-pair sequences are shared with ``struc_dist`` and are
    # updated in place below (same as before this change) -- confirm.
    fp_oganov = struc_dist.copy()
    vol = self.structure.volume

    for spec_pair in struc_dist:
        n_values = len(struc_dist[spec_pair])
        if not n_values:
            # Nothing to scale; also avoids element lookups for empty pairs.
            continue

        # These quantities depend only on the pair, so compute them once
        # instead of once per entry.
        specie0 = Element.from_Z(spec_pair[0])
        specie1 = Element.from_Z(spec_pair[1])
        number_atoms0 = self.structure.composition[specie0]
        number_atoms1 = self.structure.composition[specie1]
        scale = vol / (delta * number_atoms0 * number_atoms1)

        values = fp_oganov[spec_pair]
        for i in range(n_values):
            values[i] *= scale
            values[i] -= 1

    return struc_dist_x, fp_oganov
def get_atom_feature(
        self,
        mol,  # type: ignore
        atom) -> Dict:  # type: ignore
    """
    Collect the feature dictionary of a single atom.

    Args:
        mol (pybel.Molecule): Molecule being evaluated
        atom (pybel.Atom): Specific atom being evaluated

    Return:
        (dict): All features for that atom
    """
    # OpenBabel view of the atom; pybel indices are 1-based
    ob_atom = atom.OBAtom
    zero_based_idx = atom.idx - 1

    atomic_number = ob_atom.GetAtomicNum()
    symbol = Element.from_Z(atomic_number).symbol

    # Hydrogen gets the fixed hybridization code 6
    if symbol == 'H':
        hybridization = 6
    else:
        hybridization = ob_atom.GetHyb()

    # Atoms typed 'H' use the hydrogen-specific donor query
    if atom.type == 'H':
        is_donor = ob_atom.IsHbondDonorH()
    else:
        is_donor = ob_atom.IsHbondDonor()

    # Cheap properties first
    features = {
        "element": symbol,
        "atomic_num": atomic_number,
        "formal_charge": ob_atom.GetFormalCharge(),
        "hybridization": hybridization,
        "acceptor": ob_atom.IsHbondAcceptor(),
        "donor": is_donor,
        "aromatic": ob_atom.IsAromatic(),
        "coordid": atom.coordidx
    }

    # Optional: chirality (0 = not a chiral center, 1 = 'R', 2 = anything else)
    if 'chirality' in self.atom_features:
        centers = self._get_chiral_centers(mol)
        if zero_based_idx in centers:
            features['chirality'] = 1 if centers[zero_based_idx] == 'R' else 2
        else:
            features['chirality'] = 0

    # Optional: sizes of all SSSR rings that contain this atom
    if 'ring_sizes' in self.atom_features:
        ring_sizes = []
        for ring in mol.OBMol.GetSSSR():
            if ring.IsInRing(atom.idx):
                ring_sizes.append(ring.Size())
        features['ring_sizes'] = ring_sizes

    return features
def __init__(self):
    """Load every ``*.table`` file of the Magpie data directory.

    After construction ``self.all_elemental_props`` maps each property
    name (the table file name without ``.table``) to a dict keyed by
    element symbol. Line ``Z - 1`` of a table is used for atomic number
    ``Z``. Values are floats, except for ``"OxidationStates"`` where each
    value is a list of floats. Entries that cannot be parsed, or that are
    missing because the table has fewer lines than ``_pt_data`` has
    elements, are stored as NaN.
    """
    self.all_elemental_props = dict()
    self.data_dir = os.path.join(module_dir, "data_files",
                                 'magpie_elementdata')

    # Make a list of available properties
    available_props = [
        os.path.basename(datafile).replace('.table', '')
        for datafile in glob(os.path.join(self.data_dir, "*.table"))
    ]

    # parse and store elemental properties
    for descriptor_name in available_props:
        table_path = os.path.join(self.data_dir,
                                  '{}.table'.format(descriptor_name))
        with open(table_path, 'r') as f:
            lines = f.readlines()

        props = self.all_elemental_props[descriptor_name] = dict()
        for atomic_no in range(1, len(_pt_data) + 1):  # max Z=103
            try:
                if descriptor_name == "OxidationStates":
                    prop_value = [
                        float(i) for i in lines[atomic_no - 1].split()
                    ]
                else:
                    prop_value = float(lines[atomic_no - 1])
            except (ValueError, IndexError):
                # ValueError: the entry is not numeric.
                # IndexError: the table is shorter than the element list.
                prop_value = float("NaN")
            props[Element.from_Z(atomic_no).symbol] = prop_value
def __init__(self):
    """Load every ``*.table`` file of the Magpie data directory.

    After construction ``self.all_elemental_props`` maps each property
    name (the table file name without ``.table``) to a dict keyed by
    element symbol. Line ``Z - 1`` of a table is used for atomic number
    ``Z``. Values are floats, except for ``"OxidationStates"`` where each
    value is a list of floats. Entries that cannot be parsed, or that are
    missing because the table has fewer lines than ``_pt_data`` has
    elements, are stored as NaN.
    """
    self.all_elemental_props = dict()
    self.data_dir = os.path.join(module_dir, "data_files",
                                 'magpie_elementdata')

    # Make a list of available properties
    available_props = [
        os.path.basename(datafile).replace('.table', '')
        for datafile in glob(os.path.join(self.data_dir, "*.table"))
    ]

    # parse and store elemental properties
    for descriptor_name in available_props:
        table_path = os.path.join(self.data_dir,
                                  '{}.table'.format(descriptor_name))
        with open(table_path, 'r') as f:
            lines = f.readlines()

        props = self.all_elemental_props[descriptor_name] = dict()
        for atomic_no in range(1, len(_pt_data) + 1):  # max Z=103
            try:
                if descriptor_name == "OxidationStates":
                    prop_value = [
                        float(i) for i in lines[atomic_no - 1].split()
                    ]
                else:
                    prop_value = float(lines[atomic_no - 1])
            except (ValueError, IndexError):
                # ValueError: the entry is not numeric.
                # IndexError: the table is shorter than the element list.
                prop_value = float("NaN")
            props[Element.from_Z(atomic_no).symbol] = prop_value
def print_high_Tc(X_test, y_pred):
    """Print every prediction whose Tc is strictly above 150.

    Args:
        X_test: Indexable collection of rows. Each row is read as
            (atomic number of atom 0, atomic number of atom 1,
            count of atom 0, count of atom 1, pressure).
        y_pred: Indexable collection of predicted Tc values, aligned by
            index with ``X_test``.
    """
    # Index-based iteration is kept on purpose: it preserves the original
    # behaviour for any container that supports len() and integer indexing.
    for i in range(len(X_test)):
        if y_pred[i] > 150:
            row = X_test[i]
            # from_Z is called on the class directly; no throwaway
            # Element("H") instance is needed just to reach it.
            atom0 = Element.from_Z(int(row[0])).symbol
            atom1 = Element.from_Z(int(row[1])).symbol
            print('%2s%.1i%1s%.1i P = %.3i GPa Tc = %.3i K'
                  % (atom0, row[2], atom1, row[3], int(row[4]), y_pred[i]))
def readLammps(desired_return):
    """Collect results from ``log.lammps`` and ``dump.atoms``.

    Args:
        desired_return: Iterable of result keys to return. The keys
            produced here are "energies", "structures", "forces" and
            "stresses".

    Returns:
        dict mapping each requested key to its value. If the parsed log
        is empty (IndexError when taking its last entry), every requested
        key maps to None.
    """
    from pymatgen.io.lammps.outputs import parse_lammps_dumps, parse_lammps_log
    from pymatgen import Structure, Element
    from pymatgen.analysis.elasticity.stress import Stress
    from numpy import unique, array, argmin

    try:
        log = parse_lammps_log(filename="log.lammps")[-1]
    except IndexError:
        return {key: None for key in desired_return}

    # Last logged potential energy
    result_dict = {"energies": list(log['PotEng'])[-1]}

    # Every dump is processed; the values of the last one are what is used below.
    for dump in parse_lammps_dumps("dump.atoms"):
        atoms = dump.data
        coords = [''] * dump.natoms
        forces = [''] * dump.natoms
        masses = [''] * dump.natoms
        for row in range(dump.natoms):
            # Atom ids are 1-based; store each atom at position id - 1
            slot = atoms["id"][row] - 1
            coords[slot] = [atoms["x"][row], atoms["y"][row], atoms["z"][row]]
            forces[slot] = [atoms["fx"][row], atoms["fy"][row], atoms["fz"][row]]
            masses[slot] = atoms["mass"][row]
        box = dump.box

    # Assign each distinct mass the element whose atomic mass is closest
    unique_masses = unique(masses)
    ref_masses = [el.atomic_mass.real for el in Element]
    diff = abs(array(ref_masses) - unique_masses[:, None])
    atomic_numbers = argmin(diff, axis=1) + 1
    symbols = [Element.from_Z(an).symbol for an in atomic_numbers]
    species_map = dict(zip(unique_masses, symbols))
    atom_species = [species_map[mass] for mass in masses]

    result_dict["structures"] = Structure(box.to_lattice(), atom_species,
                                          coords, coords_are_cartesian=True)
    result_dict["forces"] = forces

    # Last logged c_press[1..6], scaled by 1e-1, arranged as a symmetric 3x3
    pxx, pyy, pzz, pxy, pxz, pyz = [
        1e-1 * list(log['c_press[{}]'.format(i)])[-1] for i in range(1, 7)
    ]
    result_dict["stresses"] = Stress([[pxx, pxy, pxz],
                                      [pxy, pyy, pyz],
                                      [pxz, pyz, pzz]])

    return {key: result_dict[key] for key in desired_return}
def cell_to_structure(
        cell: Tuple[List[List[float]], List[List[float]], List[int]]) -> Structure:
    """
    Convert a (lattice, coordinates, atomic numbers) cell to a Structure.

    cell: (Lattice parameters
             [[a_x, a_y, a_z], [b_x, b_y, b_z], [c_x, c_y, c_z]],
           Fractional atomic coordinates in an Nx3 array,
           Z numbers of species in a length N array)
    """
    # Translate every atomic number into its Element before building
    elements = list(map(Element.from_Z, cell[2]))
    lattice = cell[0]
    frac_coords = cell[1]
    return Structure(lattice=lattice, coords=frac_coords, species=elements)
def find_seekpath_data(self) -> None:
    """Run seekpath and store its result and the primitive structure.

    Sets ``self._seekpath_data`` to the output of
    ``seekpath.get_explicit_k_path`` and ``self._band_primitive`` to a
    Structure built from its primitive lattice, types and positions.
    """
    self._seekpath_data = seekpath.get_explicit_k_path(
        structure=self.cell,
        symprec=self.symprec,
        angle_tolerance=self.angle_tolerance,
        with_time_reversal=self.time_reversal,
        reference_distance=self.ref_distance)

    path_data = self._seekpath_data
    primitive_lattice = path_data["primitive_lattice"]
    # "primitive_types" holds atomic numbers; convert them to Elements
    primitive_species = [
        Element.from_Z(z) for z in path_data["primitive_types"]
    ]
    primitive_positions = path_data["primitive_positions"]
    self._band_primitive = Structure(primitive_lattice, primitive_species,
                                     primitive_positions)
def get_mol_species(mol):
    """
    Return the element symbol of every atom in ``mol``, in atom order.

    Atoms are fetched with ``mol.GetAtom(i)`` for i = 1 .. NumAtoms().
    """
    return [
        Element.from_Z(mol.GetAtom(idx).GetAtomicNum()).symbol
        for idx in range(1, mol.NumAtoms() + 1)
    ]
def __init__(self):
    """Load the elemental embedding table from its JSON data file.

    ``self.prop_names`` holds the 16 names "embedding 1" .. "embedding 16".
    ``self.all_element_data`` maps "Dummy" (row 0 of the file) and the
    string form of each element with Z = 1..94 (rows 1..94) to a dict of
    prop name -> value.
    """
    dfile = os.path.join(module_dir,
                         "data_files/megnet_elemental_embedding.json")
    self._dummy = "Dummy"
    with open(dfile, "r") as fp:
        embeddings = json.load(fp)

    self.prop_names = ["embedding {}".format(n) for n in range(1, 17)]
    self.all_element_data = {}
    for row_idx in range(95):
        row = dict(zip(self.prop_names, embeddings[row_idx]))
        # Row 0 is the dummy entry; row Z belongs to the element with that Z
        key = self._dummy if row_idx == 0 else str(Element.from_Z(row_idx))
        self.all_element_data[key] = row
def __init__(self):
    """Load the elemental embedding table from its JSON data file.

    ``self.prop_names`` holds the 16 names "embedding 1" .. "embedding 16".
    ``self.all_element_data`` maps "Dummy" (row 0 of the file) and the
    string form of each element with Z = 1..94 (rows 1..94) to a dict of
    prop name -> value.
    """
    dfile = os.path.join(module_dir,
                         "data_files/megnet_elemental_embedding.json")
    self._dummy = "Dummy"
    with open(dfile, "r") as fp:
        embeddings = json.load(fp)

    self.prop_names = ["embedding {}".format(n) for n in range(1, 17)]
    self.all_element_data = {}
    for row_idx in range(95):
        row = dict(zip(self.prop_names, embeddings[row_idx]))
        # Row 0 is the dummy entry; row Z belongs to the element with that Z
        key = self._dummy if row_idx == 0 else str(Element.from_Z(row_idx))
        self.all_element_data[key] = row
def _parse(self):
    """
    Parse and store all elemental properties once and for all.

    For every name in ``self.available_props`` the file
    ``<data_dir>/<name>.table`` is read; line ``Z - 1`` is used for atomic
    number ``Z``. Values are stored in
    ``self.all_elemental_props[name][str(Element)]`` as floats, or as a
    list of floats for ``"OxidationStates"``. Entries that cannot be
    parsed, or that are missing because the table has fewer lines than
    ``_pt_data`` has elements, are stored as NaN.

    ``self.all_elemental_props[name]`` must already exist for each name.
    """
    for descriptor_name in self.available_props:
        table_path = os.path.join(self.data_dir,
                                  '{}.table'.format(descriptor_name))
        with open(table_path, 'r') as f:
            lines = f.readlines()

        for atomic_no in range(1, len(_pt_data) + 1):  # max Z=103
            try:
                if descriptor_name == "OxidationStates":
                    prop_value = [float(i)
                                  for i in lines[atomic_no - 1].split()]
                else:
                    prop_value = float(lines[atomic_no - 1])
            except (ValueError, IndexError):
                # ValueError: the entry is not numeric.
                # IndexError: the table is shorter than the element list.
                prop_value = float("NaN")
            self.all_elemental_props[descriptor_name][
                str(Element.from_Z(atomic_no))] = prop_value
def get_radius(self, mol):
    '''
    Return a list of scaled radii for each atom in the Molecule, in au.

    Each radius is the reference covalent radius of the atom's element
    times ``self.angstrom2au`` times a scale factor:
    ``self.metal_radius_scale`` for symbols in ``self.metals``,
    ``self.covalent_radius_scale`` otherwise.
    '''
    ref_radius = CovalentRadius.radius
    radii = []
    for idx in range(1, mol.NumAtoms() + 1):
        symbol = Element.from_Z(mol.GetAtom(idx).GetAtomicNum()).symbol
        scale = (self.metal_radius_scale if symbol in self.metals
                 else self.covalent_radius_scale)
        radii.append(ref_radius[symbol] * self.angstrom2au * scale)
    return radii
def insert_elements(coll):
    """Ensure ``coll`` holds exactly one single-atom entry for Z = 1..18.

    For each element, documents with formula "<symbol>1" are looked up.
    If none exists, a single-atom molecule document is built and inserted;
    if more than one exists, all but the first are removed.

    Args:
        coll: Collection object providing ``find``, ``insert`` and
            ``remove``, whose cursors provide ``count``.
            NOTE(review): these look like legacy pymongo calls -- confirm
            the installed driver version still supports them.
    """
    print("adding missing elements.")
    for z in range(1, 19):
        el = Element.from_Z(z)
        cursor = coll.find(filter={"formula": "{}1".format(el.symbol)})
        # Count once and reuse the result for both branches
        n_found = cursor.count()

        if n_found == 0:
            try:
                clean_mol = Molecule([el], [[0, 0, 0]])
                xyz = XYZ(clean_mol)
                bb = BabelMolAdaptor.from_string(str(xyz), "xyz")
                pbmol = pb.Molecule(bb.openbabel_mol)
                smiles = pbmol.write("smi").split()[0]
                can = pbmol.write("can").split()[0]
                inchi = pbmol.write("inchi")
                svg = pbmol.write("svg")
                comp = clean_mol.composition
                d = {
                    "molecule": clean_mol.as_dict(),
                    "pretty_formula": comp.reduced_formula,
                    "formula": comp.formula,
                    "composition": comp.as_dict(),
                    "elements": list(comp.as_dict().keys()),
                    "nelements": len(comp),
                    "charge": 0,
                    "spin_multiplicity": clean_mol.spin_multiplicity,
                    "smiles": smiles,
                    "can": can,
                    "inchi": inchi,
                    "svg": svg,
                    "xyz": str(xyz),
                    "tags": ["G305 test set"],
                }
                coll.insert(d)
            except Exception as ex:
                # Best effort: keep going with the next element, but report
                # what went wrong instead of discarding the exception.
                print("Error in {}: {}".format(el, ex))
        elif n_found > 1:
            print("More than 1 {} found. Removing...".format(el))
            # Keep the first match, drop every later duplicate
            for duplicate in list(cursor)[1:]:
                print(duplicate["_id"])
                coll.remove({"_id": duplicate["_id"]})
def get_el(obj: Union[Element, Specie, str, int]) -> str:
    """Utility method to get an element str from a symbol, Element, or Specie.

    Args:
        obj: An arbitrary object. Supported objects are Element/Specie
            objects, integers (representing atomic numbers), or strings
            (element symbols or species strings).

    Returns:
        The element as a string.

    Raises:
        ValueError: If ``obj`` is none of the supported types.
    """
    # Strings are first converted with get_el_sp, then handled like objects
    if isinstance(obj, str):
        obj = get_el_sp(obj)

    if isinstance(obj, Element):
        return obj.name
    if isinstance(obj, Specie):
        return obj.element.name
    if isinstance(obj, int):
        return Element.from_Z(obj).name
    raise ValueError("Unsupported element type: {}.".format(type(obj)))
def __init__(self, period_tag=False, flatten=True):
    """Initialize the featurizer

    Args:
        period_tag (bool): In the original OFM, an element is represented
            by a vector of length 32, where each element is 1 or 0,
            which represents the valence subshell of the element.
            With period_tag=True, the vector size is increased
            to 39, where the 7 extra elements represent the period
            of the element. Note lanthanides are treated as period 6,
            actinides as period 7. Default False as in the original paper.
        flatten (bool): Stored as ``self.flatten``.
    """
    self.size = 39 if period_tag else 32

    # One matrix per atomic number Z = 1..94, built from get_ohv
    self.ohvs = {
        Z: np.matrix(self.get_ohv(Element.from_Z(Z), period_tag))
        for Z in range(1, 95)
    }
    self.flatten = flatten
def _GetSiteEnvironments(cls, coord, cell, SiteTypes, cutoff, pbc,
                         get_permutations=True, eigen_tol=1e-5):
    """Extract local environments from primitive cell

    Parameters
    ----------
    coord : n x 3 list or numpy array of scaled positions. n is the number
        of atom.
    cell : 3 x 3 list or numpy array
    SiteTypes : n list of string. String must be S or A followed by a
        number. S indicates a spectator sites and A indicates a active
        sites.
    cutoff : float. cutoff distance in angstrom for collecting local
        environment.
    pbc : list of boolean. Periodic boundary condition
    get_permutations : boolean. Whether to find permuted neighbor list or
        not.
    eigen_tol : tolerance for eigenanalysis of point group analysis in
        pymatgen.

    Returns
    ------
    list of local_env : list of dict, one per active site, with keys
        'pos', 'sitetypes', 'env2config', 'permutations' and 'dist'.
    """
    # --- Input checks
    assert isinstance(coord, (list, np.ndarray))
    assert isinstance(cell, (list, np.ndarray))
    assert len(coord) == len(SiteTypes)

    # --- Normalise inputs
    # TODO: Technically, user doesn't even have to supply site index,
    # because pymatgen can be used to automatically categorize sites..
    coord = np.mod(coord, 1)
    pbc = np.array(pbc)

    # --- Map every site type onto a stand-in element
    # TODO: Available pymatgen functions are very limited when DummySpecie
    # is involved. This may be perhaps fixed in the future. Until then, we
    # simply bypass this by mapping site to an element.
    # Atomic numbers are handed out from the end of this list: 1, 2, 3, ...
    availableAN = list(range(118, 0, -1))
    symbols = []
    site_idxs = []
    SiteSymMap = {}  # site type -> stand-in element
    SymSiteMap = {}  # stand-in element -> site type
    for idx, site_type in enumerate(SiteTypes):
        if site_type in SiteSymMap:
            symbol = SiteSymMap[site_type]
        else:
            symbol = Element.from_Z(availableAN.pop())
            SiteSymMap[site_type] = symbol
            SymSiteMap[symbol] = site_type
        symbols.append(symbol)
        # Only site types containing 'A' get a local environment
        if 'A' in site_type:
            site_idxs.append(idx)

    # --- Collect the local environment of every active site
    structure = Structure(Lattice(cell), symbols, coord)
    neighbors = structure.get_all_neighbors(cutoff, include_index=True)
    site_envs = []
    for site_idx in site_idxs:
        # The central site is always the first entry of its environment
        local_env_sym = [symbols[site_idx]]
        local_env_xyz = [structure[site_idx].coords]
        local_env_dist = [0.0]
        local_env_sitemap = [site_idx]
        for neighbor in neighbors[site_idx]:
            # Along non-periodic directions, keep only neighbors whose
            # fractional coordinate lies inside [0, 1)
            frac = np.around(neighbor[0].frac_coords, 10)
            inside = np.logical_and(0 <= frac, frac < 1)
            if not np.all(inside[~pbc]):
                continue
            local_env_xyz.append(neighbor[0].coords)
            local_env_sym.append(neighbor[0].specie)
            local_env_dist.append(neighbor[1])
            local_env_sitemap.append(neighbor[2])

        # Centre the environment on the mean position
        local_env_xyz = np.subtract(local_env_xyz, np.mean(local_env_xyz, 0))

        perm = []
        if get_permutations:
            finder = PointGroupAnalyzer(
                Molecule(local_env_sym, local_env_xyz),
                eigen_tolerance=eigen_tol)
            # For each point-group operation, record which transformed
            # position is closest to each original position
            perm = [
                np.argmin(
                    cdist(local_env_xyz, op.operate_multi(local_env_xyz)),
                    axis=1).tolist()
                for op in finder.get_pointgroup()
            ]

        site_envs.append({
            'pos': local_env_xyz,
            'sitetypes': [SymSiteMap[s] for s in local_env_sym],
            'env2config': local_env_sitemap,
            'permutations': perm,
            'dist': local_env_dist
        })
    return site_envs
# Report the cross-validated MAE (absolute value of the mean of `scores`,
# rounded to 3 decimals) of the ridge model on the naive feature set.
print("The MAE of the linear ridge regression band gap model using the naive feature set is: "\
    + str(round(abs(mean(scores)), 3)) + " eV")

##############################################################################################################

# Let's see which features are most important for the linear model
print("Below are the fitted linear ridge regression coefficients for each feature (i.e., element) in our naive feature set")

linear.fit(naiveFeatures, bandgaps)  # fit to the whole data set; we're not doing CV here

print("element: coefficient")
# Coefficient i is printed next to the symbol of the element with Z = i + 1
for i in range(MAX_Z):
    element = Element.from_Z(i + 1)
    print(element.symbol + ': ' + str(linear.coef_[i]))

##############################################################################################################

# Create alternative feature set that is more physically-motivated
physicalFeatures = []

for material in materials:
    # Fresh per-material accumulators
    # NOTE(review): the rest of this loop body is not visible in this excerpt.
    theseFeatures = []
    fraction = []
    atomicNo = []
    eneg = []
    group = []
def disassemble(self, atom_labels=None, guess_element=True,
                ff_label="ff_map"):
    """
    Breaks down LammpsData to building blocks
    (LammpsBox, ForceField and a series of Topology).
    RESTRICTIONS APPLIED:

    1. No complex force field defined not just on atom
        types, where the same type or equivalent types of topology
        may have more than one set of coefficients.
    2. No intermolecular topologies (with atoms from different
        molecule-ID) since a Topology object includes data for ONE
        molecule or structure only.

    Args:
        atom_labels ([str]): List of strings (must be different
            from one another) for labelling each atom type found in
            Masses section. Default to None, where the labels are
            automatically added based on either element guess or
            dummy specie assignment.
        guess_element (bool): Whether to guess the element based on
            its atomic mass. Default to True, otherwise dummy
            species "Qa", "Qb", ... will be assigned to various
            atom types. The guessed or assigned elements will be
            reflected on atom labels if atom_labels is None, as
            well as on the species of molecule in each Topology.
        ff_label (str): Site property key for labeling atoms of
            different types. Default to "ff_map".

    Returns:
        LammpsBox, ForceField, [Topology]

    Raises:
        AssertionError: If atom labels are not unique per type, or if a
            topology entry spans more than one molecule-ID.

    """
    atoms_df = self.atoms.copy()
    if "nx" in atoms_df.columns:
        # Apply the box shift derived from the nx/ny/nz columns
        atoms_df[["x", "y", "z"]] += \
            self.box.get_box_shift(atoms_df[["nx", "ny", "nz"]].values)
    atoms_df = pd.concat([atoms_df, self.velocities], axis=1)

    # Split atoms by molecule-ID (a single group with id 1 if absent)
    mids = atoms_df.get("molecule-ID")
    if mids is None:
        unique_mids = [1]
        data_by_mols = {1: {"Atoms": atoms_df}}
    else:
        unique_mids = np.unique(mids)
        data_by_mols = {}
        for k in unique_mids:
            df = atoms_df[atoms_df["molecule-ID"] == k]
            data_by_mols[k] = {"Atoms": df}

    masses = self.masses.copy()
    masses["label"] = atom_labels
    unique_masses = np.unique(masses["mass"])
    if guess_element:
        # Pick, for each distinct mass, the element of closest atomic mass
        ref_masses = [el.atomic_mass.real for el in Element]
        diff = np.abs(np.array(ref_masses) - unique_masses[:, None])
        atomic_numbers = np.argmin(diff, axis=1) + 1
        symbols = [Element.from_Z(an).symbol for an in atomic_numbers]
    else:
        # Dummy symbols "Qa", "Qb", ...
        symbols = ["Q%s" % a for a in
                   map(chr, range(97, 97 + len(unique_masses)))]
    for um, s in zip(unique_masses, symbols):
        masses.loc[masses["mass"] == um, "element"] = s
    if atom_labels is None:  # add unique labels based on elements
        # Series.items() is used because Series.iteritems() no longer
        # exists in pandas >= 2.0.
        for el, vc in masses["element"].value_counts().items():
            masses.loc[masses["element"] == el, "label"] = \
                ["%s%d" % (el, c) for c in range(1, vc + 1)]
    assert masses["label"].nunique(dropna=False) == len(masses), \
        "Expecting unique atom label for each type"
    mass_info = [tuple([r["label"], r["mass"]])
                 for _, r in masses.iterrows()]

    nonbond_coeffs, topo_coeffs = None, None
    if self.force_field:
        if "PairIJ Coeffs" in self.force_field:
            nbc = self.force_field["PairIJ Coeffs"]
            nbc = nbc.sort_values(["id1", "id2"]) \
                .drop(["id1", "id2"], axis=1)
            nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]
        elif "Pair Coeffs" in self.force_field:
            nbc = self.force_field["Pair Coeffs"].sort_index()
            nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]

        topo_coeffs = {k: [] for k in SECTION_KEYWORDS["ff"][2:]
                       if k in self.force_field}
        for kw in topo_coeffs.keys():
            class2_coeffs = {k: list(v.itertuples(False, None))
                             for k, v in self.force_field.items()
                             if k in CLASS2_KEYWORDS.get(kw, [])}
            ff_df = self.force_field[kw]
            for t in ff_df.itertuples(True, None):
                # t[0] is the row index, the remaining items the coefficients
                d = {"coeffs": list(t[1:]), "types": []}
                if class2_coeffs:
                    d.update({k: list(v[t[0] - 1])
                              for k, v in class2_coeffs.items()})
                topo_coeffs[kw].append(d)

    if self.topology:
        label_topo = lambda t: tuple(masses.loc[atoms_df.loc[t, "type"],
                                                "label"])
        for k, v in self.topology.items():
            ff_kw = k[:-1] + " Coeffs"
            for topo in v.itertuples(False, None):
                topo_idx = topo[0] - 1
                indices = topo[1:]
                mids = atoms_df.loc[indices, "molecule-ID"].unique()
                assert len(mids) == 1, \
                    "Do not support intermolecular topology formed " \
                    "by atoms with different molecule-IDs"
                label = label_topo(indices)
                topo_coeffs[ff_kw][topo_idx]["types"].append(label)
                if data_by_mols[mids[0]].get(k):
                    data_by_mols[mids[0]][k].append(indices)
                else:
                    data_by_mols[mids[0]][k] = [indices]

    if topo_coeffs:
        # De-duplicate the label tuples collected for each coefficient set
        for v in topo_coeffs.values():
            for d in v:
                d["types"] = list(set(d["types"]))

    ff = ForceField(mass_info=mass_info, nonbond_coeffs=nonbond_coeffs,
                    topo_coeffs=topo_coeffs)

    topo_list = []
    for mid in unique_mids:
        data = data_by_mols[mid]
        atoms = data["Atoms"]
        # Topology indices are re-based on the smallest atom index
        shift = min(atoms.index)
        type_ids = atoms["type"]
        species = masses.loc[type_ids, "element"]
        labels = masses.loc[type_ids, "label"]
        coords = atoms[["x", "y", "z"]]
        m = Molecule(species.values, coords.values,
                     site_properties={ff_label: labels.values})
        charges = atoms.get("q")
        velocities = atoms[["vx", "vy", "vz"]] if "vx" in atoms.columns \
            else None
        topologies = {}
        for kw in SECTION_KEYWORDS["topology"]:
            if data.get(kw):
                topologies[kw] = (np.array(data[kw]) - shift).tolist()
        topologies = None if not topologies else topologies
        topo_list.append(Topology(sites=m, ff_label=ff_label,
                                  charges=charges, velocities=velocities,
                                  topologies=topologies))

    return self.box, ff, topo_list
print(fe.thermal_conductivity) # html print(fe.boiling_point) # html print(fe.melting_point) # html print(fe.critical_temperature) # html print(fe.superconduction_temperature) # html print(fe.liquid_range) # html print(fe.bulk_modulus) # html print(fe.youngs_modulus) # html print(fe.brinell_hardness) # html print(fe.rigidity_modulus) # html print(fe.mineral_hardness) # html print(fe.vickers_hardness) # html print(fe.density_of_solid) # html print(fe.coefficient_of_linear_thermal_expansion) # html print(fe.average_ionic_radius) print(fe.ionic_radii) print(fe.is_valid_symbol("Fe")) # dir print(fe.from_Z(26)) # dir print(fe.as_dict()) # dir print(fe.from_dict(fe.as_dict())) # dir print(fe.from_row_and_group(4, 8)) # dir print(fe.ground_state_term_symbol) # dir print(fe.icsd_oxidation_states) # dir print(fe.name) # dir print(fe.print_periodic_table()) # dir print(fe.term_symbols) # dir print(fe.valence) # dir print(fe.value) # dir print(fe.boiling_point) print(float(fe.boiling_point.to("")))
def element():
    """Yield each element with Z in 1..99 that is not part of the
    composition of ``structure_for_visualize``.

    The composition is looked up again for every candidate, at the moment
    the generator is advanced.
    """
    for atomic_number in range(1, 100):
        candidate = Element.from_Z(atomic_number)
        if candidate in structure_for_visualize.composition:
            continue
        yield candidate
def _parse(self, filename):
    """Parse a Gaussian output file and populate this object's attributes.

    The file is read line by line through three stages:

    * stage 0: collect link0 entries and the route line (functional,
      basis set, route, dieze tag);
    * stage 1: read charge and spin multiplicity;
    * stage 2: read everything else (structures, energies, forces,
      frequencies, orbital eigenvalues/coefficients, Mulliken charges,
      thermo corrections, PCM data, errors, termination status).

    Args:
        filename: Path of the output file; opened with ``zopen``.

    Attributes initialised here: ``properly_terminated``, ``is_pcm``,
    ``stationary_type``, ``structures``, ``corrections``, ``energies``,
    ``pcm``, ``errors``, ``Mulliken_charges``, ``link0``, ``cart_forces``,
    ``frequencies``, ``eigenvalues``, ``is_spin``. Others (e.g. ``route``,
    ``charge``, ``num_basis_func``, ``eigenvectors``,
    ``molecular_orbital``) are set only when the matching section is
    found. A warning is issued if no termination line is found.
    """
    # All patterns are raw strings so regex escapes are not treated as
    # (invalid) string escapes.
    start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
    route_patt = re.compile(r" #[pPnNtT]*.*")
    link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
    charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                 r"Multiplicity\s+=\s*(\d+)")
    num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
    num_elec_patt = re.compile(
        r"(\d+)\s+alpha electrons\s+(\d+)\s+beta electrons")
    pcm_patt = re.compile(r"Polarizable Continuum Model")
    stat_type_patt = re.compile(r"imaginary frequencies")
    scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
    mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
    oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
    termination_patt = re.compile(r"(Normal|Error) termination")
    error_patt = re.compile(
        r"(! Non-Optimized Parameters !|Convergence failure)")
    mulliken_patt = re.compile(
        r"^\s*(Mulliken charges|Mulliken atomic charges)")
    mulliken_charge_patt = re.compile(
        r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
    end_mulliken_patt = re.compile(
        r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
    std_orientation_patt = re.compile(r"Standard orientation")
    end_patt = re.compile(r"--+")
    orbital_patt = re.compile(r"(Alpha|Beta)\s*\S+\s*eigenvalues --(.*)")
    thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                             r"\s+([\d\.-]+)")
    forces_on_patt = re.compile(
        r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
    forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
    forces_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")

    freq_on_patt = re.compile(
        r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
    freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
    normal_mode_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")

    mo_coeff_patt = re.compile(r"Molecular Orbital Coefficients:")
    mo_coeff_name_patt = re.compile(
        r"\d+\s((\d+|\s+)\s+([a-zA-Z]{1,2}|\s+))\s+(\d+\S+)")

    self.properly_terminated = False
    self.is_pcm = False
    self.stationary_type = "Minimum"
    self.structures = []
    self.corrections = {}
    self.energies = []
    self.pcm = None
    self.errors = []
    self.Mulliken_charges = {}
    self.link0 = {}
    self.cart_forces = []
    self.frequencies = []
    self.eigenvalues = []
    self.is_spin = False

    coord_txt = []
    read_coord = 0
    read_mulliken = False
    read_eigen = False
    eigen_txt = []
    parse_stage = 0
    num_basis_found = False
    terminated = False
    parse_forces = False
    forces = []
    parse_freq = False
    frequencies = []
    read_mo = False

    with zopen(filename) as f:
        for line in f:
            if parse_stage == 0:
                if start_patt.search(line):
                    parse_stage = 1
                elif link0_patt.match(line):
                    m = link0_patt.match(line)
                    self.link0[m.group(1)] = m.group(2)
                elif route_patt.search(line):
                    params = read_route_line(line)
                    self.functional = params[0]
                    self.basis_set = params[1]
                    self.route = params[2]
                    self.dieze_tag = params[3]
                    parse_stage = 1
            elif parse_stage == 1:
                if charge_mul_patt.search(line):
                    m = charge_mul_patt.search(line)
                    self.charge = int(m.group(1))
                    self.spin_mult = int(m.group(2))
                    parse_stage = 2
            elif parse_stage == 2:

                if self.is_pcm:
                    self._check_pcm(line)

                if "FREQ" in self.route and thermo_patt.search(line):
                    m = thermo_patt.search(line)
                    if m.group(1) == "Zero-point":
                        self.corrections["Zero-point"] = float(m.group(3))
                    else:
                        key = m.group(2).strip(" to ")
                        self.corrections[key] = float(m.group(3))

                if read_coord:
                    if not end_patt.search(line):
                        coord_txt.append(line)
                    else:
                        # read_coord counts separator lines; it wraps back
                        # to 0 on the separator that closes the table
                        read_coord = (read_coord + 1) % 4
                        if not read_coord:
                            sp = []
                            coords = []
                            # The first two collected lines are skipped
                            for l in coord_txt[2:]:
                                toks = l.split()
                                sp.append(Element.from_Z(int(toks[1])))
                                coords.append(
                                    [float(i) for i in toks[3:6]])
                            self.structures.append(Molecule(sp, coords))

                if parse_forces:
                    m = forces_patt.search(line)
                    if m:
                        forces.extend(
                            [float(_v) for _v in m.groups()[2:5]])
                    elif forces_off_patt.search(line):
                        self.cart_forces.append(forces)
                        forces = []
                        parse_forces = False

                # read molecular orbital eigenvalues
                if read_eigen:
                    m = orbital_patt.search(line)
                    if m:
                        eigen_txt.append(line)
                    else:
                        read_eigen = False
                        self.eigenvalues = {Spin.up: []}
                        for eigenline in eigen_txt:
                            if "Alpha" in eigenline:
                                self.eigenvalues[Spin.up] += [
                                    float(e) for e in
                                    float_patt.findall(eigenline)]
                            elif "Beta" in eigenline:
                                if Spin.down not in self.eigenvalues:
                                    self.eigenvalues[Spin.down] = []
                                self.eigenvalues[Spin.down] += [
                                    float(e) for e in
                                    float_patt.findall(eigenline)]
                        eigen_txt = []

                # read molecular orbital coefficients
                if read_mo:
                    # build a matrix with all coefficients
                    all_spin = [Spin.up]
                    if self.is_spin:
                        all_spin.append(Spin.down)

                    mat_mo = {}
                    for spin in all_spin:
                        mat_mo[spin] = np.zeros((self.num_basis_func,
                                                 self.num_basis_func))
                        nMO = 0
                        end_mo = False
                        while nMO < self.num_basis_func and not end_mo:
                            # Two lines are consumed before each block
                            f.readline()
                            f.readline()
                            self.atom_basis_labels = []
                            for i in range(self.num_basis_func):
                                line = f.readline()

                                # identify atom and OA labels
                                m = mo_coeff_name_patt.search(line)
                                if m.group(1).strip() != "":
                                    iat = int(m.group(2)) - 1
                                    # atname = m.group(3)
                                    self.atom_basis_labels.append(
                                        [m.group(4)])
                                else:
                                    self.atom_basis_labels[iat].append(
                                        m.group(4))

                                # MO coefficients
                                coeffs = [float(c) for c in
                                          float_patt.findall(line)]
                                for j in range(len(coeffs)):
                                    mat_mo[spin][i, nMO + j] = coeffs[j]

                            nMO += len(coeffs)
                            line = f.readline()
                            # manage pop=regular case (not all MO)
                            if nMO < self.num_basis_func and \
                                    ("Density Matrix:" in line or
                                     mo_coeff_patt.search(line)):
                                end_mo = True
                                warnings.warn("POP=regular case, matrix coefficients not complete")
                        f.readline()

                    self.eigenvectors = mat_mo
                    read_mo = False

                    # build a more convenient array dict with MO
                    # coefficient of each atom in each MO.
                    # mo[Spin][OM j][atom i] =
                    # {AO_k: coeff, AO_k: coeff ... }
                    mo = {}
                    for spin in all_spin:
                        mo[spin] = [[{} for iat in
                                     range(len(self.atom_basis_labels))]
                                    for j in range(self.num_basis_func)]
                        for j in range(self.num_basis_func):
                            i = 0
                            for iat in range(len(self.atom_basis_labels)):
                                for label in self.atom_basis_labels[iat]:
                                    mo[spin][j][iat][label] = \
                                        self.eigenvectors[spin][i, j]
                                    i += 1

                    self.molecular_orbital = mo

                elif parse_freq:
                    m = freq_patt.search(line)
                    if m:
                        values = [float(_v)
                                  for _v in m.groups()[0].split()]
                        for value in values:
                            frequencies.append([value, []])
                    elif normal_mode_patt.search(line):
                        values = [float(_v) for _v in line.split()[2:]]
                        n = int(len(values) / 3)
                        # Each triple of values extends one of the last n
                        # frequency entries
                        for i in range(0, len(values), 3):
                            j = -n + int(i / 3)
                            frequencies[j][1].extend(values[i:i+3])
                    elif line.find("-------------------") != -1:
                        parse_freq = False
                        self.frequencies.append(frequencies)
                        frequencies = []

                elif termination_patt.search(line):
                    m = termination_patt.search(line)
                    if m.group(1) == "Normal":
                        self.properly_terminated = True
                        terminated = True
                elif error_patt.search(line):
                    error_defs = {
                        "! Non-Optimized Parameters !": "Optimization "
                                                        "error",
                        "Convergence failure": "SCF convergence error"
                    }
                    m = error_patt.search(line)
                    self.errors.append(error_defs[m.group(1)])
                elif (not num_basis_found) and \
                        num_basis_func_patt.search(line):
                    m = num_basis_func_patt.search(line)
                    self.num_basis_func = int(m.group(1))
                    num_basis_found = True
                elif num_elec_patt.search(line):
                    m = num_elec_patt.search(line)
                    self.electrons = (int(m.group(1)), int(m.group(2)))
                elif (not self.is_pcm) and pcm_patt.search(line):
                    self.is_pcm = True
                    self.pcm = {}
                elif "FREQ" in self.route and "OPT" in self.route and \
                        stat_type_patt.search(line):
                    self.stationary_type = "Saddle"
                elif mp2_patt.search(line):
                    m = mp2_patt.search(line)
                    self.energies.append(
                        float(m.group(1).replace("D", "E")))
                elif oniom_patt.search(line):
                    # Compiled patterns have no ``matcher`` method; use
                    # ``search`` like every other branch.
                    m = oniom_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif scf_patt.search(line):
                    m = scf_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif std_orientation_patt.search(line):
                    coord_txt = []
                    read_coord = 1
                elif not read_eigen and orbital_patt.search(line):
                    eigen_txt.append(line)
                    read_eigen = True
                elif mulliken_patt.search(line):
                    mulliken_txt = []
                    read_mulliken = True
                elif not parse_forces and forces_on_patt.search(line):
                    parse_forces = True
                elif freq_on_patt.search(line):
                    parse_freq = True
                elif mo_coeff_patt.search(line):
                    if "Alpha" in line:
                        self.is_spin = True
                    read_mo = True

                if read_mulliken:
                    if not end_mulliken_patt.search(line):
                        mulliken_txt.append(line)
                    else:
                        m = end_mulliken_patt.search(line)
                        mulliken_charges = {}
                        # Separate names are used so neither the outer
                        # loop variable nor builtin ``dict`` is shadowed
                        for charge_line in mulliken_txt:
                            if mulliken_charge_patt.search(charge_line):
                                m = mulliken_charge_patt.search(charge_line)
                                entry = {int(m.group(1)):
                                         [m.group(2), float(m.group(3))]}
                                mulliken_charges.update(entry)
                        read_mulliken = False
                        self.Mulliken_charges = mulliken_charges

    if not terminated:
        # Deliberately a warning rather than an IOError
        warnings.warn("\n" + self.filename +
                      ": Termination error or bad Gaussian output file !")
def main():
    """Benchmark simple band-gap regressors on ``bandgapDFT.csv``.

    Reads a header-less two-column CSV (compound formula, band gap), builds
    two feature sets per compound (``naiveVectorize`` and
    ``get_physical_features``), and prints the cross-validated mean absolute
    error of a ridge regression and of a random forest on each feature set,
    together with the baseline error of always guessing the mean band gap.

    All results are printed; nothing is returned.
    """
    col_names = ['compound',
                 'bandgap']  # file has no header, so we add our own
    bg = pd.read_csv('bandgapDFT.csv', names=col_names, header=None)
    # print() is called as a function throughout: the bare ``print x``
    # statement form is a SyntaxError on Python 3.
    print(bg.head())

    # construct naive feature set and add it to the existing data frame
    compositions = bg['compound'].apply(
        Composition)  # this is a Pandas series containing the compositions
    bg['naive_features'] = compositions.apply(
        naiveVectorize
    )  # apply the naiveVectorize function on the compositions
    print(bg.head())

    # Establish baseline accuracy by "guessing the average" of the band gap set
    # A good model should never do worse.
    baselineError = mean(abs(mean(bg.bandgap) - bg.bandgap))
    print("The MAE of always guessing the average band gap is: " +
          str(round(baselineError, 3)) + " eV")

    # Train linear ridge regression model using naive feature set
    linear = linear_model.Ridge(
        alpha=0.5
    )  # alpha is a tuning parameter affecting how regression deals with collinear inputs
    cv = ShuffleSplit(n_splits=10, test_size=0.1, random_state=0)
    naive_scores = cross_val_score(linear,
                                   list(bg['naive_features']),
                                   bg['bandgap'],
                                   cv=cv,
                                   scoring='neg_mean_absolute_error')
    # The scorer returns negated errors, hence the abs() before reporting.
    print(
        "The MAE of the linear ridge regression band gap model using the naive feature set is: "
        + str(round(abs(mean(naive_scores)), 3)) + " eV")

    # Let's see which features are most important for the linear model
    print(
        "Below are the fitted linear ridge regression coefficients for each feature (i.e., element) in our naive feature set"
    )
    linear.fit(
        list(bg['naive_features']),
        bg['bandgap'])  # fit to the whole data set; we're not doing CV here
    print("element: coefficient")
    for i in range(MAX_Z):
        element = Element.from_Z(i + 1)
        print(element.symbol + ': ' + str(linear.coef_[i]))

    # Create alternative feature set that is more physically-motivated
    bg['physical_features'] = compositions.apply(get_physical_features)
    print(bg.head())
    physical_scores = cross_val_score(linear,
                                      list(bg['physical_features']),
                                      bg['bandgap'],
                                      cv=cv,
                                      scoring='neg_mean_absolute_error')
    print(
        "The MAE of the linear ridge regression band gap model using the physical feature set is: "
        + str(round(abs(mean(physical_scores)), 3)) + " eV")

    # Random forest
    rfr = ensemble.RandomForestRegressor(
        n_estimators=10)  # try 10 trees in the forest
    naive_scores = cross_val_score(rfr,
                                   list(bg['naive_features']),
                                   bg['bandgap'],
                                   cv=cv,
                                   scoring='neg_mean_absolute_error')
    print(
        "The MAE of the nonlinear random forest band gap model using the naive feature set is: "
        + str(round(abs(mean(naive_scores)), 3)) + " eV")
    physical_scores = cross_val_score(rfr,
                                      list(bg['physical_features']),
                                      bg['bandgap'],
                                      cv=cv,
                                      scoring='neg_mean_absolute_error')
    print(
        "The MAE of the nonlinear random forest band gap model using the physical feature set is: "
        + str(round(abs(mean(physical_scores)), 3)) + " eV")
def from_file(feff_inp_file="feff.inp", ldos_file="ldos"):
    """
    Creates LDos object from raw Feff ldos files by assuming they are
    numbered consecutively, i.e. ldos01.dat ldos02.dat...

    Args:
        feff_inp_file (str): input file of run to obtain structure
        ldos_file (str): output ldos file of run to obtain dos info, etc.

    Returns:
        LDos built from a CompleteDos (total and per-site projected DOS)
        and the charge transfer read by LDos.charge_transfer_from_file.
    """
    header_str = Header.header_string_from_file(feff_inp_file)
    header = Header.from_string(header_str)
    structure = header.struct
    nsites = structure.num_sites
    parameters = Tags.from_file(feff_inp_file)

    if "RECIPROCAL" in parameters:
        # Map each element name to the lowest ldos file index it appears
        # under, read from the pot.inp sitting next to feff.inp.
        pot_dict = dict()
        pot_readstart = re.compile(".*iz.*lmaxsc.*xnatph.*xion.*folp.*")
        pot_readend = re.compile(".*ExternalPot.*switch.*")
        pot_inp = re.sub(r"feff.inp", r"pot.inp", feff_inp_file)
        dos_index = 1
        begin = 0

        # Explicit text mode: a bare "r" is binary for compressed files,
        # which would break the str regexes below.
        with zopen(pot_inp, "rt") as potfile:
            for line in potfile:
                if len(pot_readend.findall(line)) > 0:
                    break

                if begin == 1:
                    # First line after the table header is skipped.
                    begin += 1
                    continue

                if begin == 2:
                    z_number = int(line.strip().split()[0])
                    ele_name = Element.from_Z(z_number).name
                    if ele_name not in pot_dict:
                        pot_dict[ele_name] = dos_index
                    else:
                        pot_dict[ele_name] = min(dos_index,
                                                 pot_dict[ele_name])
                    dos_index += 1

                if len(pot_readstart.findall(line)) > 0:
                    begin = 1
    else:
        pot_string = Potential.pot_string_from_file(feff_inp_file)
        dicts = Potential.pot_dict_from_string(pot_string)
        pot_dict = dicts[0]

    with zopen(ldos_file + "00.dat", "rt") as fobject:
        f = fobject.readlines()
    # Fermi energy is the fifth whitespace-separated token of the first line.
    efermi = float(f[0].split()[4])

    # One ldos file per potential, zero-padded to two digits.
    ldos = {}
    for i in range(1, len(pot_dict) + 1):
        ldos[i] = np.loadtxt("{}{:02d}.dat".format(ldos_file, i))

    # Column 0 of the first ldos file is used as the energy grid.
    dos_energies = [row[0] for row in ldos[1]]
    dlength = len(ldos[1])

    vorb = {
        "s": Orbital.s,
        "p": Orbital.py,
        "d": Orbital.dxy,
        "f": Orbital.f0
    }
    # Column offset of each angular momentum channel (data column = offset + 1).
    forb = {"s": 0, "p": 1, "d": 2, "f": 3}

    all_pdos = []
    for i in range(nsites):
        pot_index = pot_dict[structure.species[i].symbol]
        all_pdos.append(defaultdict(dict))
        for k, v in vorb.items():
            density = [
                ldos[pot_index][j][forb[k] + 1] for j in range(dlength)
            ]
            # Only a spin-up channel is ever populated here.
            all_pdos[-1][v] = {Spin.up: density}

    pdoss = {
        structure[i]: {v: site_pdos[v] for v in vorb.values()}
        for i, site_pdos in enumerate(all_pdos)
    }

    # Total DOS: sum of every channel of every site's potential.
    tdos = [0] * dlength
    for i in range(nsites):
        pot_index = pot_dict[structure.species[i].symbol]
        for v in forb.values():
            for j in range(dlength):
                tdos[j] = tdos[j] + ldos[pot_index][j][v + 1]
    tdos = {Spin.up: tdos}

    dos = Dos(efermi, dos_energies, tdos)
    complete_dos = CompleteDos(structure, dos, pdoss)
    charge_transfer = LDos.charge_transfer_from_file(
        feff_inp_file, ldos_file)
    return LDos(complete_dos, charge_transfer)
def _get_bond_type(graph) -> Dict:
    """
    Return a deep copy of ``graph`` with each bond relabelled by how many
    of its two end atoms are metals.

    Args:
        graph (dict): must provide "atom" (atomic numbers), "index1" and
            "index2" (paired atom indices, one pair per bond) and "bond".

    Returns:
        (dict): copy of ``graph`` where ``"bond"[k]`` is the sum of the
        ``is_metal`` flags of the two atoms of bond ``k``.
    """
    species = [Element.from_Z(z) for z in graph["atom"]]
    relabelled = deepcopy(graph)
    bond_pairs = zip(graph["index1"], graph["index2"])
    for bond_idx, (first, second) in enumerate(bond_pairs):
        metal_count = species[first].is_metal + species[second].is_metal
        relabelled["bond"][bond_idx] = metal_count
    return relabelled
def Bk_symbol():
    """Return a one-item list with the string form of the Z=97 element."""
    berkelium = Element.from_Z(97)
    return [str(berkelium)]
# Cross-validate the linear model on the naive feature set, then print the
# fitted per-element coefficients. `linear`, `naiveFeatures`, `bandgaps`,
# `materials` and `MAX_Z` are defined outside this fragment.
# NOTE(review): `cross_validation`, `n_iter` and the
# 'mean_absolute_error' scorer look like the legacy scikit-learn API --
# confirm against the installed version.
cv = cross_validation.ShuffleSplit(len(bandgaps),\
        n_iter=10, test_size=0.1, random_state=0)
scores = cross_validation.cross_val_score(linear, naiveFeatures,\
        bandgaps, cv=cv, scoring='mean_absolute_error')
# abs() makes the reported value non-negative regardless of scorer sign.
print("The MAE of model using the naive feature set is: "\
        + str(round(abs(mean(scores)), 3)) + " eV")
print("Below naive feature set")
linear.fit(naiveFeatures, bandgaps) # fit to the whole data set
print("element: coefficient")
# Coefficient i is reported against the element with atomic number i + 1.
for i in range(MAX_Z):
    element = Element.from_Z(i + 1)
    print(element.symbol + ': ' + str(linear.coef_[i]))
# To be continued
# Build a more physically-motivated feature set
physicalFeatures = []
for material in materials:
    # Per-material accumulators; only fraction, atomicNo and eneg are filled
    # in the part of the loop visible here.
    theseFeatures = []
    fraction = []
    atomicNo = []
    eneg = []
    group = []
    for element in material:
        fraction.append(material.get_atomic_fraction(element))
        atomicNo.append(float(element.Z))
        eneg.append(element.X)
def disassemble(self, atom_labels=None, guess_element=True,
                ff_label="ff_map"):
    """
    Breaks down LammpsData to ForceField and a series of Topology.
    RESTRICTIONS APPLIED:

    1. No complex force field defined not just on atom
        types, where the same type or equivalent types of topology
        may have more than one set of coefficients.
    2. No intermolecular topologies (with atoms from different
        molecule-ID) since a Topology object includes data for ONE
        molecule or structure only.

    Args:
        atom_labels ([str]): List of strings (must be different
            from one another) for labelling each atom type found in
            Masses section. Default to None, where the labels are
            automatically added based on either element guess or
            dummy specie assignment.
        guess_element (bool): Whether to guess the element based on
            its atomic mass. Default to True, otherwise dummy
            species "Qa", "Qb", ... will be assigned to various
            atom types. The guessed or assigned elements will be
            reflected on atom labels if atom_labels is None, as
            well as on the species of molecule in each Topology.
        ff_label (str): Site property key for labeling atoms of
            different types. Default to "ff_map".

    Returns:
        ForceField, [Topology]

    """
    atoms_df = self.atoms.copy()
    if "nx" in atoms_df.columns:
        # Shift coordinates by the image flags times the box lengths.
        box_dim = np.ptp(self.box_bounds, axis=1)
        atoms_df[["x", "y", "z"]] += atoms_df[["nx", "ny", "nz"]].values \
            * box_dim
    atoms_df = pd.concat([atoms_df, self.velocities], axis=1)

    # Group atoms by molecule-ID; without that column everything is
    # treated as a single molecule with ID 1.
    mids = atoms_df.get("molecule-ID")
    if mids is None:
        unique_mids = [1]
        data_by_mols = {1: {"Atoms": atoms_df}}
    else:
        unique_mids = np.unique(mids)
        data_by_mols = {}
        for k in unique_mids:
            df = atoms_df[atoms_df["molecule-ID"] == k]
            data_by_mols[k] = {"Atoms": df}

    masses = self.masses.copy()
    masses["label"] = atom_labels
    unique_masses = np.unique(masses["mass"])
    if guess_element:
        # Nearest reference mass wins; its position in the sorted mass
        # list (+1) is used as the atomic number.
        ref_masses = sorted([el.atomic_mass.real for el in Element])
        diff = np.abs(np.array(ref_masses) - unique_masses[:, None])
        atomic_numbers = np.argmin(diff, axis=1) + 1
        symbols = [Element.from_Z(an).symbol for an in atomic_numbers]
    else:
        symbols = ["Q%s" % a for a in
                   map(chr, range(97, 97 + len(unique_masses)))]
    for um, s in zip(unique_masses, symbols):
        masses.loc[masses["mass"] == um, "element"] = s
    if atom_labels is None:  # add unique labels based on elements
        # Series.items() replaces Series.iteritems(), which no longer
        # exists in pandas >= 2.0.
        for el, vc in masses["element"].value_counts().items():
            masses.loc[masses["element"] == el, "label"] = \
                ["%s%d" % (el, c) for c in range(1, vc + 1)]
    assert masses["label"].nunique(dropna=False) == len(masses), \
        "Expecting unique atom label for each type"
    mass_info = [tuple([r["label"], r["mass"]])
                 for _, r in masses.iterrows()]

    nonbond_coeffs, topo_coeffs = None, None
    if self.force_field:
        if "PairIJ Coeffs" in self.force_field:
            nbc = self.force_field["PairIJ Coeffs"]
            nbc = nbc.sort_values(["id1", "id2"]).drop(["id1", "id2"],
                                                       axis=1)
            nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]
        elif "Pair Coeffs" in self.force_field:
            nbc = self.force_field["Pair Coeffs"].sort_index()
            nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]

        topo_coeffs = {k: [] for k in SECTION_KEYWORDS["ff"][2:]
                       if k in self.force_field}
        for kw in topo_coeffs:
            class2_coeffs = {k: list(v.itertuples(False, None))
                             for k, v in self.force_field.items()
                             if k in CLASS2_KEYWORDS.get(kw, [])}
            ff_df = self.force_field[kw]
            for t in ff_df.itertuples(True, None):
                # t[0] is the 1-based coefficient id, the rest are values.
                d = {"coeffs": list(t[1:]), "types": []}
                if class2_coeffs:
                    d.update({k: list(v[t[0] - 1])
                              for k, v in class2_coeffs.items()})
                topo_coeffs[kw].append(d)

    if self.topology:
        def label_topo(t):
            """Translate atom ids into the tuple of their type labels."""
            return tuple(masses.loc[atoms_df.loc[t, "type"], "label"])

        for k, v in self.topology.items():
            # e.g. section "Bonds" -> force field section "Bond Coeffs"
            ff_kw = k[:-1] + " Coeffs"
            for topo in v.itertuples(False, None):
                topo_idx = topo[0] - 1
                indices = topo[1:]
                mids = atoms_df.loc[indices, "molecule-ID"].unique()
                assert len(mids) == 1, \
                    "Do not support intermolecular topology formed " \
                    "by atoms with different molecule-IDs"
                label = label_topo(indices)
                topo_coeffs[ff_kw][topo_idx]["types"].append(label)
                if data_by_mols[mids[0]].get(k):
                    data_by_mols[mids[0]][k].append(indices)
                else:
                    data_by_mols[mids[0]][k] = [indices]

    if topo_coeffs:
        # Drop duplicate type tuples collected above.
        for v in topo_coeffs.values():
            for d in v:
                d["types"] = list(set(d["types"]))

    ff = ForceField(mass_info=mass_info, nonbond_coeffs=nonbond_coeffs,
                    topo_coeffs=topo_coeffs)

    topo_list = []
    for mid in unique_mids:
        data = data_by_mols[mid]
        atoms = data["Atoms"]
        # Re-base atom ids so each molecule's topology starts at 0.
        shift = min(atoms.index)
        type_ids = atoms["type"]
        species = masses.loc[type_ids, "element"]
        labels = masses.loc[type_ids, "label"]
        coords = atoms[["x", "y", "z"]]
        m = Molecule(species.values, coords.values,
                     site_properties={ff_label: labels.values})
        charges = atoms.get("q")
        velocities = atoms[["vx", "vy", "vz"]] if "vx" in atoms.columns \
            else None
        topologies = {}
        for kw in SECTION_KEYWORDS["topology"]:
            if data.get(kw):
                topologies[kw] = (np.array(data[kw]) - shift).tolist()
        topologies = None if not topologies else topologies
        topo_list.append(Topology(sites=m, ff_label=ff_label,
                                  charges=charges, velocities=velocities,
                                  topologies=topologies))

    return ff, topo_list
def charge_transfer_from_file(feff_inp_file, ldos_file):
    """
    Get charge transfer from file.

    Args:
        feff_inp_file (str): name of feff.inp file for run
        ldos_file (str): ldos filename for run, assume consecutive order,
            i.e., ldos01.dat, ldos02.dat....

    Returns:
        dictionary of dictionaries in order of potential sites
        ({"p": 0.154, "s": 0.078, "d": 0.0, "tot": 0.232}, ...)
    """
    cht = OrderedDict()
    parameters = Tags.from_file(feff_inp_file)

    if "RECIPROCAL" in parameters:
        dicts = [dict()]
        pot_dict = dict()
        dos_index = 1
        begin = 0
        pot_inp = re.sub(r"feff.inp", r"pot.inp", feff_inp_file)
        pot_readstart = re.compile(".*iz.*lmaxsc.*xnatph.*xion.*folp.*")
        pot_readend = re.compile(".*ExternalPot.*switch.*")
        # Explicit text mode, matching how the ldos files are opened below;
        # a bare "r" is binary for compressed files.
        with zopen(pot_inp, "rt") as potfile:
            for line in potfile:
                if len(pot_readend.findall(line)) > 0:
                    break

                if begin == 1:
                    # First table row: recorded in pot_dict only, it gets
                    # no entry in dicts[0].
                    z_number = int(line.strip().split()[0])
                    ele_name = Element.from_Z(z_number).name
                    next_key = max(pot_dict.keys()) + 1 if pot_dict else 0
                    pot_dict[next_key] = ele_name
                    begin += 1
                    continue

                if begin == 2:
                    z_number = int(line.strip().split()[0])
                    ele_name = Element.from_Z(z_number).name
                    dicts[0][ele_name] = dos_index
                    dos_index += 1
                    next_key = max(pot_dict.keys()) + 1 if pot_dict else 0
                    pot_dict[next_key] = ele_name

                if len(pot_readstart.findall(line)) > 0:
                    begin = 1
    else:
        pot_string = Potential.pot_string_from_file(feff_inp_file)
        dicts = Potential.pot_dict_from_string(pot_string)
        pot_dict = dicts[1]

    for i in range(0, len(dicts[0]) + 1):
        # File index is zero-padded to two digits: ldos00.dat, ldos01.dat...
        with zopen("{}{:02d}.dat".format(ldos_file, i), "rt") as fobject:
            f = fobject.readlines()
        # Per-channel values are the third token of lines 4-7; the total
        # is the fifth token of line 2.
        s = float(f[3].split()[2])
        p = float(f[4].split()[2])
        d = float(f[5].split()[2])
        f1 = float(f[6].split()[2])
        tot = float(f[1].split()[4])
        cht[str(i)] = {
            pot_dict[i]: {
                "s": s,
                "p": p,
                "d": d,
                "f": f1,
                "tot": tot
            }
        }

    return cht
def feature_labels(self):
    """Return the element symbols for atomic numbers 1 through 103."""
    return [Element.from_Z(z).symbol for z in range(1, 104)]
# NOTE(review): the four statements below look like the tail of a
# feature-building loop whose header is outside this fragment -- confirm
# their indentation against the full script.
# Append atomic numbers, atom counts and pressure to the current feature
# vector, then store a copy of it in `pf`.
features.extend(atomicNo)
features.extend(natom)
features.append(pressure)
pf.append(features[:])
# }}}
#
# set X_train, y_train, X_test
# {{{
# Training data are shallow copies of the collected features/targets.
X = pf[:]
y = tc[:]
X_train = X[:]
y_train = y[:]
materials = []
xatom=Element("H")
# Candidate compositions: every non-noble-gas element with Z in 3..85
# combined with xatom (constructed from "H"), 1-9 atoms of each.
for i in range(3,86):
    if(not xatom.from_Z(i).is_noble_gas):
        for iatom1 in range(1,10):
            for iatom2 in range(1,10):
                # print('%s%.1i%s%.1i' % (xatom.from_Z(i).symbol,iatom1,xatom.symbol,iatom2))
                str_mat=str(xatom.from_Z(i).symbol)+str(iatom1)+str(xatom.symbol)+str(iatom2)
                materials.append(Composition(str_mat))
X_test = []
for material in materials:
    atomicNo = []
    natom = []
    for element in material:
        # atomic fraction * total atom count = number of atoms of this element
        natom.append(material.get_atomic_fraction(element)*material.num_atoms)
        atomicNo.append(float(element.Z))
#   atom0=element.from_Z(atomicNo[0]).symbol
#   atom1=element.from_Z(atomicNo[1]).symbol
def get_translations(structure, structural_type='100'):
    """
    Compute the relative in-plane translations between the two lowest
    metal-containing connected components of ``structure``.

    Args:
        structure: periodic structure supporting ``*`` supercell scaling,
            ``atomic_numbers``, ``lattice.matrix`` and site indexing.
            NOTE(review): presumably a pymatgen Structure -- confirm.
        structural_type (str): '100' or '110'. For '110' only the lower
            half (along the stacking axis) of each layer's metal sites is
            used and the second period is scaled by sqrt(2).

    Returns:
        list: the two translations, each expressed as a fraction of the
        corresponding period, rounded to 2 decimals and sorted ascending.

    Raises:
        AssertionError: if ``structural_type`` is not '100' or '110'.
        IndexError: if no metal/metalloid is present or no suitable
            in-plane site pair is found.
    """
    assert structural_type in ['100', '110']

    # Symbols of all metal or metalloid species; only the first is used.
    metal = []
    for z in set(structure.atomic_numbers):
        element = Element.from_Z(z)
        if element.is_metal or element.is_metalloid:
            metal.append(element.symbol)

    # The axis whose doubling yields the most connected components is taken
    # as the out-of-plane axis; the other two span the plane.
    ab_indices = [0, 1, 2]
    conn_indices = [[2, 1, 1], [1, 2, 1], [1, 1, 2]]
    number_connected_components = [
        conn_comps_sci(adjacency_matrix(structure * i))[0]
        for i in conn_indices
    ]
    c_index = number_connected_components.index(
        max(number_connected_components))
    ab_indices.remove(c_index)

    # Split a 3x3x3 supercell into connected components and keep those
    # containing the reference metal.
    extended_structure = structure * 3
    extended_components = list(
        conn_comps_netx(from_numpy_matrix(
            adjacency_matrix(extended_structure))))
    extended_sites = [[extended_structure[i] for i in components]
                      for components in extended_components]
    layers = [
        s for s in extended_sites
        if metal[0] in [site.specie.symbol for site in s]
    ]

    # Rank layers by the highest metal coordinate along the stacking axis
    # and pick the two lowest.
    max_coords = [
        max([a.coords[c_index] for a in layer
             if a.specie.symbol == metal[0]])
        for layer in layers
    ]
    ordered_coords = sorted(max_coords)
    first_layer_index = max_coords.index(ordered_coords[0])
    second_layer_index = max_coords.index(ordered_coords[1])
    first_layer_coords = array([
        a.coords for a in layers[first_layer_index]
        if a.specie.symbol == metal[0]
    ])
    second_layer_coords = array([
        a.coords for a in layers[second_layer_index]
        if a.specie.symbol == metal[0]
    ])

    if structural_type == '110':
        # Keep only the lower half of each layer along the stacking axis.
        first_layer_coords = first_layer_coords[
            first_layer_coords[:, c_index].argsort()
        ][:first_layer_coords.shape[0] // 2, :]
        second_layer_coords = second_layer_coords[
            second_layer_coords[:, c_index].argsort()
        ][:second_layer_coords.shape[0] // 2, :]

    a_axis = extended_structure.lattice.matrix[ab_indices][0]
    b_axis = extended_structure.lattice.matrix[ab_indices][1]
    perp = cross(a_axis / norm(a_axis), b_axis / norm(b_axis))

    # Shortest metal-metal vector lying (nearly) in the plane defines the
    # first in-plane direction and the period m_dist.
    dir_1 = sorted([
        c[0] - c[1] for c in combinations(first_layer_coords, 2)
        if abs(dot(c[0] - c[1], perp) / norm(c[0] - c[1]) / norm(perp))
        < 0.07
    ], key=norm)[0]
    m_dist = norm(dir_1)
    dir_1 = dir_1 / norm(dir_1)
    dir_2 = cross(perp / norm(perp), dir_1 / norm(dir_1))

    # The tree does not depend on the loop variable, so build it once.
    second_layer_tree = KDTree(second_layer_coords)
    a_projections, b_projections = [], []
    for site_coords in first_layer_coords:
        nearest_site = second_layer_coords[
            second_layer_tree.query(site_coords)[1]]
        offset = site_coords - nearest_site
        a_projections.append(dot(offset, dir_1))
        b_projections.append(dot(offset, dir_2))

    def _min_periodic_fraction(projections, period):
        """Smallest distance to a multiple of ``period``, in period units."""
        remainders = [abs(p) % period for p in projections]
        return min([min(abs(period - r), r) for r in remainders]) / period

    a_translation = _min_periodic_fraction(a_projections, m_dist)
    if structural_type == '110':
        m_dist = m_dist * sqrt(2)
    b_translation = _min_periodic_fraction(b_projections, m_dist)

    return sorted([round(a_translation, 2), round(b_translation, 2)])
def _get_bond_type(graph) -> Dict:
    """
    Build a copy of ``graph`` in which every bond value is replaced by the
    number of metallic atoms (0, 1 or 2) at its two ends.

    Args:
        graph (dict): needs the keys 'atom' (atomic numbers), 'index1' and
            'index2' (atom indices of each bond's ends) and 'bond'.

    Returns:
        (dict): deep copy of ``graph`` with the 'bond' entries overwritten.
    """
    result = deepcopy(graph)
    atoms = [Element.from_Z(number) for number in graph['atom']]
    starts, ends = graph['index1'], graph['index2']
    for position, pair in enumerate(zip(starts, ends)):
        head, tail = pair
        result['bond'][position] = atoms[head].is_metal + atoms[tail].is_metal
    return result
def max_z_Cm_symbol():
    """Return a one-item list with the string form of the Z=96 element."""
    curium = Element.from_Z(96)
    return [str(curium)]