def get_insertion_energy(
    base_entry: ComputedStructureEntry,
    inserted_entry: ComputedStructureEntry,
    migrating_ion_entry: ComputedEntry,
) -> float:
    """
    Compute the insertion energy per working ion of an inserted structure.

    The host energy is rescaled so that the host and the inserted structure
    describe the same amount of framework (everything except the working
    ion) before the energies are compared.

    Args:
        base_entry: The entry for the host structure
        inserted_entry: The entry for the inserted structure
        migrating_ion_entry: The entry for the metallic phase of the
            working ion; its first element is taken as the working ion

    Returns:
        (E[inserted] - (E[base] + n * E[working_ion])) / n, where n is the
        amount of working ion in ``inserted_entry``, E[working_ion] is the
        per-atom energy of ``migrating_ion_entry`` and E[base] has been
        scaled to the size of the inserted framework.
    """
    working_ion = str(migrating_ion_entry.composition.elements[0])

    # Framework of the inserted structure: its composition minus the
    # working ion.
    framework_amounts = inserted_entry.composition.as_dict()
    framework_amounts.pop(working_ion)
    framework_comp = Composition.from_dict(framework_amounts)

    # Only the multiplicity relative to the reduced formula is needed.
    factor_inserted = framework_comp.get_reduced_composition_and_factor()[1]
    factor_base = base_entry.composition.get_reduced_composition_and_factor()[1]

    scaled_base_energy = base_entry.energy * factor_inserted / factor_base
    inserted_energy = inserted_entry.energy
    ion_energy_per_atom = migrating_ion_entry.energy_per_atom
    n_ions = inserted_entry.composition[working_ion]

    reference_energy = scaled_base_energy + n_ions * ion_energy_per_atom
    return (inserted_energy - reference_energy) / n_ions
def _tidy_column(self, df, featurizer_type):
    """
    Various conversions to homogenize columns for featurization input.
    For example, take a column of compositions and ensure they are decorated
    with oxidation states, are not strings, etc.

    The type of the FIRST entry of the column decides which conversion is
    applied to the whole column.

    Args:
        df (pandas.DataFrame)
        featurizer_type: The key defining the featurizer input. For example,
            composition featurizers should have featurizer_type of
            "composition".

    Returns:
        df (pandas.DataFrame): DataFrame with featurizer_type column
            ready for featurization.

    Raises:
        ValueError: If a non-composition column holds strings, which cannot
            be converted to objects.
    """
    # todo: Make the following conversions more robust (no [0] type checking)
    # Only the first row is inspected; the column is assumed homogeneous.
    type_tester = df[featurizer_type].iloc[0]

    if featurizer_type == self.composition_col:
        # Convert formulas to composition objects
        if isinstance(type_tester, str):
            self.logger.info(
                self._log_prefix +
                "Compositions detected as strings. Attempting "
                "conversion to Composition objects...")
            stc = StrToComposition(overwrite_data=True,
                                   target_col_id=featurizer_type)
            df = stc.featurize_dataframe(df, featurizer_type,
                                         multiindex=self.multiindex,
                                         ignore_errors=True,
                                         inplace=False)

        elif isinstance(type_tester, dict):
            self.logger.info(self._log_prefix +
                             "Compositions detected as dicts. Attempting "
                             "conversion to Composition objects...")
            df[featurizer_type] = [
                Composition.from_dict(d) for d in df[featurizer_type]
            ]

        # Convert non-oxidstate containing comps to oxidstate comps
        if self.guess_oxistates:
            self.logger.info(
                self._log_prefix +
                "Guessing oxidation states of compositions, as"
                " they were not present in input.")
            cto = CompositionToOxidComposition(
                target_col_id=featurizer_type, overwrite_data=True,
                return_original_on_error=True, max_sites=-50)
            try:
                df = cto.featurize_dataframe(df, featurizer_type,
                                             multiindex=self.multiindex,
                                             inplace=False)
            except Exception as e:
                # Best effort: on failure, keep the column as it is and
                # exclude every featurizer that needs oxidation states.
                self.logger.info(self._log_prefix +
                                 "Could not decorate oxidation states due "
                                 "to {}. Excluding featurizers based on "
                                 "composition oxistates".format(e))
                classes_require_oxi = [
                    c.__class__.__name__ for c in
                    CompositionFeaturizers().need_oxi
                ]
                self.exclude.extend(classes_require_oxi)

    else:
        # Convert structure/bs/dos dicts to objects (robust already)
        if isinstance(type_tester, (dict, str)):
            self.logger.info(self._log_prefix.capitalize() +
                             "{} detected as string or dict. Attempting "
                             "conversion to {} objects..."
                             "".format(featurizer_type, featurizer_type))
            # Strings are logged above but cannot be converted.
            if isinstance(type_tester, str):
                raise ValueError("{} column is type {}. Cannot convert."
                                 "".format(featurizer_type,
                                           type(type_tester)))
            dto = DictToObject(overwrite_data=True,
                               target_col_id=featurizer_type)
            df = dto.featurize_dataframe(df, featurizer_type, inplace=False)

        # Decorate with oxidstates
        # NOTE(review): the source formatting was flattened, so the nesting
        # of this step is reconstructed; it is placed at the level of the
        # else-branch (runs for already-converted objects too) - confirm.
        if featurizer_type == self.structure_col and \
                self.guess_oxistates:
            self.logger.info(
                self._log_prefix +
                "Guessing oxidation states of structures if they were "
                "not present in input.")
            sto = StructureToOxidStructure(
                target_col_id=featurizer_type, overwrite_data=True,
                return_original_on_error=True, max_sites=-50)
            try:
                df = sto.featurize_dataframe(
                    df, featurizer_type, multiindex=self.multiindex,
                    inplace=False)
            except Exception as e:
                # Best effort: log the failure and keep the structures
                # without oxidation states.
                self.logger.info(
                    self._log_prefix +
                    "Could not decorate oxidation states on structures "
                    "due to {}.".format(e))
    return df
def __post_init__(self, target, vertex_elements):
    """
    Normalize the init-only arguments into instance attributes.

    Args:
        target: A Composition, or a dict accepted by Composition.from_dict.
        vertex_elements: Elements to use as vertices; when falsy, the
            sorted elements of the target composition are used instead.
    """
    if not isinstance(target, Composition):
        target = Composition.from_dict(target)
    self.target: Composition = target

    if not vertex_elements:
        vertex_elements = sorted(self.target.elements)
    self.vertex_elements = vertex_elements
def __post_init__(self, composition):
    """
    Store ``composition`` as a Composition object.

    Args:
        composition: A Composition, or a dict accepted by
            Composition.from_dict.
    """
    if not isinstance(composition, Composition):
        composition = Composition.from_dict(composition)
    self.composition: Composition = composition
def cleaveSurfBond(entry,
                   max_bonds=1,
                   supercell=2,
                   group_structs=True,
                   prec=1E-4):
    """
    An algorithm to cleave a surface from a fully periodic crystal. This
    iteration uses the "periodic bond" approach, as described in
    Paul et al. in 2020. If this algorithm is used, please cite:
    https://arxiv.org/abs/2002.00903

    inputs
    --------
        entry (list):           A set of components necessary for the TSA.
                                Makes it easier to parallelize with this
                                as the input
            --structure (Structure): pymatgen Structure object
            --tol (float):           The scaling for the atomic bonds
            --mp_id (str):           The label for the entry, commonly
                                     the MaterialsProject ID

        max_bonds (int):        The maximum number of bonds to cleave with
                                this algorithm. Every combination of 1 up
                                to max_bonds periodic bonds is tried.

        supercell (int):        The size supercell to use for building the
                                bonded atomic network. Typically does not
                                matter, except in the case of a bipartite
                                crystal, where even versus odd supercell
                                sizes can change whether the TSA reports a
                                clear or a "mixed" dimensionality.

        group_structs (bool):   Whether to group the surfaces cleaved based
                                on symmetry, or give the full list of
                                surfaces cleaved. The more symmetric the
                                crystal, the more duplicate surfaces will
                                be present

        prec (float):           The precision to compare magnitude of
                                vectors representing the bonds in the
                                system

    returns
    --------
        cleaved surfaces (list): The list of structure objects representing
                                 the cleaved structures. If it is empty,
                                 no surfaces could be cleaved
    """
    struct = copy.deepcopy(entry[0])
    results = getStructureType(entry, supercell=supercell, returnSS=True)

    # Proceed only if the structure is classified as periodic
    # in all directions
    if results[0] != 'conventional':
        print('Material is does not have a 3D motiif')
        print('Try increasing radii tolerance if appropriate')
        return []

    struct.make_supercell(supercell)
    binary_matrix = getDistMat(struct, entry[1] - 1)

    # Get dictionary of directional bonds in the system,
    # and the associated atomic species
    bond_dir = getBondVectors(struct, entry[1] - 1, prec)

    # Create the list of bonds to be broken. Each candidate is the second
    # item of a bond_dir record: an iterable of (i, j) site-index pairs.
    combos = [
        cleave[1]
        for s1 in bond_dir
        for s2 in bond_dir[s1]
        for cleave in bond_dir[s1][s2]
    ]

    # Create pairings of bonds to be broken, up to max_bonds number of
    # bonds. The combination size must follow the loop variable; using
    # max_bonds here would skip the smaller combinations and repeat the
    # largest ones.
    final_combos = []
    for n_bonds in range(1, max_bonds + 1):
        final_combos.extend(itertools.combinations(combos, n_bonds))

    seed_index = 0
    # Size of the bonded network of the intact crystal, per original cell
    old_cluster_size = len(buildNetwork(binary_matrix,
                                        seed_index)) / supercell**3
    # The supercell does not change inside the loop, so its reduced
    # formula is looked up once.
    bulk_formula = struct.composition.reduced_formula

    all_structs = []
    for combo in final_combos:
        # Work on a fresh copy so every combination starts from the
        # intact bond network
        modified_matrix = np.array(binary_matrix)
        for sett in combo:
            for i, j in sett:
                modified_matrix[i][j] = 0
                modified_matrix[j][i] = 0

        cluster = buildNetwork(modified_matrix, seed_index)
        if cluster == set():
            # No network could be grown from the seed; nothing to cleave
            continue

        scale = len(cluster) / old_cluster_size
        compo = Composition.from_dict(
            Counter([struct[l].specie.name for l in list(cluster)]))
        # If the cluster does not have the same composition as the overall
        # crystal, there are other clusters of varying composition.
        hetero = compo.reduced_formula != bulk_formula
        motiif = getDim(scale, supercell)

        if not hetero and motiif == 'layered':
            cluster_sites = [struct.sites[n] for n in cluster]
            all_structs.append(struct.from_sites(cluster_sites))

    if group_structs:
        # Keep one representative of each group of matching structures
        matched = [
            x[0] for x in StructureMatcher(
                stol=1E-6, primitive_cell=False,
                scale=False).group_structures(all_structs)
        ]
    else:
        matched = all_structs
    return matched
def cleaveSurfAtom(entry, max_bonds=1, supercell=2, group_structs=True):
    """
    An algorithm to cleave a surface from a fully periodic crystal. This
    iteration uses the "periodic atom" approach, as described in
    Paul et al. in 2020. If this algorithm is used, please cite:
    https://arxiv.org/abs/2002.00903

    inputs
    --------
        entry (list):           A set of components necessary for the TSA.
                                Makes it easier to parallelize with this
                                as the input
            --structure (Structure): pymatgen Structure object
            --tol (float):           The scaling for the atomic bonds
            --mp_id (str):           The label for the entry, commonly
                                     the MaterialsProject ID

        max_bonds (int):        The maximum number of bonds to cleave with
                                this algorithm. Here a "bond" is a bonded
                                pair of atoms of the original cell; all
                                bonds between the supercell images of the
                                two atoms are removed together. Every
                                combination of 0 up to max_bonds pairs is
                                tried.

        supercell (int):        The size supercell to use for building the
                                bonded atomic network. Typically does not
                                matter, except in the case of a bipartite
                                crystal, where even versus odd supercell
                                sizes can change whether the TSA reports a
                                clear or a "mixed" dimensionality.

        group_structs (bool):   Whether to group the surfaces cleaved based
                                on symmetry, or give the full list of
                                surfaces cleaved. The more symmetric the
                                crystal, the more duplicate surfaces will
                                be present

    returns
    --------
        cleaved surfaces (list): The list of structure objects representing
                                 the cleaved structures. If it is empty,
                                 no surfaces could be cleaved
    """
    results = getStructureType(entry, supercell=supercell, returnSS=True)

    # Proceed only if the structure is classified as periodic
    # in all directions
    if results[0] != 'conventional':
        print('Material is does not have a 3D motiif')
        print('Try increasing radii tolerance if appropriate')
        return []

    struct = copy.deepcopy(entry[0])
    # Bond matrix of the original cell, taken BEFORE the supercell is built
    og_binary_matrix = getDistMat(struct, entry[1] - 1)
    struct.make_supercell(supercell)
    binary_matrix = getDistMat(struct, entry[1] - 1)

    # Unique bonded pairs (i < j) of the original cell
    n_sites = len(og_binary_matrix)
    bonds = [(i, j)
             for i in range(n_sites)
             for j in range(i + 1, n_sites)
             if og_binary_matrix[i][j] == 1]

    # Every combination of 0..max_bonds bonded pairs
    combos = []
    for n_pairs in range(max_bonds + 1):
        combos.extend(itertools.combinations(bonds, n_pairs))

    n_images = supercell**3
    seed_index = 0
    # The intact network, its per-cell size and the bulk formula do not
    # depend on the combination being tested, so they are computed once
    # instead of once per combination.
    old_cluster_size = len(buildNetwork(binary_matrix,
                                        seed_index)) / n_images
    bulk_formula = struct.composition.reduced_formula

    all_structs = []
    for combo in combos:
        # Work on a fresh copy so every combination starts from the
        # intact bond network
        modified_matrix = np.array(binary_matrix)
        for i, j in combo:
            # The index arithmetic assumes the supercell keeps the
            # n_images copies of original site k at rows
            # k * n_images .. (k + 1) * n_images - 1. Zero the whole
            # image-block between the two atoms, in both directions.
            lo_i = i * n_images
            lo_j = j * n_images
            modified_matrix[lo_i:lo_i + n_images, lo_j:lo_j + n_images] = 0
            modified_matrix[lo_j:lo_j + n_images, lo_i:lo_i + n_images] = 0

        cluster = buildNetwork(modified_matrix, seed_index)
        if cluster == set():
            # No network could be grown from the seed; nothing to cleave
            continue

        scale = len(cluster) / old_cluster_size
        compo = Composition.from_dict(
            Counter([struct[l].specie.name for l in list(cluster)]))
        # If the cluster does not have the same composition as the overall
        # crystal, there are other clusters of varying composition.
        hetero = compo.reduced_formula != bulk_formula
        motiif = getDim(scale, supercell)

        if not hetero and motiif == 'layered':
            cluster_sites = [struct.sites[n] for n in cluster]
            all_structs.append(struct.from_sites(cluster_sites))

    if group_structs:
        # Keep one representative of each group of matching structures
        matched = [
            x[0] for x in StructureMatcher(
                stol=1E-6, primitive_cell=False,
                scale=False).group_structures(all_structs)
        ]
    else:
        matched = all_structs
    return matched
def getStructureType(entry, seed_index=0, supercell=2, returnSS=False):
    """
    This is a topology-scaling algorithm used to describe the
    periodicity of bonded clusters in a bulk structure. It is described in
    Ashton et al. in 2017. If used, please cite:
    https://doi.org/10.1103/PhysRevLett.118.106101

    inputs
    --------
        entry (list):       A set of components necessary for the TSA.
                            Makes it easier to parallelize with this
                            as the input
            --structure (Structure): pymatgen Structure object
            --tol (float):           The scaling for the atomic bonds
            --mp_id (str):           The label for the entry, commonly
                                     the MaterialsProject ID

        seed_index (int):   The site to use as the starting point for the
                            TSA. Typically does not impact the results, but
                            will if the structure is a bipartite or has
                            mixed dimensionality

        supercell (int):    The size supercell to use for building the
                            bonded atomic network. Typically does not
                            matter, except in the case of a bipartite
                            crystal, where even versus odd supercell sizes
                            can change whether the TSA reports a clear or
                            a "mixed" dimensionality.

        returnSS (bool):    If True, the returned site list refers to the
                            supercell network instead of the network of
                            the original cell.

    returns
    --------
        A list with the following items, in order:

        type0 (str):        The dimensionality of the structure as given
                            by getDim, with "_heter" appended when either
                            network has a different reduced formula than
                            the crystal
        mp_id (str):        Same mp_id as entered in "entry"
        tol (float):        Same float as entered in "entry"
        compo.reduced_formula (str): The formula of the identified network
        sites (list):       The list of sites associated with the network:
                            supercell indices if returnSS is True,
                            otherwise indices relative to the original
                            structure object
    """
    # Entry is a list of the necessary components of the TSA.
    # Makes it easier to parallelize
    structure, tol, mp_id = entry
    norm_tol = tol - 1
    # Work on a copy: the structure is expanded in place further down
    s = copy.deepcopy(structure)
    heterogeneous = False
    heterogeneousSS = False

    # Distance matrix (rowA, columnB) shows distance between
    # atoms A and B, taking PBCs into account.
    binary_matrix = getDistMat(s, norm_tol)
    cluster = buildNetwork(binary_matrix, seed_index)

    compo = Composition.from_dict(
        Counter([s[l].specie.name for l in list(cluster)]))
    if compo.reduced_formula != s.composition.reduced_formula:
        # i.e. the cluster does not have the same composition
        # as the overall crystal; therefore there are other
        # clusters of varying composition.
        heterogeneous = True
    # Keep the original-cell network; `cluster` is reassigned below
    og_cluster = set()
    og_cluster.update(cluster)
    old_cluster_size = len(cluster)
    # Increase structure to determine dimensionality
    s.make_supercell(supercell)
    # Re-point the seed at the supercell; assumes the images of original
    # site k start at index k * supercell**3 - TODO confirm
    seed_index *= supercell**3
    binary_matrix = getDistMat(s, norm_tol)
    cluster = buildNetwork(binary_matrix, seed_index)
    if cluster != set():
        new_cluster_size = len(cluster)
        # Get ratio of original and final cluster lengths
        scale = new_cluster_size / old_cluster_size
        compo = Composition.from_dict(
            Counter([s[l].specie.name for l in list(cluster)]))
        if compo.reduced_formula != s.composition.reduced_formula:
            # i.e. the cluster does not have the same composition
            # as the overall crystal; therefore there are other
            # clusters of varying composition.
            heterogeneousSS = True
        motiif = getDim(scale, supercell)
    # NOTE(review): the source formatting was flattened, so the nesting of
    # the statements below is reconstructed. As placed here, `motiif` is
    # only bound when the supercell network is non-empty - confirm.
    if heterogeneous or heterogeneousSS:
        motiif += "_heter"

    if returnSS:
        return [motiif, mp_id, tol, compo.reduced_formula, list(cluster)]
    else:
        return [motiif, mp_id, tol, compo.reduced_formula, list(og_cluster)]
def dict_form_to_chemical_formula(self, dict_form):
    """
    Convert a composition given in dict form to its reduced formula.

    Args:
        dict_form: A dict accepted by Composition.from_dict.

    Returns:
        The ``reduced_formula`` of the resulting Composition.
    """
    composition = Composition.from_dict(dict_form)
    return composition.reduced_formula