Beispiel #1
0
def get_insertion_energy(
    base_entry: ComputedStructureEntry,
    inserted_entry: ComputedStructureEntry,
    migrating_ion_entry: ComputedEntry,
) -> float:
    """
    Compute the insertion energy per working ion for an inserted entry.

    The host energy is rescaled by the ratio of reduction factors so that the
    host and the inserted structure (with the working ion removed) refer to
    the same amount of host material before the energies are compared.

    Args:
        base_entry: The entry for the host structure
        inserted_entry: The entry for the inserted structure
        migrating_ion_entry: The entry for the metallic phase of the working
            ion; the first element of its composition is used as the working
            ion.
    Returns:
        (E[inserted] - (E[base] + n * E[working_ion])) / n, where n is the
        amount of working ion in the inserted composition, E[working_ion] is
        the energy per atom of ``migrating_ion_entry`` and E[base] is the host
        energy scaled to the size of the inserted structure (sans working ion).
    """
    working_ion = str(migrating_ion_entry.composition.elements[0])

    # Composition of the inserted structure with the working ion stripped out.
    host_amounts = inserted_entry.composition.as_dict()
    host_amounts.pop(working_ion)
    host_only = Composition.from_dict(host_amounts)

    # Only the multiplicity of the reduced formula is needed from each call.
    inserted_factor = host_only.get_reduced_composition_and_factor()[1]
    base_factor = base_entry.composition.get_reduced_composition_and_factor()[1]

    scaled_base_energy = base_entry.energy * inserted_factor / base_factor
    ion_count = inserted_entry.composition[working_ion]
    ion_energy = migrating_ion_entry.energy_per_atom

    reference_energy = scaled_base_energy + ion_count * ion_energy
    return (inserted_entry.energy - reference_energy) / ion_count
Beispiel #2
0
    def _tidy_column(self, df, featurizer_type):
        """
        Homogenize a single featurizer input column before featurization.

        The conversion that is applied is chosen from the type of the FIRST
        entry of the column only:

        * composition column: strings are converted with StrToComposition and
          dicts with Composition.from_dict. Afterwards, when
          ``self.guess_oxistates`` is set, CompositionToOxidComposition is
          applied; if that raises, the class names of
          ``CompositionFeaturizers().need_oxi`` are appended to
          ``self.exclude``.
        * any other column: dicts are converted with DictToObject (strings
          are rejected). For the structure column, when
          ``self.guess_oxistates`` is set, StructureToOxidStructure is applied
          after the conversion; failures there are only logged.

        Args:
            df (pandas.DataFrame)
            featurizer_type: The key defining the featurizer input. For example,
                composition featurizers should have featurizer_type of
                "composition".

        Returns:
            df (pandas.DataFrame): DataFrame with featurizer_type column
                ready for featurization.

        Raises:
            ValueError: If a non-composition column holds strings.
        """
        # todo: Make the following conversions more robust (no [0] type checking)
        sample = df[featurizer_type].iloc[0]

        if featurizer_type != self.composition_col:
            # Structure/bs/dos columns: nothing to do unless the sample is a
            # dict or a string.
            if not isinstance(sample, (dict, str)):
                return df

            # NOTE(review): this is the only message whose prefix is passed
            # through .capitalize(); confirm that this is intended.
            self.logger.info(self._log_prefix.capitalize() +
                             "{} detected as string or dict. Attempting "
                             "conversion to {} objects..."
                             "".format(featurizer_type, featurizer_type))
            if isinstance(sample, str):
                raise ValueError("{} column is type {}. Cannot convert."
                                 "".format(featurizer_type, type(sample)))

            to_object = DictToObject(target_col_id=featurizer_type,
                                     overwrite_data=True)
            df = to_object.featurize_dataframe(df,
                                               featurizer_type,
                                               inplace=False)

            # Oxidation-state decoration applies to freshly converted
            # structure columns only.
            if featurizer_type != self.structure_col or \
                    not self.guess_oxistates:
                return df

            self.logger.info(
                self._log_prefix +
                "Guessing oxidation states of structures if they were "
                "not present in input.")
            decorator = StructureToOxidStructure(
                max_sites=-50,
                return_original_on_error=True,
                overwrite_data=True,
                target_col_id=featurizer_type)
            try:
                df = decorator.featurize_dataframe(
                    df,
                    featurizer_type,
                    multiindex=self.multiindex,
                    inplace=False)
            except Exception as e:
                # Best effort: keep the undecorated structures.
                self.logger.info(
                    self._log_prefix +
                    "Could not decorate oxidation states on structures "
                    "due to {}.".format(e))
            return df

        # ---- composition column ----
        if isinstance(sample, str):
            # Formula strings -> Composition objects
            self.logger.info(
                self._log_prefix +
                "Compositions detected as strings. Attempting "
                "conversion to Composition objects...")
            to_composition = StrToComposition(target_col_id=featurizer_type,
                                              overwrite_data=True)
            df = to_composition.featurize_dataframe(
                df,
                featurizer_type,
                multiindex=self.multiindex,
                ignore_errors=True,
                inplace=False)
        elif isinstance(sample, dict):
            # Dicts -> Composition objects
            self.logger.info(self._log_prefix +
                             "Compositions detected as dicts. Attempting "
                             "conversion to Composition objects...")
            df[featurizer_type] = list(
                map(Composition.from_dict, df[featurizer_type]))

        if not self.guess_oxistates:
            return df

        # Decorate compositions with guessed oxidation states.
        self.logger.info(
            self._log_prefix +
            "Guessing oxidation states of compositions, as"
            " they were not present in input.")
        decorator = CompositionToOxidComposition(
            max_sites=-50,
            return_original_on_error=True,
            overwrite_data=True,
            target_col_id=featurizer_type)
        try:
            df = decorator.featurize_dataframe(df,
                                               featurizer_type,
                                               multiindex=self.multiindex,
                                               inplace=False)
        except Exception as e:
            self.logger.info(self._log_prefix +
                             "Could not decorate oxidation states due "
                             "to {}. Excluding featurizers based on "
                             "composition oxistates".format(e))
            # Featurizers that need oxidation states cannot run on this column.
            needs_oxi_names = [
                c.__class__.__name__
                for c in CompositionFeaturizers().need_oxi
            ]
            self.exclude.extend(needs_oxi_names)
        return df
Beispiel #3
0
 def __post_init__(self, target, vertex_elements):
     """Store ``target`` as a Composition and resolve the vertex elements.

     Args:
         target: A Composition instance; anything else is passed to
             ``Composition.from_dict``.
         vertex_elements: Elements to store as vertices. When falsy, the
             sorted elements of the target composition are used instead.
     """
     if not isinstance(target, Composition):
         target = Composition.from_dict(target)
     self.target: Composition = target

     if vertex_elements:
         self.vertex_elements = vertex_elements
     else:
         self.vertex_elements = sorted(self.target.elements)
Beispiel #4
0
 def __post_init__(self, composition):
     """Store ``composition`` as a Composition instance.

     Args:
         composition: A Composition instance; anything else is passed to
             ``Composition.from_dict``.
     """
     if not isinstance(composition, Composition):
         composition = Composition.from_dict(composition)
     self.composition: Composition = composition
Beispiel #5
0
def cleaveSurfBond(entry,
                   max_bonds=1,
                   supercell=2,
                   group_structs=True,
                   prec=1E-4):
    """
    An algorithm to cleave a surface from a fully periodic crystal. This
    iteration uses the "periodic bond" approach, as described in Paul et al.
    in 2020. If this algorithm is used, please cite:

        https://arxiv.org/abs/2002.00903

    inputs
    --------
        entry (list): A set of components necessary for the TSA.
                      Makes it easier to parallelize with this as
                      the input
                      --structure (Structure): pymatgen Structure object
                      --tol (float): The scaling for the atomic bonds
                      --mp_id (str): The label for the entry, commonly
                                     the MaterialsProject ID

        max_bonds (int):    The maximum number of bonds to cleave with this
                            algorithm. Every combination of 1, 2, ...,
                            max_bonds periodic bonds is tried.

        supercell (int):    The size supercell to use for building the
                            bonded atomic network. Typically does not
                            matter, except in the case of a bipartite
                            crystal, where even and odd sized supercells
                            can lead the TSA to identify a clear versus a
                            "mixed" dimensionality.

        group_structs (bool):   Whether to group the surfaces cleaved based
                                on symmetry, or give the full list of surfaces
                                cleaved. The more symmetric the crystal, the
                                more duplicate surfaces will be present

        prec (float):   The precision to compare magnitude of vectors
                        representing the bonds in the system

    returns
    --------
        cleaved surfaces (list): The list of structure objects representing
                                 the cleaved structures. If it is empty,
                                 no surfaces could be cleaved

    """

    struct = copy.deepcopy(entry[0])
    results = getStructureType(entry, supercell=supercell, returnSS=True)

    # Proceed only if the structure is classified as periodic
    # in all directions
    if results[0] != 'conventional':
        print('Material is does not have a 3D motiif')
        print('Try increasing radii tolerance if appropriate')
        return []

    struct.make_supercell(supercell)
    binary_matrix = getDistMat(struct, entry[1] - 1)

    # Get dictionary of directional bonds in the system,
    # and the associated atomic species
    bond_dir = getBondVectors(struct, entry[1] - 1, prec)

    # Flatten the nested bond dictionary into the list of candidate
    # periodic bonds (each one a collection of site-index pairs).
    combos = [
        cleave[1]
        for s1 in bond_dir
        for s2 in bond_dir[s1]
        for cleave in bond_dir[s1][s2]
    ]

    # Create pairings of bonds to be broken, from a single bond up to
    # max_bonds bonds. The combination size must follow the loop variable;
    # using max_bonds here would skip the smaller combinations and repeat
    # the largest ones max_bonds times.
    final_combos = []
    for n_bonds in range(1, max_bonds + 1):
        final_combos.extend(itertools.combinations(combos, n_bonds))

    seed_index = 0
    old_cluster_size = len(buildNetwork(binary_matrix,
                                        seed_index)) / supercell**3
    bulk_formula = struct.composition.reduced_formula

    all_structs = []
    for combo in final_combos:
        # Work on a fresh copy of the bonding matrix for every combination.
        modified_matrix = np.array(binary_matrix)
        for sett in combo:
            for i, j in sett:
                modified_matrix[i][j] = 0
                modified_matrix[j][i] = 0

        cluster = buildNetwork(modified_matrix, seed_index)

        # Reset per iteration so that an empty cluster can neither hit an
        # unbound name nor reuse the classification of a previous combo.
        motiif = None
        hetero = False
        if cluster != set():
            scale = len(cluster) / old_cluster_size
            compo = Composition.from_dict(
                Counter(struct[idx].specie.name for idx in cluster))
            if compo.reduced_formula != bulk_formula:
                # i.e. the cluster does not have the same composition
                # as the overall crystal; therefore there are other
                # clusters of varying composition.
                hetero = True
            motiif = getDim(scale, supercell)

        if not hetero and motiif == 'layered':
            cluster_sites = [struct.sites[n] for n in cluster]
            all_structs.append(struct.from_sites(cluster_sites))

    if not group_structs:
        return all_structs

    # Keep one representative per group of matching structures.
    matcher = StructureMatcher(stol=1E-6, primitive_cell=False, scale=False)
    return [group[0] for group in matcher.group_structures(all_structs)]
Beispiel #6
0
def cleaveSurfAtom(entry, max_bonds=1, supercell=2, group_structs=True):
    """
    An algorithm to cleave a surface from a fully periodic crystal. This
    iteration uses the "periodic atom" approach, as described in Paul et al.
    in 2020. If this algorithm is used, please cite:

        https://arxiv.org/abs/2002.00903

    inputs
    --------
        entry (list): A set of components necessary for the TSA.
                      Makes it easier to parallelize with this as
                      the input
                      --structure (Structure): pymatgen Structure object
                      --tol (float): The scaling for the atomic bonds
                      --mp_id (str): The label for the entry, commonly
                                     the MaterialsProject ID
        max_bonds (int):    The maximum number of bonds to cleave with this
                            algorithm. Every combination of 0, 1, ...,
                            max_bonds bonds of the original cell is tried.

        supercell (int):    The size supercell to use for building the
                            bonded atomic network. Typically does not
                            matter, except in the case of a bipartite
                            crystal, where even and odd sized supercells
                            can lead the TSA to identify a clear versus a
                            "mixed" dimensionality.
        group_structs (bool):   Whether to group the surfaces cleaved based
                                on symmetry, or give the full list of surfaces
                                cleaved. The more symmetric the crystal, the
                                more duplicate surfaces will be present

    returns
    --------
        cleaved surfaces (list): The list of structure objects representing
                                 the cleaved structures. If it is empty,
                                 no surfaces could be cleaved

    """

    struct = copy.deepcopy(entry[0])
    results = getStructureType(entry, supercell=supercell, returnSS=True)

    # Proceed only if the structure is classified as periodic
    # in all directions
    if results[0] != 'conventional':
        print('Material is does not have a 3D motiif')
        print('Try increasing radii tolerance if appropriate')
        return []

    # Bonds are enumerated on the original cell (upper triangle only, so
    # each bonded pair appears once) ...
    og_binary_matrix = getDistMat(struct, entry[1] - 1)
    n_sites = len(og_binary_matrix)
    bonds = [
        (i, j)
        for i in range(n_sites)
        for j in range(i + 1, n_sites)
        if og_binary_matrix[i][j] == 1
    ]

    # ... and broken on the supercell.
    struct.make_supercell(supercell)
    binary_matrix = getDistMat(struct, entry[1] - 1)

    combos = []
    for n_bonds in range(max_bonds + 1):
        combos.extend(itertools.combinations(bonds, n_bonds))

    # Number of copies of each original site in the supercell.
    n_images = supercell**3

    # The reference cluster does not depend on the combination being tried,
    # so it is computed once instead of once per combination.
    seed_index = 0
    old_cluster_size = len(buildNetwork(binary_matrix,
                                        seed_index)) / n_images
    bulk_formula = struct.composition.reduced_formula

    all_structs = []
    for combo in combos:
        modified_matrix = np.array(binary_matrix)
        for i, j in combo:
            # NOTE(review): assumes the images of original site k occupy
            # supercell indices k * n_images ... (k + 1) * n_images - 1 --
            # confirm against the ordering produced by make_supercell.
            i_start = i * n_images
            j_start = j * n_images
            for shift in range(n_images):
                for shift2 in range(n_images):
                    modified_matrix[i_start + shift][j_start + shift2] = 0
                    modified_matrix[j_start + shift][i_start + shift2] = 0

        cluster = buildNetwork(modified_matrix, seed_index)

        # Reset per iteration so that an empty cluster can neither hit an
        # unbound name nor reuse the classification of a previous combo.
        motiif = None
        hetero = False
        if cluster != set():
            scale = len(cluster) / old_cluster_size
            compo = Composition.from_dict(
                Counter(struct[idx].specie.name for idx in cluster))
            if compo.reduced_formula != bulk_formula:
                # i.e. the cluster does not have the same composition
                # as the overall crystal; therefore there are other
                # clusters of varying composition.
                hetero = True
            motiif = getDim(scale, supercell)

        if not hetero and motiif == 'layered':
            cluster_sites = [struct.sites[n] for n in cluster]
            all_structs.append(struct.from_sites(cluster_sites))

    if not group_structs:
        return all_structs

    # Keep one representative per group of matching structures.
    matcher = StructureMatcher(stol=1E-6, primitive_cell=False, scale=False)
    return [group[0] for group in matcher.group_structures(all_structs)]
Beispiel #7
0
def getStructureType(entry, seed_index=0, supercell=2, returnSS=False):
    """
    This is a topology-scaling algorithm used to describe the
    periodicity of bonded clusters in a bulk structure. It is
    described in Ashton et al. in 2017. If used, please cite:
        https://doi.org/10.1103/PhysRevLett.118.106101

    inputs
    --------
        entry (list): A set of components necessary for the TSA.
                      Makes it easier to parallelize with this as
                      the input
                      --structure (Structure): pymatgen Structure object
                      --tol (float): The scaling for the atomic bonds
                      --mp_id (str): The label for the entry, commonly
                                     the MaterialsProject ID
        seed_index (int):   The site to use as the starting point for the
                            TSA. Typically does not impact the results, but
                            will if the structure is bipartite or has
                            mixed dimensionality

        supercell (int):    The size supercell to use for building the
                            bonded atomic network. Typically does not
                            matter, except in the case of a bipartite
                            crystal, where even and odd sized supercells
                            can lead the TSA to identify a clear versus a
                            "mixed" dimensionality.

        returnSS (bool):    If True, the returned site list refers to the
                            network found in the supercell; otherwise it
                            refers to the network found in the original
                            structure.

    returns
    --------
        A list with the following items, in order:
        motiif (str): The dimensionality of the structure as given by
                      getDim, with "_heter" appended when the network
                      found in the original cell or in the supercell has
                      a different reduced formula than the structure
        mp_id (str): Same mp_id as entered in "entry"
        tol (float): Same float as entered in "entry"
        reduced formula (str): The formula of the network identified
                               in the supercell
        sites (list): The list of sites associated with the network
                      (see returnSS)

    raises
    --------
        ValueError: if no network is found in the supercell, in which
                    case no dimensionality can be assigned
    """

    # Entry is a list of the necessary components of the TSA.
    # Makes it easier to parallelize
    structure, tol, mp_id = entry
    norm_tol = tol - 1

    # Work on a copy so the caller's structure is not turned into a supercell.
    s = copy.deepcopy(structure)

    # Network grown from the seed site in the original cell.
    binary_matrix = getDistMat(s, norm_tol)
    cluster = buildNetwork(binary_matrix, seed_index)

    compo = Composition.from_dict(
        Counter(s[idx].specie.name for idx in cluster))
    # A differing reduced formula means the cluster does not have the same
    # composition as the overall crystal; therefore there are other
    # clusters of varying composition.
    heterogeneous = compo.reduced_formula != s.composition.reduced_formula
    og_cluster = set(cluster)
    old_cluster_size = len(cluster)

    # Increase structure to determine dimensionality
    s.make_supercell(supercell)
    # NOTE(review): assumes the images of original site k start at index
    # k * supercell**3 in the supercell -- confirm against make_supercell.
    seed_index *= supercell**3

    binary_matrix = getDistMat(s, norm_tol)
    cluster = buildNetwork(binary_matrix, seed_index)

    if cluster == set():
        # Without a supercell network there is no scale to classify. This
        # path used to fail later on an unassigned local variable.
        raise ValueError(
            "No bonded network found in the supercell for seed index {}; "
            "cannot determine dimensionality".format(seed_index))

    # Get ratio of original and final cluster lengths
    scale = len(cluster) / old_cluster_size
    compo = Composition.from_dict(
        Counter(s[idx].specie.name for idx in cluster))
    heterogeneousSS = compo.reduced_formula != s.composition.reduced_formula

    motiif = getDim(scale, supercell)
    if heterogeneous or heterogeneousSS:
        motiif += "_heter"

    sites = cluster if returnSS else og_cluster
    return [motiif, mp_id, tol, compo.reduced_formula, list(sites)]
Beispiel #8
0
 def dict_form_to_chemical_formula(self, dict_form):
     """Return the reduced formula of the composition built from ``dict_form``.

     ``dict_form`` is passed unchanged to ``Composition.from_dict``.
     """
     composition = Composition.from_dict(dict_form)
     return composition.reduced_formula