def test_get_primitive_structure(self):
    """Check that conventional fcc and bcc cubic cells reduce to one site."""
    # (element symbol, fractional coordinates of the conventional cell)
    cases = (
        ("Ag", [[0, 0, 0], [0.5, 0.5, 0], [0, 0.5, 0.5], [0.5, 0, 0.5]]),
        ("Li", [[0, 0, 0], [0.5, 0.5, 0.5]]),
    )
    for symbol, frac_coords in cases:
        conventional = Structure(
            Lattice.cubic(4.09), [symbol] * len(frac_coords), frac_coords
        )
        self.assertEqual(len(conventional.get_primitive_structure()), 1)
def test_primitive_cell_site_merging(self):
    """The primitive cell of this four-site cell must hold two sites 2.5 apart."""
    lattice = Lattice.cubic(10)
    frac_coords = [[0, 0, 0], [0, 0, 0.5], [0, 0, 0.26], [0, 0, 0.74]]
    species = ['Ag', 'Ag', 'Be', 'Be']
    primitive = Structure(lattice, species, frac_coords).get_primitive_structure()
    self.assertArrayAlmostEqual(
        primitive.distance_matrix, [[0, 2.5], [2.5, 0]]
    )
def test_primitive_on_large_supercell(self):
    """A 2x2x2 supercell of conventional fcc Ag must reduce to a one-site cell."""
    fcc_sites = [[0, 0, 0], [0.5, 0.5, 0], [0, 0.5, 0.5], [0.5, 0, 0.5]]
    supercell = Structure(Lattice.cubic(4.09), ["Ag"] * 4, fcc_sites)
    supercell.make_supercell([2, 2, 2])

    primitive = supercell.get_primitive_structure()

    self.assertEqual(len(primitive), 1)
    self.assertAlmostEqual(primitive.volume, 17.10448225)
def _sanitize_input_structure(input_structure: Structure) -> Structure:
    """Return a primitive copy of the input with magnetic information removed.

    All work is done on a copy obtained from ``input_structure.copy()``.

    Args:
        input_structure: Structure

    Returns:
        Structure
    """
    sanitized = input_structure.copy()

    # drop any spin annotated on the species
    sanitized.remove_spin()

    # reduce to the primitive cell, ignoring site properties while doing so
    sanitized = sanitized.get_primitive_structure(use_site_props=False)

    if "magmom" not in sanitized.site_properties:
        return sanitized

    # existing magmoms can conflict with later transformations, since sites
    # would end up with both magmom site properties and Species spins defined
    sanitized.remove_site_property("magmom")
    return sanitized
def test_disordered_supercell_primitive_cell(self):
    """A partially occupied one-site cell must reduce back to one site."""
    lattice = Lattice.cubic(2)
    frac_coords = [[0.5, 0.5, 0.5]]
    occupancies = [{'Si': 0.54738}]
    disordered = Structure(lattice, occupancies, frac_coords)

    # this supercell often breaks things
    scaling_matrix = [[0, -1, 1], [-1, 1, 0], [1, 1, 1]]
    disordered.make_supercell(scaling_matrix)

    primitive = disordered.get_primitive_structure()
    self.assertEqual(len(primitive), 1)
def test_disordered_supercell_primitive_cell(self):
    """Primitive reduction of a disordered supercell must yield a single site."""
    cubic = Lattice.cubic(2)
    site_coords = [[0.5, 0.5, 0.5]]
    site_species = [{'Si': 0.54738}]
    structure = Structure(cubic, site_species, site_coords)

    # this supercell often breaks things
    structure.make_supercell([[0, -1, 1], [-1, 1, 0], [1, 1, 1]])

    n_primitive_sites = len(structure.get_primitive_structure())
    self.assertEqual(n_primitive_sites, 1)
def test_primitive_structure_volume_check(self):
    """With tolerance=0.1 this six-site Ag cell must keep all six sites."""
    tetragonal = Lattice.tetragonal(10, 30)
    frac_coords = [
        [0.5, 0.8, 0],
        [0.5, 0.2, 0],
        [0.5, 0.8, 0.333],
        [0.5, 0.5, 0.333],
        [0.5, 0.5, 0.666],
        [0.5, 0.2, 0.666],
    ]
    structure = Structure(tetragonal, ["Ag"] * 6, frac_coords)

    primitive = structure.get_primitive_structure(tolerance=0.1)

    self.assertEqual(len(primitive), 6)
def structure_parser_wrapper(structure: Structure,
                             is_primitive_cell=True,
                             fractional=True,
                             remove_unused_parameters=True) -> typing.Dict:
    """
    Build a crystal-data dictionary from a
    pymatgen.core.structure.Structure object.

    :param structure: Structure to describe.
    :param is_primitive_cell: If True, the structure is first replaced by
        the result of ``get_primitive_structure()``.
    :param fractional: If True, positions are taken from ``frac_coords`` and
        stored under the key 'fractional'; otherwise they are taken from
        ``cart_coords`` and stored under the key 'xyz'.
    :param remove_unused_parameters: If True, the lattice parameters are
        passed through ``remove_superflous_parameters`` together with the
        bravais value.
    :return: Dictionary with the position key, 'species',
        'lattice_parameters', 'space_group', 'bravais' and 'n_atoms'.
    """
    if is_primitive_cell:
        structure = structure.get_primitive_structure()

    position_key = 'fractional' if fractional else 'xyz'
    coordinates = structure.frac_coords if fractional else structure.cart_coords
    positions = coordinates.tolist()

    sg = structure.get_space_group_info()
    bravais = space_groups.space_group_to_bravais(sg[1])

    lattice_parameters = lattice_parameters_from_cif(structure)
    # Required for qCore input
    if remove_unused_parameters:
        lattice_parameters = remove_superflous_parameters(
            lattice_parameters, bravais)

    species = [element_enum.value for element_enum in structure.species]
    assert len(species) == len(positions)

    return {
        position_key: positions,
        'species': species,
        'lattice_parameters': lattice_parameters,
        'space_group': sg,
        'bravais': bravais,
        'n_atoms': len(species),
    }
def get_analysis_and_structure(
    self,
    structure: Structure,
    calculate_valences: bool = True,
    guesstimate_spin: bool = False,
    op_threshold: float = 0.1,
) -> Tuple[Dict, Structure]:
    """Obtain an analysis of a given structure and if it may be Jahn-Teller
    active or not. This is a heuristic, and may give false positives and
    false negatives (false positives are preferred).

    Args:
        structure: input structure
        calculate_valences: whether to attempt to calculate valences or not,
            structure should have oxidation states to perform analysis
            (Default value = True)
        guesstimate_spin: whether to guesstimate spin state from magnetic
            moments or not, use with caution (Default value = False)
        op_threshold: threshold for order parameter above which to consider
            site to match an octahedral or tetrahedral motif, since
            Jahn-Teller structures can often be quite distorted, this
            threshold is smaller than one might expect
            (Default value = 0.1)

    Returns:
        Tuple of (analysis, structure). ``analysis`` always has the keys
        'active' and 'sites'; when 'active' is True it also has the key
        'strength', which is 'strong' if any site is 'strong' and 'weak'
        otherwise. ``structure`` is the primitive structure, decorated with
        oxidation states if ``calculate_valences`` is True.
    """

    def trim(f):
        """
        Avoid storing to unreasonable precision, hurts readability.
        """
        return float("{:.4f}".format(f))

    structure = structure.get_primitive_structure()

    if calculate_valences:
        bva = BVAnalyzer()
        structure = bva.get_oxi_state_decorated_structure(structure)

    # only the first site of each group of symmetry-equivalent sites is
    # analysed below; the whole group is reported through "site_indices"
    symmetrized_structure = SpacegroupAnalyzer(
        structure
    ).get_symmetrized_structure()

    # to detect structural motifs of a given site
    op = LocalStructOrderParams(["oct", "tet"])

    # per-site analysis records, split by whether the site may be
    # Jahn-Teller active
    jt_sites = []
    non_jt_sites = []

    for indices in symmetrized_structure.equivalent_indices:

        idx = indices[0]
        site = symmetrized_structure[idx]

        # only interested in sites with oxidation states
        if (
            isinstance(site.specie, Species)
            and site.specie.element.is_transition_metal
        ):

            # get motif around site
            order_params = op.get_order_parameters(symmetrized_structure, idx)

            if order_params[0] > order_params[1] and order_params[0] > op_threshold:
                motif = "oct"
                motif_order_parameter = order_params[0]
            elif order_params[1] > op_threshold:
                motif = "tet"
                motif_order_parameter = order_params[1]
            else:
                motif = "unknown"
                motif_order_parameter = None

            if motif in ("oct", "tet"):

                # guess spin of metal ion
                if guesstimate_spin and "magmom" in site.properties:

                    # estimate if high spin or low spin
                    magmom = site.properties["magmom"]
                    spin_state = self._estimate_spin_state(
                        site.specie, motif, magmom
                    )
                else:
                    spin_state = "unknown"

                magnitude = self.get_magnitude_of_effect_from_species(
                    site.specie, spin_state, motif
                )

                if magnitude != "none":

                    ligands = get_neighbors_of_site_with_index(
                        structure, idx, approach="min_dist", delta=0.15
                    )
                    ligand_bond_lengths = [
                        ligand.distance(structure[idx]) for ligand in ligands
                    ]
                    ligands_species = list(
                        {str(ligand.specie) for ligand in ligands}
                    )
                    ligand_bond_length_spread = max(ligand_bond_lengths) - min(
                        ligand_bond_lengths
                    )

                    # to be Jahn-Teller active, all ligands have to be the same
                    # NOTE(review): a site with mixed ligand species is added
                    # to neither jt_sites nor non_jt_sites -- confirm intended
                    if len(ligands_species) == 1:
                        jt_sites.append(
                            {
                                "strength": magnitude,
                                "motif": motif,
                                "motif_order_parameter": trim(
                                    motif_order_parameter
                                ),
                                "spin_state": spin_state,
                                "species": str(site.specie),
                                "ligand": ligands_species[0],
                                "ligand_bond_lengths": [
                                    trim(length) for length in ligand_bond_lengths
                                ],
                                "ligand_bond_length_spread": trim(
                                    ligand_bond_length_spread
                                ),
                                "site_indices": indices,
                            }
                        )

                # store reasons for not being J-T active
                else:
                    non_jt_sites.append(
                        {
                            "site_indices": indices,
                            "strength": "none",
                            "reason": "Not Jahn-Teller active for this "
                            "electronic configuration.",
                        }
                    )
            else:
                non_jt_sites.append(
                    {
                        "site_indices": indices,
                        "strength": "none",
                        "reason": "motif is {}".format(motif),
                    }
                )

    # perform aggregation of all sites
    if jt_sites:
        analysis = {"active": True}  # type: Dict[str, Any]
        # if any site could exhibit 'strong' Jahn-Teller effect
        # then mark whole structure as strong
        if any(site["strength"] == "strong" for site in jt_sites):
            analysis["strength"] = "strong"
        else:
            analysis["strength"] = "weak"
        analysis["sites"] = jt_sites
        return analysis, structure
    else:
        return {"active": False, "sites": non_jt_sites}, structure
def _get_structure(self, data, primitive, substitution_dictionary=None):
    """
    Generate structure from part of the cif.

    Args:
        data: CIF data block; read through ``data[key]`` and ``data.data``.
        primitive: If True, the structure is converted with
            ``get_primitive_structure()`` followed by
            ``get_reduced_structure()``.
        substitution_dictionary: Optional mapping from atom site label to
            symbol. When given (and non-empty) it replaces the regex-based
            label parsing.

    Returns:
        The sorted (and optionally primitive) Structure, or None when no
        species were collected or the species and coordinate lists differ
        in length.
    """
    # Symbols often representing
    # common representations for elements/water in cif files
    special_symbols = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O"}
    # Materialise the symbols once: on Python 3 ``map`` is a one-shot
    # iterator, so a membership test on it would exhaust it and every later
    # test in the site loop would see an empty sequence.
    elements = set(map(str, ptable.all_elements))

    lattice = self.get_lattice(data)
    self.symmetry_operations = self.get_symops(data)
    oxi_states = self.parse_oxi_states(data)

    coord_to_species = OrderedDict()

    def parse_symbol(sym):
        if substitution_dictionary:
            return substitution_dictionary.get(sym)
        else:
            m = re.findall(r"w?[A-Z][a-z]*", sym)
            if m and m != "?":
                return m[0]
            return ""

    for i in range(len(data["_atom_site_label"])):
        symbol = parse_symbol(data["_atom_site_label"][i])
        if symbol:
            if symbol not in elements and symbol not in special_symbols:
                symbol = symbol[:2]
        else:
            continue
        # make sure symbol was properly parsed from _atom_site_label
        # otherwise get it from _atom_site_type_symbol
        try:
            if symbol in special_symbols:
                get_el_sp(special_symbols.get(symbol))
            else:
                Element(symbol)
        except (KeyError, ValueError):
            # sometimes the site doesn't have the type_symbol.
            # we then hope the type_symbol can be parsed from the label
            if "_atom_site_type_symbol" in data.data.keys():
                symbol = data["_atom_site_type_symbol"][i]

        if oxi_states is not None:
            if symbol in special_symbols:
                el = get_el_sp(special_symbols.get(symbol) +
                               str(oxi_states[symbol]))
            else:
                el = Specie(symbol, oxi_states.get(symbol, 0))
        else:
            el = get_el_sp(special_symbols.get(symbol) if
                           symbol in special_symbols else symbol)

        x = str2float(data["_atom_site_fract_x"][i])
        y = str2float(data["_atom_site_fract_y"][i])
        z = str2float(data["_atom_site_fract_z"][i])

        # a missing or unparsable occupancy counts as fully occupied
        try:
            occu = str2float(data["_atom_site_occupancy"][i])
        except (KeyError, ValueError):
            occu = 1

        if occu > 0:
            coord = (x, y, z)
            if coord not in coord_to_species:
                coord_to_species[coord] = {el: occu}
            else:
                coord_to_species[coord][el] = occu

    coord_to_species = {k: Composition(v)
                        for k, v in coord_to_species.items()}

    allspecies = []
    allcoords = []

    if coord_to_species.items():
        # groupby only merges adjacent items, hence the sort on the same key
        for species, group in groupby(
                sorted(list(coord_to_species.items()), key=lambda x: x[1]),
                key=lambda x: x[1]):
            tmp_coords = [site[0] for site in group]

            coords = self._unique_coords(tmp_coords)

            allcoords.extend(coords)
            allspecies.extend(len(coords) * [species])

        # rescale occupancies if necessary
        for species in allspecies:
            totaloccu = sum(species.values())
            if 1 < totaloccu <= self._occupancy_tolerance:
                for key, value in six.iteritems(species):
                    species[key] = value / totaloccu

    if allspecies and len(allspecies) == len(allcoords):

        struct = Structure(lattice, allspecies, allcoords)
        struct = struct.get_sorted_structure()

        if primitive:
            struct = struct.get_primitive_structure()
            struct = struct.get_reduced_structure()
        return struct
def __init__(
    self,
    structure: Structure,
    overwrite_magmom_mode: Union[OverwriteMagmomMode, str] = "none",
    round_magmoms: Union[bool, int, float] = False,
    detect_valences: bool = False,
    make_primitive: bool = True,
    default_magmoms: Union[dict, None] = None,
    set_net_positive: bool = True,
    threshold: float = 0.1,
):
    """
    A class which provides a few helpful methods to analyze
    collinear magnetic structures.

    If magnetic moments are not defined, moments will be
    taken either from default_magmoms.yaml (similar to the
    default magmoms in MPRelaxSet, with a few extra definitions)
    or from a specie:magmom dict provided by the default_magmoms
    kwarg.

    Input magmoms can be replaced using the 'overwrite_magmom_mode'
    kwarg. This can be:
    * "none" to do nothing,
    * "respect_sign" which will overwrite existing magmoms with
      those from default_magmoms but will keep sites with positive magmoms
      positive, negative magmoms negative and zero magmoms zero,
    * "respect_zeros", which will give a ferromagnetic structure
      (all positive magmoms from default_magmoms) but still keep sites with
      zero magmoms as zero,
    * "replace_all" which will try to guess initial magmoms for
      all sites in the structure irrespective of input structure
      (this is most suitable for an initial DFT calculation),
    * "replace_all_if_undefined" is the same as "replace_all" but only if
      no magmoms are defined in input structure, otherwise it will respect
      existing magmoms.
    * "normalize" will normalize magmoms to unity, but will respect sign
      (used for comparing orderings), magmoms < threshold will be set to zero

    :param structure: Structure object
    :param overwrite_magmom_mode (str): default "none"
    :param round_magmoms (int or bool): will round input magmoms to
        specified number of decimal places if integer is supplied, if set
        to a float will try and group magmoms together using a kernel density
        estimator of provided width, and extracting peaks of the estimator
    :param detect_valences (bool): if True, will attempt to assign valences
        to input structure
    :param make_primitive (bool): if True, will transform to primitive
        magnetic cell
    :param default_magmoms (dict): (optional) dict specifying default magmoms
    :param set_net_positive (bool): if True, will change sign of magnetic
        moments such that the net magnetization is positive. Argument will be
        ignored if mode "respect_sign" is used.
    :param threshold (float): number (in Bohr magnetons) below which magmoms
        will be rounded to zero, default of 0.1 can probably be increased for
        many magnetic systems, depending on your application
    :raises NotImplementedError: if the structure is disordered
    :raises ValueError: if magmoms are present both as site properties and
        as species spins, or if overwrite_magmom_mode is not supported
    """

    if default_magmoms:
        self.default_magmoms = default_magmoms
    else:
        self.default_magmoms = DEFAULT_MAGMOMS

    # work on a copy so the caller's structure is not modified here
    structure = structure.copy()

    # check for disorder
    if not structure.is_ordered:
        raise NotImplementedError(
            "Not implemented for disordered structures, "
            "make ordered approximation first.")

    if detect_valences:
        trans = AutoOxiStateDecorationTransformation()
        try:
            structure = trans.apply_transformation(structure)
        except ValueError:
            warnings.warn("Could not assign valences "
                          "for {}".format(
                              structure.composition.reduced_formula))

    # check to see if structure has magnetic moments
    # on site properties or species spin properties,
    # prioritize site properties
    has_magmoms = bool(structure.site_properties.get("magmom", False))

    has_spin = any(
        getattr(sp, "spin", False)
        for comp in structure.species_and_occu
        for sp, _ in comp.items()
    )

    # perform input sanitation ...
    # rest of class will assume magnetic moments
    # are stored on site properties:
    # this is somewhat arbitrary, arguments can
    # be made for both approaches

    if has_magmoms and has_spin:
        raise ValueError("Structure contains magnetic moments on both "
                         "magmom site properties and spin species "
                         "properties. This is ambiguous. Remove one or "
                         "the other.")
    elif has_magmoms:
        if None in structure.site_properties["magmom"]:
            warnings.warn("Be careful with mixing types in your magmom "
                          "site properties. Any 'None' magmoms have been "
                          "replaced with zero.")
        magmoms = [
            m if m else 0 for m in structure.site_properties["magmom"]
        ]
    elif has_spin:
        magmoms = [getattr(sp, "spin", 0) for sp in structure.species]
        structure.remove_spin()
    else:
        # no magmoms present, add zero magmoms for now
        magmoms = [0] * len(structure)
        # and overwrite magmoms with default magmoms later unless otherwise stated
        if overwrite_magmom_mode == "replace_all_if_undefined":
            overwrite_magmom_mode = "replace_all"

    # test to see if input structure has collinear magmoms
    self.is_collinear = Magmom.are_collinear(magmoms)

    if not self.is_collinear:
        warnings.warn(
            "This class is not designed to be used with "
            "non-collinear structures. If your structure is "
            "only slightly non-collinear (e.g. canted) may still "
            "give useful results, but use with caution.")

    # this is for collinear structures only, make sure magmoms
    # are all floats
    magmoms = list(map(float, magmoms))

    # set properties that should be done /before/ we process input magmoms
    self.total_magmoms = sum(magmoms)
    self.magnetization = sum(magmoms) / structure.volume

    # round magmoms below threshold to zero
    magmoms = [m if abs(m) > threshold else 0 for m in magmoms]

    # overwrite existing magmoms with default_magmoms
    if overwrite_magmom_mode not in (
            "none",
            "respect_sign",
            "respect_zeros",
            "replace_all",
            "replace_all_if_undefined",
            "normalize",
    ):
        raise ValueError("Unsupported mode.")

    # "respect_sign" keeps the magnetic ordering intact, so the net sign
    # must not be flipped afterwards
    if overwrite_magmom_mode == "respect_sign":
        set_net_positive = False

    for idx, site in enumerate(structure):

        if site.species_string in self.default_magmoms:
            # look for species first, e.g. Fe2+
            default_magmom = self.default_magmoms[site.species_string]
        elif (isinstance(site.specie, Specie) and
              str(site.specie.element) in self.default_magmoms):
            # look for element, e.g. Fe
            default_magmom = self.default_magmoms[str(site.specie.element)]
        else:
            default_magmom = 0

        # overwrite_magmom_mode = "respect_sign" will change magnitude of
        # existing moments only, and keep zero magmoms as
        # zero: it will keep the magnetic ordering intact
        if overwrite_magmom_mode == "respect_sign":
            if magmoms[idx] > 0:
                magmoms[idx] = default_magmom
            elif magmoms[idx] < 0:
                magmoms[idx] = -default_magmom

        # overwrite_magmom_mode = "respect_zeros" will give a ferromagnetic
        # structure but will keep zero magmoms as zero
        elif overwrite_magmom_mode == "respect_zeros":
            if magmoms[idx] != 0:
                magmoms[idx] = default_magmom

        # overwrite_magmom_mode = "replace_all" will ignore input magmoms
        # and give a ferromagnetic structure with magnetic
        # moments on *all* atoms it thinks could be magnetic
        elif overwrite_magmom_mode == "replace_all":
            magmoms[idx] = default_magmom

        # overwrite_magmom_mode = "normalize" set magmoms magnitude to 1
        elif overwrite_magmom_mode == "normalize":
            if magmoms[idx] != 0:
                magmoms[idx] = int(magmoms[idx] / abs(magmoms[idx]))

    # round magmoms, used to smooth out computational data
    magmoms = (self._round_magmoms(magmoms, round_magmoms)
               if round_magmoms else magmoms)

    if set_net_positive:
        sign = np.sum(magmoms)
        if sign < 0:
            magmoms = -np.array(magmoms)

    structure.add_site_property("magmom", magmoms)

    if make_primitive:
        structure = structure.get_primitive_structure(use_site_props=True)

    self.structure = structure
def _get_structure(self, data, primitive):
    """
    Generate structure from part of the cif.

    Args:
        data: CIF data block; read through ``data[key]`` and ``data.data``.
        primitive: If True, the structure is converted with
            ``get_primitive_structure`` (keeping site properties for a
            magCIF, otherwise followed by ``get_reduced_structure``).

    Returns:
        Structure with an "_atom_site_label" site property, or None when
        no species were collected or the species, coordinate and magmom
        lists differ in length.

    Raises:
        NotImplementedError: for incommensurate magCIFs and for magCIFs
            with more than one species entry on the same site.
    """

    def get_num_implicit_hydrogens(sym):
        num_h = {"Wat": 2, "wat": 2, "O-H": 1}
        return num_h.get(sym[:3], 0)

    lattice = self.get_lattice(data)

    # if magCIF, get magnetic symmetry moments and magmoms
    # else standard CIF, and use empty magmom dict
    if self.feature_flags["magcif_incommensurate"]:
        raise NotImplementedError(
            "Incommensurate structures not currently supported.")
    elif self.feature_flags["magcif"]:
        self.symmetry_operations = self.get_magsymops(data)
        magmoms = self.parse_magmoms(data, lattice=lattice)
    else:
        self.symmetry_operations = self.get_symops(data)
        magmoms = {}

    oxi_states = self.parse_oxi_states(data)

    coord_to_species = OrderedDict()
    coord_to_magmoms = OrderedDict()

    def get_matching_coord(coord):
        # Return the already-stored coordinate that a symmetry operation
        # maps ``coord`` onto (within the site tolerance), else False.
        keys = list(coord_to_species.keys())
        coords = np.array(keys)
        for op in self.symmetry_operations:
            c = op.operate(coord)
            inds = find_in_coord_list_pbc(coords, c,
                                          atol=self._site_tolerance)
            # can't use ``if inds`` because np.array([0]) evaluates
            # to False
            if len(inds):
                return keys[inds[0]]
        return False

    # coordinate tuple -> atom site label of the last site read there
    label_el_dict = {}

    for i in range(len(data["_atom_site_label"])):
        try:
            # If site type symbol exists, use it. Otherwise, we use the
            # label.
            symbol = self._parse_symbol(data["_atom_site_type_symbol"][i])
            label = data["_atom_site_label"][i]
            num_h = get_num_implicit_hydrogens(
                data["_atom_site_type_symbol"][i])
        except KeyError:
            symbol = self._parse_symbol(data["_atom_site_label"][i])
            label = data["_atom_site_label"][i]
            num_h = get_num_implicit_hydrogens(data["_atom_site_label"][i])
        if not symbol:
            continue

        if oxi_states is not None:
            o_s = oxi_states.get(symbol, 0)
            # use _atom_site_type_symbol if possible for oxidation state
            if "_atom_site_type_symbol" in data.data.keys():
                oxi_symbol = data["_atom_site_type_symbol"][i]
                o_s = oxi_states.get(oxi_symbol, o_s)
            try:
                el = Specie(symbol, o_s)
            except Exception:
                # fall back to a dummy species for symbols Specie rejects;
                # ``Exception`` (not a bare except) so that
                # KeyboardInterrupt/SystemExit still propagate
                el = DummySpecie(symbol, o_s)
        else:
            el = get_el_sp(symbol)

        x = str2float(data["_atom_site_fract_x"][i])
        y = str2float(data["_atom_site_fract_y"][i])
        z = str2float(data["_atom_site_fract_z"][i])
        magmom = magmoms.get(data["_atom_site_label"][i],
                             np.array([0, 0, 0]))

        # a missing or unparsable occupancy counts as fully occupied
        try:
            occu = str2float(data["_atom_site_occupancy"][i])
        except (KeyError, ValueError):
            occu = 1

        if occu > 0:
            coord = (x, y, z)
            match = get_matching_coord(coord)
            comp_d = {el: occu}
            if num_h > 0:
                comp_d["H"] = num_h
            comp = Composition(comp_d)
            if not match:
                coord_to_species[coord] = comp
                coord_to_magmoms[coord] = magmom
            else:
                coord_to_species[match] += comp
                # disordered magnetic not currently supported
                coord_to_magmoms[match] = None
            label_el_dict[coord] = label

    # sites that are exactly O + H (implicit hydrogens) are excluded from
    # the occupancy check
    sum_occu = [
        sum(c.values()) for c in coord_to_species.values()
        if not set(c.elements) == {Element("O"), Element("H")}
    ]
    if any(o > 1 for o in sum_occu):
        msg = "Some occupancies (%s) sum to > 1! If they are within " \
              "the tolerance, they will be rescaled." % str(sum_occu)
        warnings.warn(msg)
        self.errors.append(msg)

    allspecies = []
    allcoords = []
    allmagmoms = []
    allhydrogens = []
    alllabels = []

    # check to see if magCIF file is disordered
    if self.feature_flags["magcif"]:
        for k, v in coord_to_magmoms.items():
            if v is None:
                # Proposed solution to this is to instead store magnetic
                # moments as Specie 'spin' property, instead of site
                # property, but this introduces ambiguities for end user
                # (such as unintended use of `spin` and Specie will have
                # fictious oxidation state).
                raise NotImplementedError(
                    'Disordered magnetic structures not currently supported.'
                )

    if coord_to_species.items():
        # groupby only merges adjacent items, hence the sort on the same key
        for comp, group in groupby(
                sorted(list(coord_to_species.items()), key=lambda x: x[1]),
                key=lambda x: x[1]):
            tmp_coords = [site[0] for site in group]
            labels = [label_el_dict[tmp_coord] for tmp_coord in tmp_coords]
            tmp_magmom = [
                coord_to_magmoms[tmp_coord] for tmp_coord in tmp_coords
            ]

            if self.feature_flags["magcif"]:
                coords, magmoms, coords_num = self._unique_coords(
                    tmp_coords, magmoms_in=tmp_magmom, lattice=lattice)
            else:
                coords, magmoms, coords_num = self._unique_coords(
                    tmp_coords)

            if set(comp.elements) == {Element("O"), Element("H")}:
                # O with implicit hydrogens
                im_h = comp["H"]
                species = Composition({"O": comp["O"]})
            else:
                im_h = 0
                species = comp

            allhydrogens.extend(len(coords) * [im_h])
            allcoords.extend(coords)
            allspecies.extend(len(coords) * [species])
            allmagmoms.extend(magmoms)
            # repeat each input label once per coordinate generated from it
            # (presumably coords_num[i] is that count -- TODO confirm
            # against _unique_coords)
            for i in range(len(coords_num)):
                alllabels.extend(coords_num[i] * [labels[i]])

        # rescale occupancies if necessary
        for i, species in enumerate(allspecies):
            totaloccu = sum(species.values())
            if 1 < totaloccu <= self._occupancy_tolerance:
                allspecies[i] = species / totaloccu

    if allspecies and len(allspecies) == len(allcoords) \
            and len(allspecies) == len(allmagmoms):
        site_properties = dict()
        if any(allhydrogens):
            assert len(allhydrogens) == len(allcoords)
            site_properties["implicit_hydrogens"] = allhydrogens

        if self.feature_flags["magcif"]:
            site_properties["magmom"] = allmagmoms

        if len(site_properties) == 0:
            site_properties = None

        struct = Structure(lattice, allspecies, allcoords,
                           site_properties=site_properties)

        # NOTE(review): the structure is not sorted here, presumably so
        # that site order stays aligned with ``alllabels`` -- confirm.

        if primitive and self.feature_flags['magcif']:
            struct = struct.get_primitive_structure(use_site_props=True)
        elif primitive:
            struct = struct.get_primitive_structure()
            struct = struct.get_reduced_structure()

        # NOTE(review): ``alllabels`` has one entry per site of the
        # structure built above; if ``primitive`` changed the number of
        # sites this call may not match -- confirm.
        struct.add_site_property("_atom_site_label", alllabels)
        return struct
def _get_structure(self, data, primitive, substitution_dictionary=None): """ Generate structure from part of the cif. """ # Symbols often representing # common representations for elements/water in cif files special_symbols = { "D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O" } elements = [el.symbol for el in Element] lattice = self.get_lattice(data) self.symmetry_operations = self.get_symops(data) oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() def parse_symbol(sym): if substitution_dictionary: return substitution_dictionary.get(sym) else: m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": return m[0] return "" def get_matching_coord(coord): for op in self.symmetry_operations: c = op.operate(coord) for k in coord_to_species.keys(): if np.allclose(pbc_diff(c, k), (0, 0, 0), atol=self._site_tolerance): return tuple(k) return False for i in range(len(data["_atom_site_label"])): symbol = parse_symbol(data["_atom_site_label"][i]) if symbol: if symbol not in elements and symbol not in special_symbols: symbol = symbol[:2] else: continue # make sure symbol was properly parsed from _atom_site_label # otherwise get it from _atom_site_type_symbol try: if symbol in special_symbols: get_el_sp(special_symbols.get(symbol)) else: Element(symbol) except (KeyError, ValueError): # sometimes the site doesn't have the type_symbol. 
# we then hope the type_symbol can be parsed from the label if "_atom_site_type_symbol" in data.data.keys(): symbol = data["_atom_site_type_symbol"][i] if oxi_states is not None: if symbol in special_symbols: el = get_el_sp( special_symbols.get(symbol) + str(oxi_states[symbol])) else: el = Specie(symbol, oxi_states.get(symbol, 0)) else: el = get_el_sp(special_symbols.get(symbol, symbol)) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition({el: occu}) else: coord_to_species[match] += {el: occu} if any([sum(c.values()) > 1 for c in coord_to_species.values()]): warnings.warn("Some occupancies sum to > 1! If they are within " "the tolerance, they will be rescaled.") allspecies = [] allcoords = [] if coord_to_species.items(): for species, group in groupby(sorted(list( coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] coords = self._unique_coords(tmp_coords) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords): struct = Structure(lattice, allspecies, allcoords) struct = struct.get_sorted_structure() if primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
def _get_structure(self, data, primitive):
    """
    Generate structure from part of the cif.

    Args:
        data: CIF data block, read through ``data[key]``.
        primitive: If True, the structure is converted with
            ``get_primitive_structure()`` before sorting.

    Returns:
        The sorted Structure.
    """
    lengths = [float_from_str(data["_cell_length_" + i])
               for i in ["a", "b", "c"]]
    angles = [float_from_str(data["_cell_angle_" + i])
              for i in ["alpha", "beta", "gamma"]]
    lattice = Lattice.from_lengths_and_angles(lengths, angles)

    # look for the symmetry operations under either key spelling and fall
    # back to the identity operation (P1) if neither is present
    try:
        sympos = data["_symmetry_equiv_pos_as_xyz"]
    except KeyError:
        try:
            sympos = data["_symmetry_equiv_pos_as_xyz_"]
        except KeyError:
            warnings.warn("No _symmetry_equiv_pos_as_xyz type key found. "
                          "Defaulting to P1.")
            sympos = ['x, y, z']
    self.symmetry_operations = parse_symmetry_operations(sympos)

    def parse_symbol(sym):
        m = re.search("([A-Z][a-z]*)", sym)
        if m:
            return m.group(1)
        return ""

    # oxidation states are optional: a missing or unparsable table
    # disables them
    try:
        oxi_states = {
            data["_atom_type_symbol"][i]:
                float_from_str(data["_atom_type_oxidation_number"][i])
            for i in range(len(data["_atom_type_symbol"]))
        }
    except (ValueError, KeyError):
        oxi_states = None

    coord_to_species = OrderedDict()

    # ``range`` (rather than the Python 2-only ``xrange``) keeps this
    # runnable on both Python 2 and Python 3
    for i in range(len(data["_atom_site_type_symbol"])):
        symbol = parse_symbol(data["_atom_site_type_symbol"][i])
        if oxi_states is not None:
            el = Specie(symbol,
                        oxi_states[data["_atom_site_type_symbol"][i]])
        else:
            el = Element(symbol)
        x = float_from_str(data["_atom_site_fract_x"][i])
        y = float_from_str(data["_atom_site_fract_y"][i])
        z = float_from_str(data["_atom_site_fract_z"][i])

        # a missing or unparsable occupancy counts as fully occupied
        try:
            occu = float_from_str(data["_atom_site_occupancy"][i])
        except (KeyError, ValueError):
            occu = 1

        if occu > 0:
            coord = (x, y, z)
            if coord not in coord_to_species:
                coord_to_species[coord] = {el: occu}
            else:
                coord_to_species[coord][el] = occu

    allspecies = []
    allcoords = []

    for coord, species in coord_to_species.items():
        coords = self._unique_coords(coord)
        allcoords.extend(coords)
        allspecies.extend(len(coords) * [species])

    # rescale occupancies if necessary
    for species in allspecies:
        totaloccu = sum(species.values())
        if 1 < totaloccu <= self._occupancy_tolerance:
            # ``items`` (rather than the Python 2-only ``iteritems``);
            # only values of existing keys are reassigned, so the dict
            # size never changes during iteration
            for key, value in species.items():
                species[key] = value / totaloccu

    struct = Structure(lattice, allspecies, allcoords)
    if primitive:
        struct = struct.get_primitive_structure()
    return struct.get_sorted_structure()
def _get_structure(self, data, primitive, substitution_dictionary=None):
    """
    Generate structure from part of the cif.

    Returns the sorted (and, if ``primitive``, primitive and reduced)
    Structure, or None when no species were collected or the species and
    coordinate lists differ in length.
    """
    # Labels that commonly stand in for elements/water in cif files
    special_symbols = {
        "D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O"
    }
    elements = [el.symbol for el in Element]

    lattice = self.get_lattice(data)
    self.symmetry_operations = self.get_symops(data)
    oxi_states = self.parse_oxi_states(data)

    coord_to_species = OrderedDict()

    def parse_symbol(sym):
        if substitution_dictionary:
            return substitution_dictionary.get(sym)
        found = re.findall(r"w?[A-Z][a-z]*", sym)
        if found and found != "?":
            return found[0]
        return ""

    for i in range(len(data["_atom_site_label"])):
        symbol = parse_symbol(data["_atom_site_label"][i])
        if not symbol:
            continue
        if not (symbol in elements or symbol in special_symbols):
            symbol = symbol[:2]

        # check the label gave a usable symbol; if not, take the symbol
        # from _atom_site_type_symbol when that column exists
        try:
            if symbol in special_symbols:
                get_el_sp(special_symbols.get(symbol))
            else:
                Element(symbol)
        except (KeyError, ValueError):
            if "_atom_site_type_symbol" in data.data.keys():
                symbol = data["_atom_site_type_symbol"][i]

        is_special = symbol in special_symbols
        if oxi_states is None:
            el = get_el_sp(
                special_symbols.get(symbol) if is_special else symbol)
        elif is_special:
            el = get_el_sp(
                special_symbols.get(symbol) + str(oxi_states[symbol]))
        else:
            el = Specie(symbol, oxi_states.get(symbol, 0))

        x = str2float(data["_atom_site_fract_x"][i])
        y = str2float(data["_atom_site_fract_y"][i])
        z = str2float(data["_atom_site_fract_z"][i])

        # a missing or unparsable occupancy counts as fully occupied
        try:
            occu = str2float(data["_atom_site_occupancy"][i])
        except (KeyError, ValueError):
            occu = 1

        if occu > 0:
            coord_to_species.setdefault((x, y, z), {})[el] = occu

    coord_to_species = {
        coord: Composition(occupancies)
        for coord, occupancies in coord_to_species.items()
    }

    allspecies = []
    allcoords = []

    if coord_to_species:

        def by_composition(item):
            return item[1]

        # groupby only merges adjacent items, hence the sort on the same key
        ordered_sites = sorted(coord_to_species.items(), key=by_composition)
        for species, group in groupby(ordered_sites, key=by_composition):
            coords = self._unique_coords([site[0] for site in group])
            allcoords.extend(coords)
            allspecies.extend(len(coords) * [species])

        # rescale occupancies if necessary
        for species in allspecies:
            totaloccu = sum(species.values())
            if 1 < totaloccu <= self._occupancy_tolerance:
                for key, value in six.iteritems(species):
                    species[key] = value / totaloccu

    if not allspecies or len(allspecies) != len(allcoords):
        return None

    struct = Structure(lattice, allspecies, allcoords).get_sorted_structure()
    if primitive:
        struct = struct.get_primitive_structure()
        struct = struct.get_reduced_structure()
    return struct
def get_slab(self, shift=0, tol=0.1, energy=None):
    """
    Build one slab from the oriented unit cell for a given shift along c.

    This is a low-level routine; the other generation algorithms call it
    to obtain all slabs, so direct use should be rare.

    Args:
        shift (float): Displacement applied along the c direction before
            stacking. The value is subtracted from the fractional c
            coordinates of the oriented unit cell.
        tol (float): Tolerance passed to get_primitive_structure when
            self.primitive is set.
        energy (float): An energy to assign to the slab. If the slab is
            reduced to a primitive cell, the energy is scaled by the
            primitive/original volume ratio.

    Returns:
        (Slab) A Slab object with a particular shifted oriented unit cell.
    """
    unit_cell = self.oriented_unit_cell
    layer_height = self._proj_height

    # Number of unit-cell layers needed for the slab and for the vacuum.
    n_slab = int(math.ceil(self.min_slab_size / layer_height))
    n_vac = int(math.ceil(self.min_vac_size / layer_height))
    n_total = n_slab + n_vac

    species = unit_cell.species_and_occu
    props = {
        key: values * n_slab
        for key, values in unit_cell.site_properties.items()
    }

    # Shift along c and wrap the coordinates back into the unit cell.
    base_coords = np.array(unit_cell.frac_coords) + \
        np.array([0, 0, -shift])[None, :]
    base_coords = base_coords - np.floor(base_coords)

    a, b, c = unit_cell.lattice.matrix
    new_lattice = [a, b, n_total * c]

    # The c axis is n_total times longer, so fractional c shrinks.
    base_coords[:, 2] = base_coords[:, 2] / n_total

    all_coords = []
    for layer in range(n_slab):
        layer_coords = base_coords.copy()
        layer_coords[:, 2] += layer / n_total
        all_coords.extend(layer_coords)

    slab = Structure(new_lattice, species * n_slab, all_coords,
                     site_properties=props)

    scale_factor = self.slab_scale_factor

    # Whether or not to orthogonalize the structure
    if self.lll_reduce:
        reduced = slab.copy(sanitize=True)
        mapping = reduced.lattice.find_mapping(slab.lattice)
        scale_factor = np.dot(mapping[2], scale_factor)
        slab = reduced

    # Whether or not to center the slab layer around the vacuum
    if self.center_slab:
        mean_c = np.average([fc[2] for fc in slab.frac_coords])
        slab.translate_sites(list(range(len(slab))), [0, 0, 0.5 - mean_c])

    if self.primitive:
        prim = slab.get_primitive_structure(tolerance=tol)
        if energy is not None:
            energy = prim.volume / slab.volume * energy
        slab = prim

    return Slab(slab.lattice, slab.species_and_occu, slab.frac_coords,
                self.miller_index, self.oriented_unit_cell, shift,
                scale_factor, site_properties=slab.site_properties,
                energy=energy)
def _get_structure(self, data, primitive):
    """
    Generate structure from part of the cif.

    Args:
        data: CIF data block. It is indexed by CIF key (for example
            ``data["_atom_site_label"]``) and its raw key/value mapping is
            read and edited through ``data.data``: rows handled by the
            Springer Materials/Pauling File branch are deleted from every
            list-valued entry.
        primitive (bool): If True, the structure is reduced with
            get_primitive_structure() followed by get_reduced_structure().

    Returns:
        The sorted Structure, or None when no occupied site was parsed.
    """

    def parse_symbol(sym):
        # Common representations for elements/water in cif files
        # TODO: fix inconsistent handling of water
        special = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O",
                   "wat": "O", "OH": "", "OH2": ""}
        m = re.findall(r"w?[A-Z][a-z]*", sym)
        # Falls through (returning None) when nothing symbol-like is found.
        if m and m != "?":
            if sym in special:
                v = special[sym]
            else:
                v = special.get(m[0], m[0])
            if len(m) > 1 or (m[0] in special):
                warnings.warn("{} parsed as {}".format(sym, v))
            return v

    lattice = self.get_lattice(data)
    self.symmetry_operations = self.get_symops(data)
    oxi_states = self.parse_oxi_states(data)

    coord_to_species = OrderedDict()

    def get_matching_coord(coord):
        # Return the already stored coordinate that `coord` maps onto under
        # any symmetry operation (within the site tolerance), else False.
        keys = list(coord_to_species.keys())
        coords = np.array(keys)
        for op in self.symmetry_operations:
            c = op.operate(coord)
            inds = find_in_coord_list_pbc(coords, c,
                                          atol=self._site_tolerance)
            # `if inds` cannot be used here: np.array([0]) is falsy.
            if len(inds):
                return keys[inds[0]]
        return False

    ############################################################
    # This part of the code deals with handling formats of data as found
    # in CIF files extracted from the Springer Materials/Pauling File
    # databases, and that are different from standard ICSD formats.

    # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do
    # not contain this key.
    if "_atom_site_type_symbol" in data.data.keys():

        # Keep a track of which data row needs to be removed.
        # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14
        # 'rhombic dodecahedron, Nb<sub>14</sub>'
        # Without this code, the above row in a structure would be parsed
        # as an ordered site with only Nb (since
        # CifParser would try to parse the first two characters of the
        # label "Nb,Zr") and occupancy=1.
        # However, this site is meant to be a disordered site with 0.8 of
        # Nb and 0.2 of Zr.
        idxs_to_remove = []

        for idx, label in enumerate(data["_atom_site_label"]):
            # CIF files from the Springer Materials/Pauling File have
            # switched the label and symbol. Thus, in the
            # above shown example row, '0.8Nb + 0.2Zr' is the symbol.
            # Below, we split the strings on ' + ' to
            # check if the length (or number of elements) in the label and
            # symbol are equal.
            symbol_str = data["_atom_site_type_symbol"][idx]
            if len(symbol_str.split(' + ')) <= len(label.split(' + ')):
                continue

            # Dictionary to hold extracted elements and occupancies
            els_occu = {}

            # parse symbol to get element names and occupancy and store
            # in "els_occu"
            for part in symbol_str.split(' + '):
                # Remove any bracketed items in the string
                part = re.sub(r'\([0-9]*\)', '', part.strip())

                # Extract element name and its occupancy from the
                # string, and store it as a key-value pair in "els_occu".
                name = str(re.findall(r'\D+', part.strip())[1]).replace(
                    '<sup>', '')
                els_occu[name] = float(
                    '0' + re.findall(r'\.?\d+', part.strip())[1])

            x = str2float(data["_atom_site_fract_x"][idx])
            y = str2float(data["_atom_site_fract_y"][idx])
            z = str2float(data["_atom_site_fract_z"][idx])

            coord = (x, y, z)
            # Add each partially occupied element on the site coordinate
            for et in els_occu:
                # The occupancy is stored under the raw name `et`; looking
                # it up under the parsed symbol fails when the two differ.
                partial = {parse_symbol(et): els_occu[et]}
                match = get_matching_coord(coord)
                if not match:
                    coord_to_species[coord] = Composition(partial)
                else:
                    coord_to_species[match] += partial
            idxs_to_remove.append(idx)

        # Remove the original row by iterating over all keys in the CIF
        # data looking for lists, which indicates
        # multiple data items, one for each row, and remove items from the
        # list that corresponds to the removed row,
        # so that it's not processed by the rest of this function (which
        # would result in an error).
        for cif_key in data.data:
            if isinstance(data.data[cif_key], list):
                # Delete from the back so earlier indices stay valid.
                for row in sorted(idxs_to_remove, reverse=True):
                    del data.data[cif_key][row]

    ############################################################
    for i in range(len(data["_atom_site_label"])):
        try:
            # If site type symbol exists, use it. Otherwise, we use the
            # label.
            symbol = parse_symbol(data["_atom_site_type_symbol"][i])
        except KeyError:
            symbol = parse_symbol(data["_atom_site_label"][i])
        if not symbol:
            continue

        if oxi_states is not None:
            o_s = oxi_states.get(symbol, 0)
            # use _atom_site_type_symbol if possible for oxidation state
            if "_atom_site_type_symbol" in data.data.keys():
                oxi_symbol = data["_atom_site_type_symbol"][i]
                o_s = oxi_states.get(oxi_symbol, o_s)
            try:
                el = Specie(symbol, o_s)
            except Exception:
                # Not a known element: fall back to a dummy species. Only
                # ordinary errors are handled, so KeyboardInterrupt and
                # SystemExit still propagate.
                el = DummySpecie(symbol, o_s)
        else:
            el = get_el_sp(symbol)

        x = str2float(data["_atom_site_fract_x"][i])
        y = str2float(data["_atom_site_fract_y"][i])
        z = str2float(data["_atom_site_fract_z"][i])

        # A missing or unparsable occupancy is treated as a full site.
        try:
            occu = str2float(data["_atom_site_occupancy"][i])
        except (KeyError, ValueError):
            occu = 1

        if occu > 0:
            coord = (x, y, z)
            match = get_matching_coord(coord)
            if not match:
                coord_to_species[coord] = Composition({el: occu})
            else:
                coord_to_species[match] += {el: occu}

    sum_occu = [sum(c.values()) for c in coord_to_species.values()]
    if any(o > 1 for o in sum_occu):
        warnings.warn("Some occupancies (%s) sum to > 1! If they are within "
                      "the tolerance, they will be rescaled."
                      % str(sum_occu))

    allspecies = []
    allcoords = []

    if coord_to_species:
        # groupby only merges adjacent items, hence the sort on the same key.
        by_species = sorted(coord_to_species.items(), key=lambda x: x[1])
        for species, group in groupby(by_species, key=lambda x: x[1]):
            tmp_coords = [site[0] for site in group]
            coords = self._unique_coords(tmp_coords)
            allcoords.extend(coords)
            allspecies.extend(len(coords) * [species])

        # rescale occupancies if necessary
        for i, species in enumerate(allspecies):
            totaloccu = sum(species.values())
            if 1 < totaloccu <= self._occupancy_tolerance:
                allspecies[i] = species / totaloccu

    if allspecies and len(allspecies) == len(allcoords):
        struct = Structure(lattice, allspecies, allcoords)
        struct = struct.get_sorted_structure()

        if primitive:
            struct = struct.get_primitive_structure()
            struct = struct.get_reduced_structure()
        return struct
def _get_structure(self, data, primitive):
    """
    Generate structure from part of the cif.

    Args:
        data: CIF data block, indexed by CIF key (for example
            ``data["_cell_length_a"]`` or ``data["_atom_site_fract_x"]``).
        primitive (bool): If True, the structure is reduced with
            get_primitive_structure() followed by get_reduced_structure().

    Returns:
        The Structure, sorted via get_sorted_structure().
    """
    lengths = [
        str2float(data["_cell_length_" + i]) for i in ["a", "b", "c"]
    ]
    angles = [
        str2float(data["_cell_angle_" + i])
        for i in ["alpha", "beta", "gamma"]
    ]
    lattice = Lattice.from_lengths_and_angles(lengths, angles)

    # Symmetry positions may be stored under either key spelling; without
    # any of them the cell is treated as P1.
    try:
        sympos = data["_symmetry_equiv_pos_as_xyz"]
    except KeyError:
        try:
            sympos = data["_symmetry_equiv_pos_as_xyz_"]
        except KeyError:
            warnings.warn("No _symmetry_equiv_pos_as_xyz type key found. "
                          "Defaulting to P1.")
            sympos = ['x, y, z']
    self.symmetry_operations = parse_symmetry_operations(sympos)

    def parse_symbol(sym):
        m = re.search("([A-Z][a-z]*)", sym)
        if m:
            return m.group(1)
        return ""

    # Oxidation states are optional: a missing key or an unparsable number
    # disables them for the whole structure.
    try:
        oxi_states = {
            data["_atom_type_symbol"][i]:
            str2float(data["_atom_type_oxidation_number"][i])
            for i in range(len(data["_atom_type_symbol"]))
        }
    except (ValueError, KeyError):
        oxi_states = None

    coord_to_species = OrderedDict()

    for i in range(len(data["_atom_site_type_symbol"])):
        symbol = parse_symbol(data["_atom_site_type_symbol"][i])
        if oxi_states is not None:
            el = Specie(symbol,
                        oxi_states[data["_atom_site_type_symbol"][i]])
        else:
            el = Element(symbol)

        x = str2float(data["_atom_site_fract_x"][i])
        y = str2float(data["_atom_site_fract_y"][i])
        z = str2float(data["_atom_site_fract_z"][i])

        # A missing or unparsable occupancy is treated as a full site.
        try:
            occu = str2float(data["_atom_site_occupancy"][i])
        except (KeyError, ValueError):
            occu = 1

        if occu > 0:
            coord = (x, y, z)
            if coord not in coord_to_species:
                coord_to_species[coord] = {el: occu}
            else:
                coord_to_species[coord][el] = occu

    allspecies = []
    allcoords = []

    for coord, species in coord_to_species.items():
        coords = self._unique_coords(coord)
        allcoords.extend(coords)
        allspecies.extend(len(coords) * [species])

    # rescale occupancies if necessary
    for species in allspecies:
        totaloccu = sum(species.values())
        if 1 < totaloccu <= self._occupancy_tolerance:
            # Only values of existing keys change, so iterating the live
            # items view is safe.
            for key, value in species.items():
                species[key] = value / totaloccu

    struct = Structure(lattice, allspecies, allcoords)
    if primitive:
        struct = struct.get_primitive_structure().get_reduced_structure()
    return struct.get_sorted_structure()
def __init__(
    self,
    structure: Structure,
    overwrite_magmom_mode: Union[OverwriteMagmomMode, str] = "none",
    round_magmoms: Union[bool, float] = False,
    detect_valences: bool = False,
    make_primitive: bool = True,
    default_magmoms: Union[dict, None] = None,
    set_net_positive: bool = True,
    threshold: float = 0.1,
):
    """
    A class which provides a few helpful methods to analyze
    collinear magnetic structures.

    If magnetic moments are not defined, moments will be
    taken either from default_magmoms.yaml (similar to the
    default magmoms in MPRelaxSet, with a few extra definitions)
    or from a specie:magmom dict provided by the default_magmoms
    kwarg.

    Input magmoms can be replaced using the 'overwrite_magmom_mode'
    kwarg. This can be:
    * "none" to do nothing,
    * "respect_sign" which will overwrite existing magmoms with
      those from default_magmoms but will keep sites with positive magmoms
      positive, negative magmoms negative and zero magmoms zero,
    * "respect_zeros", which will give a ferromagnetic structure
      (all positive magmoms from default_magmoms) but still keep sites with
      zero magmoms as zero,
    * "replace_all" which will try to guess initial magmoms for
      all sites in the structure irrespective of input structure
      (this is most suitable for an initial DFT calculation),
    * "replace_all_if_undefined" is the same as "replace_all" but only if
      no magmoms are defined in input structure, otherwise it will respect
      existing magmoms.
    * "normalize" will normalize magmoms to unity, but will respect sign
      (used for comparing orderings), magmoms < threshold will be set
      to zero

    :param structure: Structure object
    :param overwrite_magmom_mode (str): default "none"
    :param round_magmoms (int or bool): will round input magmoms to
        specified number of decimal places if integer is supplied, if set
        to a float will try and group magmoms together using a kernel
        density estimator of provided width, and extracting peaks of the
        estimator
    :param detect_valences (bool): if True, will attempt to assign valences
        to input structure
    :param make_primitive (bool): if True, will transform to primitive
        magnetic cell
    :param default_magmoms (dict): (optional) dict specifying default
        magmoms
    :param set_net_positive (bool): if True, will change sign of magnetic
        moments such that the net magnetization is positive. Argument will
        be ignored if mode "respect_sign" is used.
    :param threshold (float): number (in Bohr magnetons) below which
        magmoms will be rounded to zero, default of 0.1 can probably be
        increased for many magnetic systems, depending on your application
    :raises NotImplementedError: if the structure is disordered
    :raises ValueError: if magmoms are given both as site properties and
        as species spins, or if overwrite_magmom_mode is not supported
    """

    if default_magmoms:
        self.default_magmoms = default_magmoms
    else:
        self.default_magmoms = DEFAULT_MAGMOMS

    # work on a copy so the caller's structure is not modified
    structure = structure.copy()

    # check for disorder
    if not structure.is_ordered:
        raise NotImplementedError(
            "Not implemented for disordered structures, "
            "make ordered approximation first."
        )

    if detect_valences:
        trans = AutoOxiStateDecorationTransformation()
        try:
            structure = trans.apply_transformation(structure)
        except ValueError:
            # best effort: carry on with the undecorated structure
            warnings.warn(
                "Could not assign valences "
                "for {}".format(structure.composition.reduced_formula)
            )

    # check to see if structure has magnetic moments
    # on site properties or species spin properties,
    # prioritize site properties
    has_magmoms = bool(structure.site_properties.get("magmom", False))

    has_spin = False
    for comp in structure.species_and_occu:
        for sp, occu in comp.items():
            if getattr(sp, "spin", False):
                has_spin = True

    # perform input sanitation ...
    # rest of class will assume magnetic moments
    # are stored on site properties:
    # this is somewhat arbitrary, arguments can
    # be made for both approaches

    if has_magmoms and has_spin:
        raise ValueError(
            "Structure contains magnetic moments on both "
            "magmom site properties and spin species "
            "properties. This is ambiguous. Remove one or "
            "the other."
        )
    elif has_magmoms:
        if None in structure.site_properties["magmom"]:
            warnings.warn(
                "Be careful with mixing types in your magmom "
                "site properties. Any 'None' magmoms have been "
                "replaced with zero."
            )
        magmoms = [m if m else 0 for m in structure.site_properties["magmom"]]
    elif has_spin:
        magmoms = [getattr(sp, "spin", 0) for sp in structure.species]
        structure.remove_spin()
    else:
        # no magmoms present, add zero magmoms for now
        magmoms = [0] * len(structure)
        # and overwrite magmoms with default magmoms later unless otherwise stated
        if overwrite_magmom_mode == "replace_all_if_undefined":
            overwrite_magmom_mode = "replace_all"

    # test to see if input structure has collinear magmoms
    self.is_collinear = Magmom.are_collinear(magmoms)

    if not self.is_collinear:
        warnings.warn(
            "This class is not designed to be used with "
            "non-collinear structures. If your structure is "
            "only slightly non-collinear (e.g. canted) may still "
            "give useful results, but use with caution."
        )

    # this is for collinear structures only, make sure magmoms
    # are all floats
    magmoms = list(map(float, magmoms))

    # set properties that should be done /before/ we process input magmoms
    self.total_magmoms = sum(magmoms)
    self.magnetization = sum(magmoms) / structure.volume

    # round magmoms below threshold to zero
    magmoms = [m if abs(m) > threshold else 0 for m in magmoms]

    # overwrite existing magmoms with default_magmoms
    if overwrite_magmom_mode not in (
        "none",
        "respect_sign",
        "respect_zeros",
        "replace_all",
        "replace_all_if_undefined",
        "normalize",
    ):
        raise ValueError("Unsupported mode.")

    for idx, site in enumerate(structure):

        if site.species_string in self.default_magmoms:
            # look for species first, e.g. Fe2+
            default_magmom = self.default_magmoms[site.species_string]
        elif (
            isinstance(site.specie, Specie)
            and str(site.specie.element) in self.default_magmoms
        ):
            # look for element, e.g. Fe
            default_magmom = self.default_magmoms[str(site.specie.element)]
        else:
            default_magmom = 0

        # overwrite_magmom_mode = "respect_sign" will change magnitude of
        # existing moments only, and keep zero magmoms as
        # zero: it will keep the magnetic ordering intact

        if overwrite_magmom_mode == "respect_sign":
            # flipping the overall sign afterwards would defeat this mode
            set_net_positive = False
            if magmoms[idx] > 0:
                magmoms[idx] = default_magmom
            elif magmoms[idx] < 0:
                magmoms[idx] = -default_magmom

        # overwrite_magmom_mode = "respect_zeros" will give a ferromagnetic
        # structure but will keep zero magmoms as zero

        elif overwrite_magmom_mode == "respect_zeros":
            if magmoms[idx] != 0:
                magmoms[idx] = default_magmom

        # overwrite_magmom_mode = "replace_all" will ignore input magmoms
        # and give a ferromagnetic structure with magnetic
        # moments on *all* atoms it thinks could be magnetic

        elif overwrite_magmom_mode == "replace_all":
            magmoms[idx] = default_magmom

        # overwrite_magmom_mode = "normalize" set magmoms magnitude to 1

        elif overwrite_magmom_mode == "normalize":
            if magmoms[idx] != 0:
                magmoms[idx] = int(magmoms[idx] / abs(magmoms[idx]))

    # round magmoms, used to smooth out computational data
    magmoms = (
        self._round_magmoms(magmoms, round_magmoms) if round_magmoms else magmoms
    )

    if set_net_positive:
        sign = np.sum(magmoms)
        if sign < 0:
            magmoms = -np.array(magmoms)

    structure.add_site_property("magmom", magmoms)

    if make_primitive:
        # site properties are kept so that sites with different magmoms
        # are not merged
        structure = structure.get_primitive_structure(use_site_props=True)

    self.structure = structure
def _get_structure(self, data, primitive):
    """
    Generate structure from part of the cif.

    Args:
        data: CIF data block. It is indexed by CIF key (for example
            ``data["_atom_site_label"]``) and its raw key/value mapping is
            read through ``data.data``.
        primitive (bool): If True, the structure is reduced with
            get_primitive_structure() followed by get_reduced_structure().

    Returns:
        The Structure, sorted via get_sorted_structure().
    """
    lengths = [
        str2float(data["_cell_length_" + i]) for i in ["a", "b", "c"]
    ]
    angles = [
        str2float(data["_cell_angle_" + i])
        for i in ["alpha", "beta", "gamma"]
    ]
    lattice = Lattice.from_lengths_and_angles(lengths, angles)

    # Symmetry positions may be stored under either key spelling; without
    # any of them the cell is treated as P1.
    try:
        sympos = data["_symmetry_equiv_pos_as_xyz"]
    except KeyError:
        try:
            sympos = data["_symmetry_equiv_pos_as_xyz_"]
        except KeyError:
            warnings.warn("No _symmetry_equiv_pos_as_xyz type key found. "
                          "Defaulting to P1.")
            sympos = ['x, y, z']
    self.symmetry_operations = [SymmOp.from_xyz_string(s) for s in sympos]

    def parse_symbol(sym):
        # capitalization conventions are not strictly followed, eg Cu will
        # be CU. The pattern can match the empty string, so the search
        # always succeeds; a label that does not start with a letter
        # yields "".
        m = re.search("([A-Za-z]*)", sym)
        return m.group(1)[:2].capitalize()

    # Oxidation states are optional: a missing key or an unparsable number
    # disables them for the whole structure.
    try:
        oxi_states = {
            data["_atom_type_symbol"][i]:
            str2float(data["_atom_type_oxidation_number"][i])
            for i in range(len(data["_atom_type_symbol"]))
        }
    except (ValueError, KeyError):
        oxi_states = None

    coord_to_species = OrderedDict()

    for i in range(len(data["_atom_site_label"])):
        symbol = parse_symbol(data["_atom_site_label"][i])

        # make sure symbol was properly parsed from _atom_site_label
        # otherwise get it from _atom_site_type_symbol
        try:
            Element(symbol)
        except (KeyError, ValueError):
            # Both exception types are handled, as in the sibling parsers,
            # so the fallback runs whichever one Element raises.
            symbol = parse_symbol(data["_atom_site_type_symbol"][i])

        if oxi_states is not None:
            # sometimes the site doesn't have the type_symbol.
            # we then hope the type_symbol can be parsed from the label
            if "_atom_site_type_symbol" in data.data.keys():
                k = data["_atom_site_type_symbol"][i]
            else:
                k = symbol
            el = Specie(symbol, oxi_states[k])
        else:
            el = Element(symbol)

        x = str2float(data["_atom_site_fract_x"][i])
        y = str2float(data["_atom_site_fract_y"][i])
        z = str2float(data["_atom_site_fract_z"][i])

        # A missing or unparsable occupancy is treated as a full site.
        try:
            occu = str2float(data["_atom_site_occupancy"][i])
        except (KeyError, ValueError):
            occu = 1

        if occu > 0:
            coord = (x, y, z)
            if coord not in coord_to_species:
                coord_to_species[coord] = {el: occu}
            else:
                coord_to_species[coord][el] = occu

    allspecies = []
    allcoords = []

    for coord, species in coord_to_species.items():
        coords = self._unique_coords(coord)
        allcoords.extend(coords)
        allspecies.extend(len(coords) * [species])

    # rescale occupancies if necessary
    for species in allspecies:
        totaloccu = sum(species.values())
        if 1 < totaloccu <= self._occupancy_tolerance:
            # Only values of existing keys change, so iterating the items
            # directly is safe.
            for key, value in species.items():
                species[key] = value / totaloccu

    struct = Structure(lattice, allspecies, allcoords)
    if primitive:
        struct = struct.get_primitive_structure().get_reduced_structure()
    return struct.get_sorted_structure()
def _get_structure(self, data, primitive, substitution_dictionary=None):
    """
    Generate structure from part of the cif.

    Args:
        data: CIF data block. It is indexed by CIF key (for example
            ``data["_atom_site_label"]``) and its raw key/value mapping is
            read and edited through ``data.data``: rows handled by the
            Springer Materials/Pauling File branch are deleted from every
            list-valued entry.
        primitive (bool): If True, the structure is reduced with
            get_primitive_structure() followed by get_reduced_structure().
        substitution_dictionary (dict): Optional mapping from atom site
            label to the symbol to use for that site. When it is given
            (and non-empty) it replaces the regex based label parsing;
            labels that are not in the mapping are skipped.

    Returns:
        The sorted Structure, or None when no occupied site was parsed.
    """
    # Common representations for elements/water in cif files, mapped to
    # the symbol they stand for.
    special_symbols = {
        "D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O"
    }
    elements = [el.symbol for el in Element]

    lattice = self.get_lattice(data)
    self.symmetry_operations = self.get_symops(data)
    oxi_states = self.parse_oxi_states(data)

    coord_to_species = OrderedDict()

    def parse_symbol(sym):
        # An explicit substitution table wins over label parsing.
        if substitution_dictionary:
            return substitution_dictionary.get(sym)
        if sym in ['OH', 'OH2']:
            warnings.warn("Symbol '{}' not recognized".format(sym))
            return ""
        m = re.findall(r"w?[A-Z][a-z]*", sym)
        # findall returns a list, so an empty list is the only "no match"
        # case that has to be handled.
        return m[0] if m else ""

    def get_matching_coord(coord):
        # Return the already stored coordinate that `coord` maps onto under
        # any symmetry operation (within the site tolerance), else False.
        for op in self.symmetry_operations:
            c = op.operate(coord)
            for k in coord_to_species.keys():
                if np.allclose(pbc_diff(c, k), (0, 0, 0),
                               atol=self._site_tolerance):
                    return tuple(k)
        return False

    ############################################################
    # This part of the code deals with handling formats of data as found
    # in CIF files extracted from the Springer Materials/Pauling File
    # databases, and that are different from standard ICSD formats.

    # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do
    # not contain this key.
    if "_atom_site_type_symbol" in data.data.keys():

        # Keep a track of which data row needs to be removed.
        # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14
        # 'rhombic dodecahedron, Nb<sub>14</sub>'
        # Without this code, the above row in a structure would be parsed
        # as an ordered site with only Nb (since CifParser would try to
        # parse the first two characters of the label "Nb,Zr") and
        # occupancy=1.
        # However, this site is meant to be a disordered site with 0.8 of
        # Nb and 0.2 of Zr.
        idxs_to_remove = []

        for idx, label in enumerate(data["_atom_site_label"]):
            # CIF files from the Springer Materials/Pauling File have
            # switched the label and symbol. Thus, in the above shown
            # example row, '0.8Nb + 0.2Zr' is the symbol. Below, we split
            # the strings on ' + ' to check if the length (or number of
            # elements) in the label and symbol are equal.
            symbol_str = data["_atom_site_type_symbol"][idx]
            if len(symbol_str.split(' + ')) <= len(label.split(' + ')):
                continue

            # Dictionary to hold extracted elements and occupancies
            els_occu = {}

            # parse symbol to get element names and occupancy and store
            # in "els_occu"
            for part in symbol_str.split(' + '):
                # Remove any bracketed items in the string
                part = re.sub(r'\([0-9]*\)', '', part.strip())

                # Extract element name and its occupancy from the string,
                # and store it as a key-value pair in "els_occu".
                name = str(re.findall(r'\D+', part.strip())[1]).replace(
                    '<sup>', '')
                els_occu[name] = float(
                    '0' + re.findall(r'\.?\d+', part.strip())[1])

            x = str2float(data["_atom_site_fract_x"][idx])
            y = str2float(data["_atom_site_fract_y"][idx])
            z = str2float(data["_atom_site_fract_z"][idx])

            coord = (x, y, z)
            # Add each partially occupied element on the site coordinate
            for et in els_occu:
                # The occupancy is stored under the raw name `et`; looking
                # it up under the parsed symbol fails when the two differ
                # (e.g. with a substitution dictionary).
                partial = {parse_symbol(et): els_occu[et]}
                match = get_matching_coord(coord)
                if not match:
                    coord_to_species[coord] = Composition(partial)
                else:
                    coord_to_species[match] += partial
            idxs_to_remove.append(idx)

        # Remove the original row by iterating over all keys in the CIF
        # data looking for lists, which indicates multiple data items, one
        # for each row, and remove items from the list that corresponds to
        # the removed row, so that it's not processed by the rest of this
        # function (which would result in an error).
        for cif_key in data.data:
            if isinstance(data.data[cif_key], list):
                # Delete from the back so earlier indices stay valid.
                for row in sorted(idxs_to_remove, reverse=True):
                    del data.data[cif_key][row]

    ############################################################
    for i in range(len(data["_atom_site_label"])):
        symbol = parse_symbol(data["_atom_site_label"][i])
        if not symbol:
            continue
        if symbol not in elements and symbol not in special_symbols:
            symbol = symbol[:2]

        # make sure symbol was properly parsed from _atom_site_label
        # otherwise get it from _atom_site_type_symbol
        try:
            if symbol in special_symbols:
                get_el_sp(special_symbols.get(symbol))
            else:
                Element(symbol)
        except (KeyError, ValueError):
            # sometimes the site doesn't have the type_symbol.
            # we then hope the type_symbol can be parsed from the label
            if "_atom_site_type_symbol" in data.data.keys():
                symbol = data["_atom_site_type_symbol"][i]

        if oxi_states is not None:
            if symbol in special_symbols:
                el = get_el_sp(
                    special_symbols.get(symbol) + str(oxi_states[symbol]))
            else:
                el = Specie(symbol, oxi_states.get(symbol, 0))
        else:
            el = get_el_sp(special_symbols.get(symbol, symbol))

        x = str2float(data["_atom_site_fract_x"][i])
        y = str2float(data["_atom_site_fract_y"][i])
        z = str2float(data["_atom_site_fract_z"][i])

        # A missing or unparsable occupancy is treated as a full site.
        try:
            occu = str2float(data["_atom_site_occupancy"][i])
        except (KeyError, ValueError):
            occu = 1

        if occu > 0:
            coord = (x, y, z)
            match = get_matching_coord(coord)
            if not match:
                coord_to_species[coord] = Composition({el: occu})
            else:
                coord_to_species[match] += {el: occu}

    if any(sum(c.values()) > 1 for c in coord_to_species.values()):
        warnings.warn("Some occupancies sum to > 1! If they are within "
                      "the tolerance, they will be rescaled.")

    allspecies = []
    allcoords = []

    if coord_to_species:
        # groupby only merges adjacent items, hence the sort on the same key.
        by_species = sorted(coord_to_species.items(), key=lambda x: x[1])
        for species, group in groupby(by_species, key=lambda x: x[1]):
            tmp_coords = [site[0] for site in group]
            coords = self._unique_coords(tmp_coords)
            allcoords.extend(coords)
            allspecies.extend(len(coords) * [species])

        # rescale occupancies if necessary
        for i, species in enumerate(allspecies):
            totaloccu = sum(species.values())
            if 1 < totaloccu <= self._occupancy_tolerance:
                allspecies[i] = species / totaloccu

    if allspecies and len(allspecies) == len(allcoords):
        struct = Structure(lattice, allspecies, allcoords)
        struct = struct.get_sorted_structure()

        if primitive:
            struct = struct.get_primitive_structure()
            struct = struct.get_reduced_structure()
        return struct
def _get_structure(self, data, primitive, substitution_dictionary=None):
    """
    Generate structure from part of the cif.

    Args:
        data: CIF data block. It is indexed by CIF key (for example
            ``data["_atom_site_label"]``) and its raw key/value mapping is
            read and edited through ``data.data``: rows handled by the
            Springer Materials/Pauling File branch are deleted from every
            list-valued entry.
        primitive (bool): If True, the structure is reduced with
            get_primitive_structure() followed by get_reduced_structure().
        substitution_dictionary (dict): Optional mapping from atom site
            label to the symbol to use for that site. When it is given
            (and non-empty) it replaces the regex based label parsing;
            labels that are not in the mapping are skipped.

    Returns:
        The sorted Structure, or None when no occupied site was parsed.
    """
    # Common representations for elements/water in cif files, mapped to
    # the symbol they stand for.
    special_symbols = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O"}
    elements = [el.symbol for el in Element]

    lattice = self.get_lattice(data)
    self.symmetry_operations = self.get_symops(data)
    oxi_states = self.parse_oxi_states(data)

    coord_to_species = OrderedDict()

    def parse_symbol(sym):
        # An explicit substitution table wins over label parsing.
        if substitution_dictionary:
            return substitution_dictionary.get(sym)
        if sym in ['OH', 'OH2']:
            warnings.warn("Symbol '{}' not recognized".format(sym))
            return ""
        m = re.findall(r"w?[A-Z][a-z]*", sym)
        # findall returns a list, so an empty list is the only "no match"
        # case that has to be handled.
        return m[0] if m else ""

    def get_matching_coord(coord):
        # Return the already stored coordinate that `coord` maps onto under
        # any symmetry operation (within the site tolerance), else False.
        for op in self.symmetry_operations:
            c = op.operate(coord)
            for k in coord_to_species.keys():
                if np.allclose(pbc_diff(c, k), (0, 0, 0), atol=self._site_tolerance):
                    return tuple(k)
        return False

    ############################################################
    # This part of the code deals with handling formats of data as found
    # in CIF files extracted from the Springer Materials/Pauling File
    # databases, and that are different from standard ICSD formats.

    # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do
    # not contain this key.
    if "_atom_site_type_symbol" in data.data.keys():

        # Keep a track of which data row needs to be removed.
        # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14
        # 'rhombic dodecahedron, Nb<sub>14</sub>'
        # Without this code, the above row in a structure would be parsed
        # as an ordered site with only Nb (since CifParser would try to
        # parse the first two characters of the label "Nb,Zr") and
        # occupancy=1.
        # However, this site is meant to be a disordered site with 0.8 of
        # Nb and 0.2 of Zr.
        idxs_to_remove = []

        type_symbols = data["_atom_site_type_symbol"]
        for idx, label in enumerate(data["_atom_site_label"]):
            # CIF files from the Springer Materials/Pauling File have
            # switched the label and symbol. Thus, in the above shown
            # example row, '0.8Nb + 0.2Zr' is the symbol. Below, we split
            # the strings on ' + ' to check if the length (or number of
            # elements) in the label and symbol are equal.
            symbol_parts = type_symbols[idx].split(' + ')
            if len(symbol_parts) <= len(label.split(' + ')):
                continue

            # Dictionary to hold extracted elements and occupancies
            els_occu = {}

            # parse symbol to get element names and occupancy and store
            # in "els_occu"
            for raw_part in symbol_parts:
                # Remove any bracketed items in the string
                part = re.sub(r'\([0-9]*\)', '', raw_part.strip()).strip()

                # Extract element name and its occupancy from the string,
                # and store it as a key-value pair in "els_occu".
                name = str(re.findall(r'\D+', part)[1]).replace('<sup>', '')
                els_occu[name] = float('0' + re.findall(r'\.?\d+', part)[1])

            x = str2float(data["_atom_site_fract_x"][idx])
            y = str2float(data["_atom_site_fract_y"][idx])
            z = str2float(data["_atom_site_fract_z"][idx])

            coord = (x, y, z)
            # Add each partially occupied element on the site coordinate
            for et, et_occu in els_occu.items():
                # The occupancy belongs to the raw name `et`; looking it up
                # under the parsed symbol fails when the two differ (e.g.
                # with a substitution dictionary).
                partial = {parse_symbol(et): et_occu}
                match = get_matching_coord(coord)
                if not match:
                    coord_to_species[coord] = Composition(partial)
                else:
                    coord_to_species[match] += partial
            idxs_to_remove.append(idx)

        # Remove the original row by iterating over all keys in the CIF
        # data looking for lists, which indicates multiple data items, one
        # for each row, and remove items from the list that corresponds to
        # the removed row, so that it's not processed by the rest of this
        # function (which would result in an error).
        for cif_key in data.data:
            if isinstance(data.data[cif_key], list):
                # Delete from the back so earlier indices stay valid.
                for row in sorted(idxs_to_remove, reverse=True):
                    del data.data[cif_key][row]

    ############################################################
    for i in range(len(data["_atom_site_label"])):
        symbol = parse_symbol(data["_atom_site_label"][i])
        if not symbol:
            continue
        if symbol not in elements and symbol not in special_symbols:
            symbol = symbol[:2]

        # make sure symbol was properly parsed from _atom_site_label
        # otherwise get it from _atom_site_type_symbol
        try:
            if symbol in special_symbols:
                get_el_sp(special_symbols.get(symbol))
            else:
                Element(symbol)
        except (KeyError, ValueError):
            # sometimes the site doesn't have the type_symbol.
            # we then hope the type_symbol can be parsed from the label
            if "_atom_site_type_symbol" in data.data.keys():
                symbol = data["_atom_site_type_symbol"][i]

        if oxi_states is not None:
            if symbol in special_symbols:
                el = get_el_sp(special_symbols.get(symbol) + str(oxi_states[symbol]))
            else:
                el = Specie(symbol, oxi_states.get(symbol, 0))
        else:
            el = get_el_sp(special_symbols.get(symbol, symbol))

        x = str2float(data["_atom_site_fract_x"][i])
        y = str2float(data["_atom_site_fract_y"][i])
        z = str2float(data["_atom_site_fract_z"][i])

        # A missing or unparsable occupancy is treated as a full site.
        try:
            occu = str2float(data["_atom_site_occupancy"][i])
        except (KeyError, ValueError):
            occu = 1

        if occu > 0:
            coord = (x, y, z)
            match = get_matching_coord(coord)
            if not match:
                coord_to_species[coord] = Composition({el: occu})
            else:
                coord_to_species[match] += {el: occu}

    if any(sum(c.values()) > 1 for c in coord_to_species.values()):
        warnings.warn("Some occupancies sum to > 1! If they are within "
                      "the tolerance, they will be rescaled.")

    allspecies = []
    allcoords = []

    if coord_to_species:
        # groupby only merges adjacent items, hence the sort on the same key.
        by_species = sorted(coord_to_species.items(), key=lambda x: x[1])
        for species, group in groupby(by_species, key=lambda x: x[1]):
            tmp_coords = [site[0] for site in group]
            coords = self._unique_coords(tmp_coords)
            allcoords.extend(coords)
            allspecies.extend(len(coords) * [species])

        # rescale occupancies if necessary
        for i, species in enumerate(allspecies):
            totaloccu = sum(species.values())
            if 1 < totaloccu <= self._occupancy_tolerance:
                allspecies[i] = species / totaloccu

    if allspecies and len(allspecies) == len(allcoords):
        struct = Structure(lattice, allspecies, allcoords)
        struct = struct.get_sorted_structure()

        if primitive:
            struct = struct.get_primitive_structure()
            struct = struct.get_reduced_structure()
        return struct
def _get_structure(self, data, primitive): """ Generate structure from part of the cif. """ def parse_symbol(sym): # Common representations for elements/water in cif files # TODO: fix inconsistent handling of water special = { "D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O", "OH": "", "OH2": "" } m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": if sym in special: v = special[sym] else: v = special.get(m[0], m[0]) if len(m) > 1 or (m[0] in special): warnings.warn("{} parsed as {}".format(sym, v)) return v lattice = self.get_lattice(data) # if magCIF, get magnetic symmetry moments and magmoms # else standard CIF, and use empty magmom dict if self.feature_flags["magcif_incommensurate"]: raise NotImplementedError( "Incommensurate structures not currently supported.") elif self.feature_flags["magcif"]: self.symmetry_operations = self.get_magsymops(data) magmoms = self.parse_magmoms(data, lattice=lattice) else: self.symmetry_operations = self.get_symops(data) magmoms = {} oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() coord_to_magmoms = OrderedDict() def get_matching_coord(coord): keys = list(coord_to_species.keys()) coords = np.array(keys) for op in self.symmetry_operations: c = op.operate(coord) inds = find_in_coord_list_pbc(coords, c, atol=self._site_tolerance) # cant use if inds, because python is dumb and np.array([0]) evaluates # to False if len(inds): return keys[inds[0]] return False for i in range(len(data["_atom_site_label"])): try: # If site type symbol exists, use it. Otherwise, we use the # label. 
symbol = parse_symbol(data["_atom_site_type_symbol"][i]) except KeyError: symbol = parse_symbol(data["_atom_site_label"][i]) if not symbol: continue if oxi_states is not None: o_s = oxi_states.get(symbol, 0) # use _atom_site_type_symbol if possible for oxidation state if "_atom_site_type_symbol" in data.data.keys(): oxi_symbol = data["_atom_site_type_symbol"][i] o_s = oxi_states.get(oxi_symbol, o_s) try: el = Specie(symbol, o_s) except: el = DummySpecie(symbol, o_s) else: el = get_el_sp(symbol) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) magmom = magmoms.get(data["_atom_site_label"][i], Magmom(0)) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition({el: occu}) coord_to_magmoms[coord] = magmom else: coord_to_species[match] += {el: occu} coord_to_magmoms[ match] = None # disordered magnetic not currently supported sum_occu = [sum(c.values()) for c in coord_to_species.values()] if any([o > 1 for o in sum_occu]): warnings.warn( "Some occupancies (%s) sum to > 1! If they are within " "the tolerance, they will be rescaled." % str(sum_occu)) allspecies = [] allcoords = [] allmagmoms = [] # check to see if magCIF file is disordered if self.feature_flags["magcif"]: for k, v in coord_to_magmoms.items(): if v is None: # Proposed solution to this is to instead store magnetic moments # as Specie 'spin' property, instead of site property, but this # introduces ambiguities for end user (such as unintended use of # `spin` and Specie will have fictious oxidation state). raise NotImplementedError( 'Disordered magnetic structures not currently supported.' 
) if coord_to_species.items(): for species, group in groupby(sorted(list( coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] tmp_magmom = [ coord_to_magmoms[tmp_coord] for tmp_coord in tmp_coords ] if self.feature_flags["magcif"]: coords, magmoms = self._unique_coords( tmp_coords, tmp_magmom) else: coords, magmoms = self._unique_coords(tmp_coords) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) allmagmoms.extend(magmoms) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords) and len( allspecies) == len(allmagmoms): if self.feature_flags["magcif"]: struct = Structure(lattice, allspecies, allcoords, site_properties={"magmom": allmagmoms}) else: struct = Structure(lattice, allspecies, allcoords) struct = struct.get_sorted_structure() if primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
def _get_structure(self, data, primitive, substitution_dictionary=None):
    """
    Generate structure from part of the cif.

    Args:
        data: CIF block. It is indexed by tag (for example
            ``data["_atom_site_label"]``) and exposes its raw mapping as
            ``data.data``.
        primitive (bool): If True, the structure is converted with
            ``get_primitive_structure()`` followed by
            ``get_reduced_structure()`` before it is returned.
        substitution_dictionary (dict): Optional mapping from atom site
            label to symbol. When non-empty, every label is looked up in
            it instead of being parsed; labels it does not contain are
            skipped.

    Returns:
        The sorted structure assembled from the atom sites, or None when
        no site with a usable symbol and a positive occupancy was found.

    Side effects:
        Sets ``self.symmetry_operations``.
    """
    # Symbols often representing
    # common representations for elements/water in cif files
    special_symbols = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O",
                       "wat": "O"}
    # A set, because membership is tested once per atom site.
    elements = {el.symbol for el in Element}

    lattice = self.get_lattice(data)
    self.symmetry_operations = self.get_symops(data)
    oxi_states = self.parse_oxi_states(data)

    coord_to_species = OrderedDict()

    def parse_symbol(sym):
        """Return the symbol for a site label ("" or None if unusable)."""
        if substitution_dictionary:
            return substitution_dictionary.get(sym)
        # The pattern needs an upper-case letter, so placeholders such as
        # "?" simply produce no match.
        m = re.findall(r"w?[A-Z][a-z]*", sym)
        return m[0] if m else ""

    def get_matching_coord(coord):
        """Return the stored coordinate equivalent to coord, else False."""
        for op in self.symmetry_operations:
            c = op.operate(coord)
            for k in coord_to_species:
                if np.allclose(pbc_diff(c, k), (0, 0, 0),
                               atol=self._site_tolerance):
                    return tuple(k)
        return False

    for i, label in enumerate(data["_atom_site_label"]):
        symbol = parse_symbol(label)
        if not symbol:
            continue
        if symbol not in elements and symbol not in special_symbols:
            symbol = symbol[:2]

        # make sure symbol was properly parsed from _atom_site_label
        # otherwise get it from _atom_site_type_symbol
        try:
            if symbol in special_symbols:
                get_el_sp(special_symbols.get(symbol))
            else:
                Element(symbol)
        except (KeyError, ValueError):
            # sometimes the site doesn't have the type_symbol.
            # we then hope the type_symbol can be parsed from the label
            if "_atom_site_type_symbol" in data.data:
                symbol = data["_atom_site_type_symbol"][i]

        if oxi_states is not None:
            if symbol in special_symbols:
                # NOTE(review): oxi_states[symbol] raises KeyError when the
                # special symbol has no parsed oxidation state, unlike the
                # .get(symbol, 0) fallback used for ordinary symbols below.
                # Confirm whether that asymmetry is intended.
                el = get_el_sp(special_symbols.get(symbol) +
                               str(oxi_states[symbol]))
            else:
                el = Specie(symbol, oxi_states.get(symbol, 0))
        else:
            el = get_el_sp(special_symbols.get(symbol, symbol))

        x = str2float(data["_atom_site_fract_x"][i])
        y = str2float(data["_atom_site_fract_y"][i])
        z = str2float(data["_atom_site_fract_z"][i])

        try:
            occu = str2float(data["_atom_site_occupancy"][i])
        except (KeyError, ValueError):
            # Missing or unparseable occupancy counts as fully occupied.
            occu = 1

        if occu <= 0:
            continue

        coord = (x, y, z)
        match = get_matching_coord(coord)
        if not match:
            coord_to_species[coord] = Composition({el: occu})
        else:
            coord_to_species[match] += {el: occu}

    if any(sum(c.values()) > 1 for c in coord_to_species.values()):
        warnings.warn("Some occupancies sum to > 1! If they are within "
                      "the tolerance, they will be rescaled.")

    allspecies = []
    allcoords = []

    def species_of(item):
        return item[1]

    # groupby only merges adjacent items, hence the sort on the same key.
    for species, group in groupby(
            sorted(coord_to_species.items(), key=species_of),
            key=species_of):
        tmp_coords = [site[0] for site in group]
        unique_coords = self._unique_coords(tmp_coords)
        allcoords.extend(unique_coords)
        allspecies.extend(len(unique_coords) * [species])

    # rescale occupancies if necessary
    for idx, comp in enumerate(allspecies):
        totaloccu = sum(comp.values())
        if 1 < totaloccu <= self._occupancy_tolerance:
            allspecies[idx] = comp / totaloccu

    if not allspecies or len(allspecies) != len(allcoords):
        return None

    struct = Structure(lattice, allspecies, allcoords)
    struct = struct.get_sorted_structure()

    if primitive:
        struct = struct.get_primitive_structure()
        struct = struct.get_reduced_structure()
    return struct