Example #1
0
    def _get_structure(self, data, primitive, substitution_dictionary=None):
        """
        Generate structure from part of the cif.

        Args:
            data: CIF data block. It is indexed by CIF key (for example
                ``data["_atom_site_label"]``) and exposes the underlying
                mapping as ``data.data``.
            primitive: If truthy, the structure is passed through
                ``get_primitive_structure()`` and then
                ``get_reduced_structure()`` before being returned.
            substitution_dictionary: Optional mapping from site label to
                element symbol. When given and non-empty it replaces the
                regex-based label parsing; labels it does not contain are
                skipped.

        Returns:
            The sorted structure, or None when no species were collected or
            the number of species and coordinates differ.
        """
        # Labels that commonly stand for elements/water in cif files, mapped
        # to the symbol that is handed to get_el_sp.
        special_symbols = {
            "D": "D",
            "Hw": "H",
            "Ow": "O",
            "Wat": "O",
            "wat": "O",
        }
        # A set gives constant-time membership tests inside the site loop.
        elements = {el.symbol for el in Element}

        lattice = self.get_lattice(data)
        self.symmetry_operations = self.get_symops(data)
        oxi_states = self.parse_oxi_states(data)

        coord_to_species = OrderedDict()

        def parse_symbol(sym):
            """Return the symbol extracted from a site label ("" if none)."""
            if substitution_dictionary:
                return substitution_dictionary.get(sym)
            # The previous ``m != "?"`` guard compared a list with a string
            # and was therefore always true; an empty match list already
            # covers the "?" label.
            found = re.findall(r"w?[A-Z][a-z]*", sym)
            return found[0] if found else ""

        for i, label in enumerate(data["_atom_site_label"]):
            symbol = parse_symbol(label)
            if not symbol:
                continue
            if symbol not in elements and symbol not in special_symbols:
                symbol = symbol[:2]

            # make sure symbol was properly parsed from _atom_site_label
            # otherwise get it from _atom_site_type_symbol
            try:
                if symbol in special_symbols:
                    get_el_sp(special_symbols.get(symbol))
                else:
                    Element(symbol)
            except (KeyError, ValueError):
                # sometimes the site doesn't have the type_symbol.
                # we then hope the type_symbol can be parsed from the label
                if "_atom_site_type_symbol" in data.data:
                    symbol = data["_atom_site_type_symbol"][i]

            if oxi_states is not None:
                if symbol in special_symbols:
                    el = get_el_sp(
                        special_symbols.get(symbol) + str(oxi_states[symbol]))
                else:
                    el = Specie(symbol, oxi_states.get(symbol, 0))
            else:
                el = get_el_sp(special_symbols.get(symbol, symbol))

            x = str2float(data["_atom_site_fract_x"][i])
            y = str2float(data["_atom_site_fract_y"][i])
            z = str2float(data["_atom_site_fract_z"][i])
            try:
                occu = str2float(data["_atom_site_occupancy"][i])
            except (KeyError, ValueError):
                # Missing or unparsable occupancy: treat as fully occupied.
                occu = 1
            if occu > 0:
                # Sites sharing exactly the same coordinate tuple are merged
                # into one species dictionary.
                coord_to_species.setdefault((x, y, z), {})[el] = occu

        coord_to_species = {
            coord: Composition(occupancies)
            for coord, occupancies in coord_to_species.items()
        }
        allspecies = []
        allcoords = []

        def by_species(item):
            return item[1]

        if coord_to_species:
            # groupby only merges adjacent items, hence the sort by the
            # same key first.
            grouped = groupby(sorted(coord_to_species.items(), key=by_species),
                              key=by_species)
            for species, group in grouped:
                tmp_coords = [site[0] for site in group]

                coords = self._unique_coords(tmp_coords)

                allcoords.extend(coords)
                allspecies.extend(len(coords) * [species])

            # rescale occupancies if necessary
            # Build a new Composition instead of assigning into the existing
            # one: item assignment on a Composition is not relied upon here.
            for idx, species in enumerate(allspecies):
                totaloccu = sum(species.values())
                if 1 < totaloccu <= self._occupancy_tolerance:
                    allspecies[idx] = Composition(
                        {sp: amt / totaloccu for sp, amt in species.items()})

        if allspecies and len(allspecies) == len(allcoords):

            struct = Structure(lattice, allspecies, allcoords)
            struct = struct.get_sorted_structure()

            if primitive:
                struct = struct.get_primitive_structure()
                struct = struct.get_reduced_structure()
            return struct
Example #2
0
    def _get_structure(self, data, primitive, substitution_dictionary=None):
        """
        Generate structure from part of the cif.

        Args:
            data: CIF data block. It is indexed by CIF key (for example
                ``data["_atom_site_label"]``) and exposes the underlying
                mapping as ``data.data``. NOTE: rows handled by the Springer
                Materials/Pauling File special case are deleted in place
                from every list stored in ``data.data``.
            primitive: If truthy, the structure is passed through
                ``get_primitive_structure()`` and then
                ``get_reduced_structure()`` before being returned.
            substitution_dictionary: Optional mapping from site label to
                element symbol. When given and non-empty it replaces the
                regex-based label parsing; labels it does not contain are
                skipped.

        Returns:
            The sorted structure, or None when no species were collected or
            the number of species and coordinates differ.
        """
        # Symbols often representing
        # common representations for elements/water in cif files
        special_symbols = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O",
                           "wat": "O"}
        # A set gives constant-time membership tests inside the site loop.
        elements = {el.symbol for el in Element}

        lattice = self.get_lattice(data)
        self.symmetry_operations = self.get_symops(data)
        oxi_states = self.parse_oxi_states(data)

        coord_to_species = OrderedDict()

        def parse_symbol(sym):
            """Return the symbol extracted from a site label ("" if none)."""
            if substitution_dictionary:
                return substitution_dictionary.get(sym)
            if sym in ('OH', 'OH2'):
                warnings.warn("Symbol '{}' not recognized".format(sym))
                return ""
            # The previous ``m != "?"`` guard compared a list with a string
            # and was therefore always true; an empty match list already
            # covers the "?" label.
            found = re.findall(r"w?[A-Z][a-z]*", sym)
            return found[0] if found else ""

        def get_matching_coord(coord):
            """Return the stored coordinate that a symmetry image of
            ``coord`` matches within the site tolerance, else False."""
            for op in self.symmetry_operations:
                c = op.operate(coord)
                for k in coord_to_species:
                    if np.allclose(pbc_diff(c, k), (0, 0, 0),
                                   atol=self._site_tolerance):
                        return tuple(k)
            return False

        ############################################################
        # This part of the code deals with handling formats of data as found
        # in CIF files extracted from the Springer Materials/Pauling File
        # databases, and that are different from standard ICSD formats.

        # Check to see if "_atom_site_type_symbol" exists, as some test CIFs
        # do not contain this key.
        if "_atom_site_type_symbol" in data.data:

            # Keep a track of which data row needs to be removed.
            # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14
            # 'rhombic dodecahedron, Nb<sub>14</sub>'
            # Without this code, the above row in a structure would be parsed
            # as an ordered site with only Nb (since CifParser would try to
            # parse the first two characters of the label "Nb,Zr") and
            # occupancy=1. However, this site is meant to be a disordered
            # site with 0.8 of Nb and 0.2 of Zr.
            idxs_to_remove = []
            type_symbols = data["_atom_site_type_symbol"]

            for idx, label in enumerate(data["_atom_site_label"]):

                # CIF files from the Springer Materials/Pauling File have
                # switched the label and symbol. Thus, in the above shown
                # example row, '0.8Nb + 0.2Zr' is the symbol. Below, we split
                # the strings on ' + ' and only handle rows whose symbol has
                # more parts than the label.
                symbol_parts = type_symbols[idx].split(' + ')
                if len(symbol_parts) <= len(label.split(' + ')):
                    continue

                # Extracted element names mapped to their occupancies.
                els_occu = {}
                for part in symbol_parts:
                    # Remove any bracketed items in the string
                    cleaned = re.sub(r'\([0-9]*\)', '', part.strip()).strip()

                    # For "0.8Nb" the digit runs are ['0', '.8'] and the
                    # non-digit runs are ['.', 'Nb'], hence index 1 for both.
                    occupancy = float(
                        '0' + re.findall(r'\.?\d+', cleaned)[1])
                    el_name = str(
                        re.findall(r'\D+', cleaned)[1]).replace('<sup>', '')
                    els_occu[el_name] = occupancy

                x = str2float(data["_atom_site_fract_x"][idx])
                y = str2float(data["_atom_site_fract_y"][idx])
                z = str2float(data["_atom_site_fract_z"][idx])

                coord = (x, y, z)
                # Add each partially occupied element on the site coordinate
                for et, occupancy in els_occu.items():
                    match = get_matching_coord(coord)
                    # Look the occupancy up by the stored key; indexing
                    # els_occu with the parsed symbol raised KeyError
                    # whenever parsing changed the name.
                    parsed = parse_symbol(et)
                    if not match:
                        coord_to_species[coord] = Composition(
                            {parsed: occupancy})
                    else:
                        coord_to_species[match] += {parsed: occupancy}
                idxs_to_remove.append(idx)

            # Remove the original row by iterating over all keys in the CIF
            # data looking for lists, which indicates multiple data items,
            # one for each row, and remove items from the list that
            # corresponds to the removed row, so that it's not processed by
            # the rest of this function (which would result in an error).
            # NOTE(review): this touches every list in data.data, not only
            # the _atom_site_* columns - confirm that is intended.
            rows_descending = sorted(idxs_to_remove, reverse=True)
            for cif_key in data.data:
                if isinstance(data.data[cif_key], list):
                    for row_idx in rows_descending:
                        del data.data[cif_key][row_idx]

        ############################################################

        # The labels are fetched again here because rows may have been
        # removed above.
        for i, label in enumerate(data["_atom_site_label"]):
            symbol = parse_symbol(label)
            if not symbol:
                continue
            if symbol not in elements and symbol not in special_symbols:
                symbol = symbol[:2]

            # make sure symbol was properly parsed from _atom_site_label
            # otherwise get it from _atom_site_type_symbol
            try:
                if symbol in special_symbols:
                    get_el_sp(special_symbols.get(symbol))
                else:
                    Element(symbol)
            except (KeyError, ValueError):
                # sometimes the site doesn't have the type_symbol.
                # we then hope the type_symbol can be parsed from the label
                if "_atom_site_type_symbol" in data.data:
                    symbol = data["_atom_site_type_symbol"][i]

            if oxi_states is not None:
                if symbol in special_symbols:
                    el = get_el_sp(special_symbols.get(symbol) +
                                   str(oxi_states[symbol]))
                else:
                    el = Specie(symbol, oxi_states.get(symbol, 0))
            else:
                el = get_el_sp(special_symbols.get(symbol, symbol))

            x = str2float(data["_atom_site_fract_x"][i])
            y = str2float(data["_atom_site_fract_y"][i])
            z = str2float(data["_atom_site_fract_z"][i])
            try:
                occu = str2float(data["_atom_site_occupancy"][i])
            except (KeyError, ValueError):
                # Missing or unparsable occupancy: treat as fully occupied.
                occu = 1

            if occu > 0:
                coord = (x, y, z)
                match = get_matching_coord(coord)
                if not match:
                    coord_to_species[coord] = Composition({el: occu})
                else:
                    coord_to_species[match] += {el: occu}

        if any(sum(c.values()) > 1 for c in coord_to_species.values()):
            warnings.warn("Some occupancies sum to > 1! If they are within "
                          "the tolerance, they will be rescaled.")

        allspecies = []
        allcoords = []

        def by_species(item):
            return item[1]

        if coord_to_species:
            # groupby only merges adjacent items, hence the sort by the
            # same key first.
            grouped = groupby(sorted(coord_to_species.items(), key=by_species),
                              key=by_species)
            for species, group in grouped:
                tmp_coords = [site[0] for site in group]

                coords = self._unique_coords(tmp_coords)

                allcoords.extend(coords)
                allspecies.extend(len(coords) * [species])

            # rescale occupancies if necessary
            for i, species in enumerate(allspecies):
                totaloccu = sum(species.values())
                if 1 < totaloccu <= self._occupancy_tolerance:
                    allspecies[i] = species / totaloccu

        if allspecies and len(allspecies) == len(allcoords):
            struct = Structure(lattice, allspecies, allcoords)
            struct = struct.get_sorted_structure()

            if primitive:
                struct = struct.get_primitive_structure()
                struct = struct.get_reduced_structure()
            return struct
Example #3
0
    def _get_structure(self, data, primitive):
        """
        Generate structure from part of the cif.

        Args:
            data: CIF data block. It is indexed by CIF key (for example
                ``data["_atom_site_label"]``) and exposes the underlying
                mapping as ``data.data``.
            primitive: If truthy, the structure is passed through
                ``get_primitive_structure()`` and then
                ``get_reduced_structure()`` before being returned.

        Returns:
            The sorted structure (with a "magmom" site property when the
            "magcif" feature flag is set), or None when no species were
            collected or the species, coordinate and magmom counts differ.

        Raises:
            NotImplementedError: If the "magcif_incommensurate" feature flag
                is set, or if a magCIF contains a disordered magnetic site.
        """
        # Common representations for elements/water in cif files
        # TODO: fix inconsistent handling of water
        special = {
            "D": "D",
            "Hw": "H",
            "Ow": "O",
            "Wat": "O",
            "wat": "O",
            "OH": "",
            "OH2": "",
        }

        def parse_symbol(sym):
            """Return the symbol parsed from ``sym``, or None if none found."""
            # The previous ``m != "?"`` guard compared a list with a string
            # and was therefore always true; an empty match list already
            # covers the "?" label.
            found = re.findall(r"w?[A-Z][a-z]*", sym)
            if not found:
                return None
            if sym in special:
                parsed = special[sym]
            else:
                parsed = special.get(found[0], found[0])
            if len(found) > 1 or (found[0] in special):
                warnings.warn("{} parsed as {}".format(sym, parsed))
            return parsed

        lattice = self.get_lattice(data)

        # if magCIF, get magnetic symmetry moments and magmoms
        # else standard CIF, and use empty magmom dict
        if self.feature_flags["magcif_incommensurate"]:
            raise NotImplementedError(
                "Incommensurate structures not currently supported.")
        elif self.feature_flags["magcif"]:
            self.symmetry_operations = self.get_magsymops(data)
            magmoms = self.parse_magmoms(data, lattice=lattice)
        else:
            self.symmetry_operations = self.get_symops(data)
            magmoms = {}

        oxi_states = self.parse_oxi_states(data)

        coord_to_species = OrderedDict()
        coord_to_magmoms = OrderedDict()

        def get_matching_coord(coord):
            """Return the stored coordinate that a symmetry image of
            ``coord`` matches within the site tolerance, else False."""
            keys = list(coord_to_species.keys())
            coords = np.array(keys)
            for op in self.symmetry_operations:
                c = op.operate(coord)
                inds = find_in_coord_list_pbc(coords,
                                              c,
                                              atol=self._site_tolerance)
                # Test the length explicitly: np.array([0]) is falsy, so
                # ``if inds`` would miss a match at index 0.
                if len(inds):
                    return keys[inds[0]]
            return False

        for i, label in enumerate(data["_atom_site_label"]):
            # If site type symbol exists, use it. Otherwise, we use the
            # label. Only the lookup can raise KeyError, so only the lookup
            # sits inside the try.
            try:
                raw_symbol = data["_atom_site_type_symbol"][i]
            except KeyError:
                raw_symbol = label
            symbol = parse_symbol(raw_symbol)
            if not symbol:
                continue

            if oxi_states is not None:
                o_s = oxi_states.get(symbol, 0)
                # use _atom_site_type_symbol if possible for oxidation state
                if "_atom_site_type_symbol" in data.data:
                    oxi_symbol = data["_atom_site_type_symbol"][i]
                    o_s = oxi_states.get(oxi_symbol, o_s)
                try:
                    el = Specie(symbol, o_s)
                except Exception:
                    # Fall back to a dummy species when the symbol is not
                    # accepted by Specie. ``except Exception`` (rather than a
                    # bare except) lets KeyboardInterrupt/SystemExit through.
                    el = DummySpecie(symbol, o_s)
            else:
                el = get_el_sp(symbol)

            x = str2float(data["_atom_site_fract_x"][i])
            y = str2float(data["_atom_site_fract_y"][i])
            z = str2float(data["_atom_site_fract_z"][i])
            magmom = magmoms.get(label, Magmom(0))

            try:
                occu = str2float(data["_atom_site_occupancy"][i])
            except (KeyError, ValueError):
                # Missing or unparsable occupancy: treat as fully occupied.
                occu = 1

            if occu > 0:
                coord = (x, y, z)
                match = get_matching_coord(coord)
                if not match:
                    coord_to_species[coord] = Composition({el: occu})
                    coord_to_magmoms[coord] = magmom
                else:
                    coord_to_species[match] += {el: occu}
                    # disordered magnetic not currently supported
                    coord_to_magmoms[match] = None

        sum_occu = [sum(c.values()) for c in coord_to_species.values()]
        if any(o > 1 for o in sum_occu):
            warnings.warn(
                "Some occupancies (%s) sum to > 1! If they are within "
                "the tolerance, they will be rescaled." % str(sum_occu))

        allspecies = []
        allcoords = []
        allmagmoms = []

        # check to see if magCIF file is disordered
        if self.feature_flags["magcif"]:
            for site_magmom in coord_to_magmoms.values():
                if site_magmom is None:
                    # Proposed solution to this is to instead store magnetic
                    # moments as Specie 'spin' property, instead of site
                    # property, but this introduces ambiguities for end user
                    # (such as unintended use of `spin` and Specie will have
                    # fictitious oxidation state).
                    raise NotImplementedError(
                        'Disordered magnetic structures not currently supported.'
                    )

        def by_species(item):
            return item[1]

        if coord_to_species:
            # groupby only merges adjacent items, hence the sort by the
            # same key first.
            grouped = groupby(sorted(coord_to_species.items(), key=by_species),
                              key=by_species)
            for species, group in grouped:
                tmp_coords = [site[0] for site in group]
                tmp_magmom = [
                    coord_to_magmoms[tmp_coord] for tmp_coord in tmp_coords
                ]

                # Use a separate name so the label-to-magmom dict bound to
                # ``magmoms`` above is not overwritten.
                if self.feature_flags["magcif"]:
                    coords, group_magmoms = self._unique_coords(
                        tmp_coords, tmp_magmom)
                else:
                    coords, group_magmoms = self._unique_coords(tmp_coords)

                allcoords.extend(coords)
                allspecies.extend(len(coords) * [species])
                allmagmoms.extend(group_magmoms)

            # rescale occupancies if necessary
            for i, species in enumerate(allspecies):
                totaloccu = sum(species.values())
                if 1 < totaloccu <= self._occupancy_tolerance:
                    allspecies[i] = species / totaloccu

        if allspecies and len(allspecies) == len(allcoords) and len(
                allspecies) == len(allmagmoms):

            if self.feature_flags["magcif"]:
                struct = Structure(lattice,
                                   allspecies,
                                   allcoords,
                                   site_properties={"magmom": allmagmoms})
            else:
                struct = Structure(lattice, allspecies, allcoords)

            struct = struct.get_sorted_structure()

            if primitive:
                struct = struct.get_primitive_structure()
                struct = struct.get_reduced_structure()
            return struct
Example #4
0
    def _get_structure(self, data, primitive, substitution_dictionary=None):
        """
        Generate structure from part of the cif.

        Args:
            data: CIF data block. It is indexed by CIF key (for example
                ``data["_atom_site_label"]``) and exposes the underlying
                mapping as ``data.data``.
            primitive: If truthy, the structure is passed through
                ``get_primitive_structure()`` and then
                ``get_reduced_structure()`` before being returned.
            substitution_dictionary: Optional mapping from site label to
                element symbol. When given and non-empty it replaces the
                regex-based label parsing; labels it does not contain are
                skipped.

        Returns:
            The sorted structure, or None when no species were collected or
            the number of species and coordinates differ.
        """
        # Symbols often representing
        # common representations for elements/water in cif files
        special_symbols = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O",
                           "wat": "O"}
        # Materialise the names: on Python 3 a bare map() is a one-shot
        # iterator, so the membership test below would only work for the
        # first site.
        elements = set(map(str, ptable.all_elements))

        lattice = self.get_lattice(data)
        self.symmetry_operations = self.get_symops(data)
        oxi_states = self.parse_oxi_states(data)

        coord_to_species = OrderedDict()

        def parse_symbol(sym):
            """Return the symbol extracted from a site label ("" if none)."""
            if substitution_dictionary:
                return substitution_dictionary.get(sym)
            # The previous ``m != "?"`` guard compared a list with a string
            # and was therefore always true; an empty match list already
            # covers the "?" label.
            found = re.findall(r"w?[A-Z][a-z]*", sym)
            return found[0] if found else ""

        for i, label in enumerate(data["_atom_site_label"]):
            symbol = parse_symbol(label)
            if not symbol:
                continue
            if symbol not in elements and symbol not in special_symbols:
                symbol = symbol[:2]

            # make sure symbol was properly parsed from _atom_site_label
            # otherwise get it from _atom_site_type_symbol
            try:
                if symbol in special_symbols:
                    get_el_sp(special_symbols.get(symbol))
                else:
                    Element(symbol)
            except (KeyError, ValueError):
                # ValueError is caught as well as KeyError so an unknown
                # symbol is handled whichever of the two the lookup raises.
                # sometimes the site doesn't have the type_symbol.
                # we then hope the type_symbol can be parsed from the label
                if "_atom_site_type_symbol" in data.data:
                    symbol = data["_atom_site_type_symbol"][i]

            if oxi_states is not None:
                if symbol in special_symbols:
                    el = get_el_sp(special_symbols.get(symbol) +
                                   str(oxi_states[symbol]))
                else:
                    el = Specie(symbol, oxi_states.get(symbol, 0))
            else:
                el = get_el_sp(special_symbols.get(symbol, symbol))

            x = str2float(data["_atom_site_fract_x"][i])
            y = str2float(data["_atom_site_fract_y"][i])
            z = str2float(data["_atom_site_fract_z"][i])
            try:
                occu = str2float(data["_atom_site_occupancy"][i])
            except (KeyError, ValueError):
                # Missing or unparsable occupancy: treat as fully occupied.
                occu = 1
            if occu > 0:
                # Sites sharing exactly the same coordinate tuple are merged
                # into one species dictionary.
                coord_to_species.setdefault((x, y, z), {})[el] = occu

        coord_to_species = {coord: Composition(occupancies)
                            for coord, occupancies in coord_to_species.items()}
        allspecies = []
        allcoords = []

        def by_species(item):
            return item[1]

        if coord_to_species:
            # groupby only merges adjacent items, hence the sort by the
            # same key first.
            grouped = groupby(sorted(coord_to_species.items(), key=by_species),
                              key=by_species)
            for species, group in grouped:
                tmp_coords = [site[0] for site in group]

                coords = self._unique_coords(tmp_coords)

                allcoords.extend(coords)
                allspecies.extend(len(coords) * [species])

            # rescale occupancies if necessary
            # Build a new Composition instead of assigning into the existing
            # one: item assignment on a Composition is not relied upon here.
            for idx, species in enumerate(allspecies):
                totaloccu = sum(species.values())
                if 1 < totaloccu <= self._occupancy_tolerance:
                    allspecies[idx] = Composition(
                        {sp: amt / totaloccu for sp, amt in species.items()})

        if allspecies and len(allspecies) == len(allcoords):

            struct = Structure(lattice, allspecies, allcoords)
            struct = struct.get_sorted_structure()

            if primitive:
                struct = struct.get_primitive_structure()
                struct = struct.get_reduced_structure()
            return struct
Example #5
0
    def _get_structure(self, data, primitive):
        """
        Generate structure from part of the cif.

        Args:
            data: CIF data block. It is indexed by CIF key (for example
                ``data["_atom_site_label"]``) and exposes the underlying
                mapping as ``data.data``. NOTE: rows handled by the Springer
                Materials/Pauling File special case are deleted in place
                from every list stored in ``data.data``.
            primitive: If truthy, the structure is passed through
                ``get_primitive_structure()`` and then
                ``get_reduced_structure()`` before being returned.

        Returns:
            The sorted structure, or None when no species were collected or
            the number of species and coordinates differ.
        """
        # Common representations for elements/water in cif files
        # TODO: fix inconsistent handling of water
        special = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O",
                   "wat": "O", "OH": "", "OH2": ""}

        def parse_symbol(sym):
            """Return the symbol parsed from ``sym``, or None if none found."""
            # The previous ``m != "?"`` guard compared a list with a string
            # and was therefore always true; an empty match list already
            # covers the "?" label.
            found = re.findall(r"w?[A-Z][a-z]*", sym)
            if not found:
                return None
            if sym in special:
                parsed = special[sym]
            else:
                parsed = special.get(found[0], found[0])
            if len(found) > 1 or (found[0] in special):
                warnings.warn("{} parsed as {}".format(sym, parsed))
            return parsed

        lattice = self.get_lattice(data)
        self.symmetry_operations = self.get_symops(data)
        oxi_states = self.parse_oxi_states(data)

        coord_to_species = OrderedDict()

        def get_matching_coord(coord):
            """Return the stored coordinate that a symmetry image of
            ``coord`` matches within the site tolerance, else False."""
            keys = list(coord_to_species.keys())
            coords = np.array(keys)
            for op in self.symmetry_operations:
                c = op.operate(coord)
                inds = find_in_coord_list_pbc(coords, c,
                                              atol=self._site_tolerance)
                # Test the length explicitly: np.array([0]) is falsy, so
                # ``if inds`` would miss a match at index 0.
                if len(inds):
                    return keys[inds[0]]
            return False

        ############################################################
        # This part of the code deals with handling formats of data as found
        # in CIF files extracted from the Springer Materials/Pauling File
        # databases, and that are different from standard ICSD formats.

        # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do
        # not contain this key.
        if "_atom_site_type_symbol" in data.data:

            # Keep a track of which data row needs to be removed.
            # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14
            # 'rhombic dodecahedron, Nb<sub>14</sub>'
            # Without this code, the above row in a structure would be parsed
            # as an ordered site with only Nb (since
            # CifParser would try to parse the first two characters of the
            # label "Nb,Zr") and occupancy=1.
            # However, this site is meant to be a disordered site with 0.8 of
            # Nb and 0.2 of Zr.
            idxs_to_remove = []
            type_symbols = data["_atom_site_type_symbol"]

            for idx, label in enumerate(data["_atom_site_label"]):

                # CIF files from the Springer Materials/Pauling File have
                # switched the label and symbol. Thus, in the
                # above shown example row, '0.8Nb + 0.2Zr' is the symbol.
                # Below, we split the strings on ' + ' and only handle rows
                # whose symbol has more parts than the label.
                symbol_parts = type_symbols[idx].split(' + ')
                if len(symbol_parts) <= len(label.split(' + ')):
                    continue

                # Extracted element names mapped to their occupancies.
                els_occu = {}
                for part in symbol_parts:
                    # Remove any bracketed items in the string
                    cleaned = re.sub(r'\([0-9]*\)', '', part.strip()).strip()

                    # For "0.8Nb" the digit runs are ['0', '.8'] and the
                    # non-digit runs are ['.', 'Nb'], hence index 1 for both.
                    occupancy = float(
                        '0' + re.findall(r'\.?\d+', cleaned)[1])
                    el_name = str(
                        re.findall(r'\D+', cleaned)[1]).replace('<sup>', '')
                    els_occu[el_name] = occupancy

                x = str2float(data["_atom_site_fract_x"][idx])
                y = str2float(data["_atom_site_fract_y"][idx])
                z = str2float(data["_atom_site_fract_z"][idx])

                coord = (x, y, z)
                # Add each partially occupied element on the site coordinate
                for et, occupancy in els_occu.items():
                    match = get_matching_coord(coord)
                    # Parse once (parse_symbol may warn) and use the stored
                    # occupancy; indexing els_occu with the parsed symbol
                    # raised KeyError whenever parsing changed the name.
                    parsed = parse_symbol(et)
                    if not match:
                        coord_to_species[coord] = Composition(
                            {parsed: occupancy})
                    else:
                        coord_to_species[match] += {parsed: occupancy}
                idxs_to_remove.append(idx)

            # Remove the original row by iterating over all keys in the CIF
            # data looking for lists, which indicates
            # multiple data items, one for each row, and remove items from the
            # list that corresponds to the removed row,
            # so that it's not processed by the rest of this function (which
            # would result in an error).
            # NOTE(review): this touches every list in data.data, not only
            # the _atom_site_* columns - confirm that is intended.
            rows_descending = sorted(idxs_to_remove, reverse=True)
            for cif_key in data.data:
                if isinstance(data.data[cif_key], list):
                    for row_idx in rows_descending:
                        del data.data[cif_key][row_idx]

        ############################################################
        # The labels are fetched again here because rows may have been
        # removed above.
        for i, label in enumerate(data["_atom_site_label"]):
            # If site type symbol exists, use it. Otherwise, we use the
            # label. Only the lookup can raise KeyError, so only the lookup
            # sits inside the try.
            try:
                raw_symbol = data["_atom_site_type_symbol"][i]
            except KeyError:
                raw_symbol = label
            symbol = parse_symbol(raw_symbol)
            if not symbol:
                continue

            if oxi_states is not None:
                o_s = oxi_states.get(symbol, 0)
                # use _atom_site_type_symbol if possible for oxidation state
                if "_atom_site_type_symbol" in data.data:
                    oxi_symbol = data["_atom_site_type_symbol"][i]
                    o_s = oxi_states.get(oxi_symbol, o_s)
                try:
                    el = Specie(symbol, o_s)
                except Exception:
                    # Fall back to a dummy species when the symbol is not
                    # accepted by Specie. ``except Exception`` (rather than a
                    # bare except) lets KeyboardInterrupt/SystemExit through.
                    el = DummySpecie(symbol, o_s)
            else:
                el = get_el_sp(symbol)

            x = str2float(data["_atom_site_fract_x"][i])
            y = str2float(data["_atom_site_fract_y"][i])
            z = str2float(data["_atom_site_fract_z"][i])

            try:
                occu = str2float(data["_atom_site_occupancy"][i])
            except (KeyError, ValueError):
                # Missing or unparsable occupancy: treat as fully occupied.
                occu = 1

            if occu > 0:
                coord = (x, y, z)
                match = get_matching_coord(coord)
                if not match:
                    coord_to_species[coord] = Composition({el: occu})
                else:
                    coord_to_species[match] += {el: occu}

        sum_occu = [sum(c.values()) for c in coord_to_species.values()]
        if any(o > 1 for o in sum_occu):
            warnings.warn("Some occupancies (%s) sum to > 1! If they are within "
                          "the tolerance, they will be rescaled." % str(sum_occu))

        allspecies = []
        allcoords = []

        def by_species(item):
            return item[1]

        if coord_to_species:
            # groupby only merges adjacent items, hence the sort by the
            # same key first.
            grouped = groupby(sorted(coord_to_species.items(), key=by_species),
                              key=by_species)
            for species, group in grouped:
                tmp_coords = [site[0] for site in group]

                coords = self._unique_coords(tmp_coords)

                allcoords.extend(coords)
                allspecies.extend(len(coords) * [species])

            # rescale occupancies if necessary
            for i, species in enumerate(allspecies):
                totaloccu = sum(species.values())
                if 1 < totaloccu <= self._occupancy_tolerance:
                    allspecies[i] = species / totaloccu

        if allspecies and len(allspecies) == len(allcoords):
            struct = Structure(lattice, allspecies, allcoords)
            struct = struct.get_sorted_structure()

            if primitive:
                struct = struct.get_primitive_structure()
                struct = struct.get_reduced_structure()
            return struct
Example #6
0
    def _get_structure(self, data, primitive, substitution_dictionary=None):
        """
        Build a Structure from a single CIF data block.

        Args:
            data: CIF block. It is indexed by tag name (for example
                ``data["_atom_site_label"]``) and its raw tag mapping is
                read through ``data.data``.
            primitive: If truthy, the assembled structure is passed through
                ``get_primitive_structure()`` and ``get_reduced_structure()``.
            substitution_dictionary: Optional mapping from atom-site label to
                symbol. When it is given (and non-empty) it replaces the
                regular-expression parsing, and labels it does not map to a
                truthy value are skipped.

        Returns:
            The sorted Structure, or None when no species were collected or
            the species and coordinate lists differ in length.
        """
        # Labels that often stand in for elements/water in cif files, and
        # the symbol each one is read as.
        special_symbols = {
            "D": "D",
            "Hw": "H",
            "Ow": "O",
            "Wat": "O",
            "wat": "O"
        }
        known_symbols = [element.symbol for element in Element]

        lattice = self.get_lattice(data)
        self.symmetry_operations = self.get_symops(data)
        oxi_states = self.parse_oxi_states(data)

        # Representative coordinate -> Composition accumulated on that site.
        coord_to_species = OrderedDict()

        def parse_symbol(sym):
            # A non-empty substitution dictionary fully replaces the regex.
            if substitution_dictionary:
                return substitution_dictionary.get(sym)
            found = re.findall(r"w?[A-Z][a-z]*", sym)
            # NOTE(review): `found` is a list, so the comparison with "?" is
            # always true; kept unchanged to preserve behaviour.
            return found[0] if found and found != "?" else ""

        def get_matching_coord(coord):
            # Return the already-stored coordinate that some symmetry image
            # of `coord` coincides with (within the site tolerance), else
            # False.
            for op in self.symmetry_operations:
                image = op.operate(coord)
                for known in coord_to_species:
                    if np.allclose(pbc_diff(image, known), (0, 0, 0),
                                   atol=self._site_tolerance):
                        return tuple(known)
            return False

        def composition_of(item):
            # Sort/group key: the Composition half of a (coord, comp) pair.
            return item[1]

        for i, label in enumerate(data["_atom_site_label"]):
            symbol = parse_symbol(label)
            if not symbol:
                continue
            if symbol not in known_symbols and symbol not in special_symbols:
                symbol = symbol[:2]

            # Check that the label gave a usable symbol; if it did not, fall
            # back to _atom_site_type_symbol when the block provides it.
            try:
                if symbol in special_symbols:
                    get_el_sp(special_symbols.get(symbol))
                else:
                    Element(symbol)
            except (KeyError, ValueError):
                if "_atom_site_type_symbol" in data.data.keys():
                    symbol = data["_atom_site_type_symbol"][i]

            if oxi_states is None:
                el = get_el_sp(special_symbols.get(symbol, symbol))
            elif symbol in special_symbols:
                el = get_el_sp(
                    special_symbols.get(symbol) + str(oxi_states[symbol]))
            else:
                el = Specie(symbol, oxi_states.get(symbol, 0))

            x = str2float(data["_atom_site_fract_x"][i])
            y = str2float(data["_atom_site_fract_y"][i])
            z = str2float(data["_atom_site_fract_z"][i])

            # A missing or unparsable occupancy counts as full occupancy.
            try:
                occu = str2float(data["_atom_site_occupancy"][i])
            except (KeyError, ValueError):
                occu = 1

            if not occu > 0:
                continue

            coord = (x, y, z)
            match = get_matching_coord(coord)
            if match:
                coord_to_species[match] += {el: occu}
            else:
                coord_to_species[coord] = Composition({el: occu})

        if any(sum(comp.values()) > 1 for comp in coord_to_species.values()):
            warnings.warn("Some occupancies sum to > 1! If they are within "
                          "the tolerance, they will be rescaled.")

        allspecies = []
        allcoords = []

        # groupby only merges adjacent items, hence the sort on the same key.
        ordered_sites = sorted(coord_to_species.items(), key=composition_of)
        for species, group in groupby(ordered_sites, key=composition_of):
            coords = self._unique_coords([coord for coord, _ in group])
            allcoords.extend(coords)
            allspecies.extend([species] * len(coords))

        # Rescale occupancies that overshoot 1 but stay within tolerance.
        for idx, species in enumerate(allspecies):
            total = sum(species.values())
            if 1 < total <= self._occupancy_tolerance:
                allspecies[idx] = species / total

        if not allspecies or len(allspecies) != len(allcoords):
            return None

        struct = Structure(lattice, allspecies, allcoords)
        struct = struct.get_sorted_structure()
        if primitive:
            struct = struct.get_primitive_structure().get_reduced_structure()
        return struct
Example #7
0
    def _get_structure(self, data, primitive, substitution_dictionary=None):
        """
        Generate structure from part of the cif.

        Two kinds of atom-site rows are handled. A row whose
        ``_atom_site_type_symbol`` has more ' + '-separated terms than its
        label (e.g. label ``Nb,Zr`` with type symbol ``'0.8Nb + 0.2Zr'``, the
        layout of CIFs extracted from the Springer Materials/Pauling File
        databases) is added as a disordered site, using the occupancies
        embedded in the type symbol. Every other row goes through the
        regular label-based parsing.

        Args:
            data: CIF block, indexed by tag name and exposing its raw tag
                mapping as ``data.data``. The block is only read, never
                modified.
            primitive: If truthy, the assembled structure is passed through
                ``get_primitive_structure()`` and ``get_reduced_structure()``.
            substitution_dictionary: Optional mapping used instead of the
                regular-expression parsing to turn a label into a symbol;
                labels it does not map to a truthy value are skipped.

        Returns:
            The sorted Structure, or None when no species were collected or
            the species and coordinate lists differ in length.
        """
        # Symbols often representing
        # common representations for elements/water in cif files
        special_symbols = {
            "D": "D",
            "Hw": "H",
            "Ow": "O",
            "Wat": "O",
            "wat": "O"
        }
        elements = [el.symbol for el in Element]

        lattice = self.get_lattice(data)
        self.symmetry_operations = self.get_symops(data)
        oxi_states = self.parse_oxi_states(data)

        # Representative coordinate -> Composition accumulated on that site.
        coord_to_species = OrderedDict()

        def parse_symbol(sym):
            """Translate a label into a symbol; "" means "skip this row"."""
            if substitution_dictionary:
                return substitution_dictionary.get(sym)
            if sym in ['OH', 'OH2']:
                warnings.warn("Symbol '{}' not recognized".format(sym))
                return ""
            m = re.findall(r"w?[A-Z][a-z]*", sym)
            return m[0] if m else ""

        def get_matching_coord(coord):
            """Return the stored coordinate equivalent to `coord`, or False."""
            for op in self.symmetry_operations:
                c = op.operate(coord)
                for k in coord_to_species.keys():
                    if np.allclose(pbc_diff(c, k), (0, 0, 0),
                                   atol=self._site_tolerance):
                        return tuple(k)
            return False

        def parse_mixed_site(symbol_str):
            """Return {element text: occupancy} for e.g. '0.8Nb + 0.2Zr'."""
            els_occu = {}
            for term in symbol_str.split(' + '):
                # Remove any bracketed digits, e.g. "(5)".
                term = re.sub(r'\([0-9]*\)', '', term.strip()).strip()
                # For a term such as "0.8Nb" the non-digit runs are "." and
                # "Nb", and the digit runs are "0" and ".8"; the second
                # match of each is the element text / fractional part.
                # NOTE(review): a term without a decimal fraction (e.g.
                # "1Nb") raises IndexError here, as it always did.
                name = re.findall(r'\D+', term)[1].replace('<sup>', '')
                els_occu[name] = float('0' + re.findall(r'\.?\d+', term)[1])
            return els_occu

        # Some test CIFs do not contain "_atom_site_type_symbol" at all.
        has_type_symbol = "_atom_site_type_symbol" in data.data.keys()

        # Rows consumed by the mixed-site handling below. They are skipped
        # by index in the regular loop instead of being deleted from
        # `data.data`: deleting touched every list-valued tag in the block
        # (including unrelated loops) and altered the caller's data.
        mixed_site_rows = set()

        if has_type_symbol:
            for idx, label in enumerate(data["_atom_site_label"]):
                type_symbol = data["_atom_site_type_symbol"][idx]

                # Springer Materials/Pauling File CIFs carry the site mix in
                # the type symbol; such a row has more ' + '-separated terms
                # in the type symbol than in the label. Without this branch
                # the example row would be read as an ordered Nb site.
                if len(type_symbol.split(' + ')) <= len(label.split(' + ')):
                    continue

                els_occu = parse_mixed_site(type_symbol)

                x = str2float(data["_atom_site_fract_x"][idx])
                y = str2float(data["_atom_site_fract_y"][idx])
                z = str2float(data["_atom_site_fract_z"][idx])
                coord = (x, y, z)

                # Add each partially occupied element on the site.
                for et, et_occu in els_occu.items():
                    species = parse_symbol(et)
                    if not species:
                        # Unrecognised/unmapped symbol: skip it, as the
                        # regular loop below does for such labels.
                        continue
                    match = get_matching_coord(coord)
                    if not match:
                        coord_to_species[coord] = Composition(
                            {species: et_occu})
                    else:
                        coord_to_species[match] += {species: et_occu}

                mixed_site_rows.add(idx)

        for i, label in enumerate(data["_atom_site_label"]):
            if i in mixed_site_rows:
                continue

            symbol = parse_symbol(label)
            if not symbol:
                continue
            if symbol not in elements and symbol not in special_symbols:
                symbol = symbol[:2]

            # make sure symbol was properly parsed from _atom_site_label
            # otherwise get it from _atom_site_type_symbol
            try:
                if symbol in special_symbols:
                    get_el_sp(special_symbols.get(symbol))
                else:
                    Element(symbol)
            except (KeyError, ValueError):
                # sometimes the site doesn't have the type_symbol.
                # we then hope the type_symbol can be parsed from the label
                if has_type_symbol:
                    symbol = data["_atom_site_type_symbol"][i]

            if oxi_states is not None:
                if symbol in special_symbols:
                    el = get_el_sp(
                        special_symbols.get(symbol) + str(oxi_states[symbol]))
                else:
                    el = Specie(symbol, oxi_states.get(symbol, 0))
            else:
                el = get_el_sp(special_symbols.get(symbol, symbol))

            x = str2float(data["_atom_site_fract_x"][i])
            y = str2float(data["_atom_site_fract_y"][i])
            z = str2float(data["_atom_site_fract_z"][i])

            # A missing or unparsable occupancy counts as full occupancy.
            try:
                occu = str2float(data["_atom_site_occupancy"][i])
            except (KeyError, ValueError):
                occu = 1

            if occu > 0:
                coord = (x, y, z)
                match = get_matching_coord(coord)
                if not match:
                    coord_to_species[coord] = Composition({el: occu})
                else:
                    coord_to_species[match] += {el: occu}

        if any(sum(c.values()) > 1 for c in coord_to_species.values()):
            warnings.warn("Some occupancies sum to > 1! If they are within "
                          "the tolerance, they will be rescaled.")

        allspecies = []
        allcoords = []

        if coord_to_species:
            # groupby only merges adjacent items, hence the sort on the
            # same key first.
            for species, group in groupby(
                    sorted(coord_to_species.items(), key=lambda x: x[1]),
                    key=lambda x: x[1]):
                tmp_coords = [site[0] for site in group]

                coords = self._unique_coords(tmp_coords)

                allcoords.extend(coords)
                allspecies.extend(len(coords) * [species])

            # rescale occupancies if necessary
            for i, species in enumerate(allspecies):
                totaloccu = sum(species.values())
                if 1 < totaloccu <= self._occupancy_tolerance:
                    allspecies[i] = species / totaloccu

        if allspecies and len(allspecies) == len(allcoords):
            struct = Structure(lattice, allspecies, allcoords)
            struct = struct.get_sorted_structure()

            if primitive:
                struct = struct.get_primitive_structure()
                struct = struct.get_reduced_structure()
            return struct
        return None
Example #8
0
    def _get_structure(self, data, primitive):
        """
        Generate structure from part of the cif.

        Args:
            data: CIF block, indexed by tag name and exposing its raw tag
                mapping as ``data.data``.
            primitive: If truthy, reduce the structure to a primitive cell.
                Magnetic CIFs use ``get_primitive_structure`` with
                ``use_site_props=True``; other CIFs use the default call
                followed by ``get_reduced_structure()``.

        Returns:
            A Structure carrying an ``_atom_site_label`` site property (plus
            ``implicit_hydrogens`` and/or ``magmom`` when applicable), or
            None when no species were collected or the species, coordinate
            and magnetic-moment lists differ in length.

        Raises:
            NotImplementedError: If the ``magcif_incommensurate`` feature
                flag is set, or if a magnetic CIF has several rows that map
                onto the same site.
        """
        def get_num_implicit_hydrogens(sym):
            # Keyed on the first three characters of the symbol/label.
            num_h = {"Wat": 2, "wat": 2, "O-H": 1}
            return num_h.get(sym[:3], 0)

        lattice = self.get_lattice(data)

        # if magCIF, get magnetic symmetry moments and magmoms
        # else standard CIF, and use empty magmom dict
        if self.feature_flags["magcif_incommensurate"]:
            raise NotImplementedError(
                "Incommensurate structures not currently supported.")
        elif self.feature_flags["magcif"]:
            self.symmetry_operations = self.get_magsymops(data)
            magmoms = self.parse_magmoms(data, lattice=lattice)
        else:
            self.symmetry_operations = self.get_symops(data)
            magmoms = {}

        oxi_states = self.parse_oxi_states(data)
        coord_to_species = OrderedDict()
        coord_to_magmoms = OrderedDict()

        def get_matching_coord(coord):
            # Return the stored coordinate that a symmetry image of `coord`
            # coincides with (within the site tolerance), else False.
            keys = list(coord_to_species.keys())
            coords = np.array(keys)
            for op in self.symmetry_operations:
                c = op.operate(coord)
                inds = find_in_coord_list_pbc(coords,
                                              c,
                                              atol=self._site_tolerance)
                # `if inds` cannot be used: np.array([0]) is falsy, so test
                # the length instead.
                if len(inds):
                    return keys[inds[0]]
            return False

        # Coordinate of a contributing row -> that row's _atom_site_label.
        label_el_dict = {}

        for i, label in enumerate(data["_atom_site_label"]):
            try:
                # If site type symbol exists, use it. Otherwise, we use the
                # label.
                type_symbol = data["_atom_site_type_symbol"][i]
                symbol = self._parse_symbol(type_symbol)
                num_h = get_num_implicit_hydrogens(type_symbol)
            except KeyError:
                symbol = self._parse_symbol(label)
                num_h = get_num_implicit_hydrogens(label)
            if not symbol:
                continue

            if oxi_states is not None:
                o_s = oxi_states.get(symbol, 0)
                # use _atom_site_type_symbol if possible for oxidation state
                if "_atom_site_type_symbol" in data.data.keys():
                    oxi_symbol = data["_atom_site_type_symbol"][i]
                    o_s = oxi_states.get(oxi_symbol, o_s)
                try:
                    el = Specie(symbol, o_s)
                except Exception:
                    # Not constructible as a Specie: fall back to a
                    # DummySpecie. `Exception` (rather than a bare except)
                    # lets KeyboardInterrupt/SystemExit propagate.
                    el = DummySpecie(symbol, o_s)
            else:
                el = get_el_sp(symbol)

            x = str2float(data["_atom_site_fract_x"][i])
            y = str2float(data["_atom_site_fract_y"][i])
            z = str2float(data["_atom_site_fract_z"][i])
            magmom = magmoms.get(label, np.array([0, 0, 0]))

            # A missing or unparsable occupancy counts as full occupancy.
            try:
                occu = str2float(data["_atom_site_occupancy"][i])
            except (KeyError, ValueError):
                occu = 1

            if occu > 0:
                coord = (x, y, z)
                match = get_matching_coord(coord)
                comp_d = {el: occu}

                if num_h > 0:
                    comp_d["H"] = num_h
                comp = Composition(comp_d)
                if not match:
                    coord_to_species[coord] = comp
                    coord_to_magmoms[coord] = magmom
                else:
                    coord_to_species[match] += comp
                    # disordered magnetic not currently supported
                    coord_to_magmoms[match] = None

                # Record the label only for rows that contribute a site.
                # Doing this outside the occupancy check reused the previous
                # row's `coord` for a skipped row (overwriting that site's
                # label), or failed when the first row was skipped.
                label_el_dict[coord] = label

        # Sites made only of O and H (O with implicit hydrogens) are left
        # out of the occupancy check.
        sum_occu = [
            sum(c.values()) for c in coord_to_species.values() if
            not set(c.elements) == {Element("O"), Element("H")}
        ]
        if any(o > 1 for o in sum_occu):
            msg = "Some occupancies (%s) sum to > 1! If they are within " \
                    "the tolerance, they will be rescaled." % str(sum_occu)
            warnings.warn(msg)
            self.errors.append(msg)

        allspecies = []
        allcoords = []
        allmagmoms = []
        allhydrogens = []
        alllabels = []

        # check to see if magCIF file is disordered
        if self.feature_flags["magcif"] and any(
                moment is None for moment in coord_to_magmoms.values()):
            # Proposed solution to this is to instead store magnetic
            # moments as Specie 'spin' property, instead of site
            # property, but this introduces ambiguities for end user
            # (such as unintended use of `spin` and Specie will have
            # fictious oxidation state).
            raise NotImplementedError(
                'Disordered magnetic structures not currently supported.'
            )

        if coord_to_species:
            # groupby only merges adjacent items, hence the sort on the
            # same key first.
            for comp, group in groupby(sorted(coord_to_species.items(),
                                              key=lambda x: x[1]),
                                       key=lambda x: x[1]):
                tmp_coords = [site[0] for site in group]
                labels = [label_el_dict[tmp_coord] for tmp_coord in tmp_coords]
                tmp_magmom = [
                    coord_to_magmoms[tmp_coord] for tmp_coord in tmp_coords
                ]

                # `unique_magmoms` is named so as not to shadow the parsed
                # `magmoms` mapping above.
                if self.feature_flags["magcif"]:
                    coords, unique_magmoms, coords_num = self._unique_coords(
                        tmp_coords, magmoms_in=tmp_magmom, lattice=lattice)
                else:
                    coords, unique_magmoms, coords_num = self._unique_coords(
                        tmp_coords)

                if set(comp.elements) == {Element("O"), Element("H")}:
                    # O with implicit hydrogens
                    im_h = comp["H"]
                    species = Composition({"O": comp["O"]})
                else:
                    im_h = 0
                    species = comp

                allhydrogens.extend(len(coords) * [im_h])
                allcoords.extend(coords)
                allspecies.extend(len(coords) * [species])
                allmagmoms.extend(unique_magmoms)

                # presumably coords_num[j] is the number of unique
                # coordinates generated from tmp_coords[j]; verify against
                # _unique_coords.
                for j, count in enumerate(coords_num):
                    alllabels.extend(count * [labels[j]])

            # rescale occupancies if necessary
            for i, species in enumerate(allspecies):
                totaloccu = sum(species.values())
                if 1 < totaloccu <= self._occupancy_tolerance:
                    allspecies[i] = species / totaloccu

        if allspecies and len(allspecies) == len(allcoords) \
                and len(allspecies) == len(allmagmoms):
            site_properties = dict()
            if any(allhydrogens):
                assert len(allhydrogens) == len(allcoords)
                site_properties["implicit_hydrogens"] = allhydrogens

            if self.feature_flags["magcif"]:
                site_properties["magmom"] = allmagmoms

            if len(site_properties) == 0:
                site_properties = None

            struct = Structure(lattice,
                               allspecies,
                               allcoords,
                               site_properties=site_properties)
            # NOTE(review): unlike sibling implementations, the structure is
            # not sorted here; presumably to keep the sites in the same
            # order as `alllabels` - confirm.

            if primitive and self.feature_flags['magcif']:
                struct = struct.get_primitive_structure(use_site_props=True)
            elif primitive:
                struct = struct.get_primitive_structure()
                struct = struct.get_reduced_structure()
            # NOTE(review): `alllabels` was built for the sites before any
            # primitive reduction; if the reduction changes the number or
            # order of sites the labels may no longer line up - confirm.
            struct.add_site_property("_atom_site_label", alllabels)
            return struct
        return None
Example #9
0
    def _get_structure(self, data, primitive, substitution_dictionary=None):
        """
        Assemble a Structure from one block of a cif.

        Args:
            data: CIF block; read by tag name (``data["..."]``) and through
                its raw tag mapping ``data.data``.
            primitive: When truthy, the result is converted with
                ``get_primitive_structure()`` followed by
                ``get_reduced_structure()``.
            substitution_dictionary: Optional label -> symbol mapping. If
                supplied and non-empty it is used instead of the regular
                expression, and labels without a truthy entry are skipped.

        Returns:
            The sorted Structure, or None if nothing was collected or the
            species and coordinate lists have different lengths.
        """
        # Symbols that cif files often use to represent elements/water,
        # with the symbol each one is read as.
        special_symbols = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O",
                           "wat": "O"}
        elements = [entry.symbol for entry in Element]

        lattice = self.get_lattice(data)
        self.symmetry_operations = self.get_symops(data)
        oxi_states = self.parse_oxi_states(data)

        # Keys are representative coordinates, values the Composition
        # gathered on that site.
        coord_to_species = OrderedDict()

        def parse_symbol(sym):
            if not substitution_dictionary:
                hits = re.findall(r"w?[A-Z][a-z]*", sym)
                # NOTE(review): `hits` is a list, so it never equals "?";
                # the comparison is preserved as-is.
                if hits and hits != "?":
                    return hits[0]
                return ""
            return substitution_dictionary.get(sym)

        def get_matching_coord(coord):
            # Compare every symmetry image of `coord` with the coordinates
            # stored so far; the first hit (within tolerance) is returned.
            for operation in self.symmetry_operations:
                moved = operation.operate(coord)
                for existing in coord_to_species.keys():
                    offset = pbc_diff(moved, existing)
                    if np.allclose(offset, (0, 0, 0),
                                   atol=self._site_tolerance):
                        return tuple(existing)
            return False

        def rescaled(species):
            # Normalise a total occupancy that exceeds 1 but is still
            # within the tolerance; anything else is returned untouched.
            totaloccu = sum(species.values())
            if 1 < totaloccu <= self._occupancy_tolerance:
                return species / totaloccu
            return species

        for row, site_label in enumerate(data["_atom_site_label"]):
            symbol = parse_symbol(site_label)
            if not symbol:
                continue

            is_known = symbol in elements or symbol in special_symbols
            if not is_known:
                symbol = symbol[:2]

            # Verify the symbol taken from the label; when it is not
            # usable, take _atom_site_type_symbol if the block has one.
            try:
                if symbol in special_symbols:
                    get_el_sp(special_symbols.get(symbol))
                else:
                    Element(symbol)
            except (KeyError, ValueError):
                if "_atom_site_type_symbol" in data.data.keys():
                    symbol = data["_atom_site_type_symbol"][row]

            if oxi_states is None:
                el = get_el_sp(special_symbols.get(symbol, symbol))
            else:
                if symbol in special_symbols:
                    el = get_el_sp(special_symbols.get(symbol) +
                                   str(oxi_states[symbol]))
                else:
                    el = Specie(symbol, oxi_states.get(symbol, 0))

            x = str2float(data["_atom_site_fract_x"][row])
            y = str2float(data["_atom_site_fract_y"][row])
            z = str2float(data["_atom_site_fract_z"][row])

            # Rows without a usable occupancy are treated as fully occupied.
            try:
                occu = str2float(data["_atom_site_occupancy"][row])
            except (KeyError, ValueError):
                occu = 1

            if occu > 0:
                coord = (x, y, z)
                match = get_matching_coord(coord)
                if match:
                    coord_to_species[match] += {el: occu}
                else:
                    coord_to_species[coord] = Composition({el: occu})

        overfull = (sum(comp.values()) > 1
                    for comp in coord_to_species.values())
        if any(overfull):
            warnings.warn("Some occupancies sum to > 1! If they are within "
                          "the tolerance, they will be rescaled.")

        allspecies = []
        allcoords = []

        if coord_to_species:
            # Sort by composition first: groupby only merges neighbours.
            by_composition = sorted(coord_to_species.items(),
                                    key=lambda pair: pair[1])
            for species, members in groupby(by_composition,
                                            key=lambda pair: pair[1]):
                coords = self._unique_coords([pair[0] for pair in members])
                allcoords.extend(coords)
                allspecies.extend(len(coords) * [species])

            # rescale occupancies if necessary
            allspecies = [rescaled(species) for species in allspecies]

        struct = None
        if allspecies and len(allspecies) == len(allcoords):
            struct = Structure(lattice, allspecies, allcoords)
            struct = struct.get_sorted_structure()

            if primitive:
                struct = struct.get_primitive_structure()
                struct = struct.get_reduced_structure()
        return struct