def handle_unparsablespecies(cif_string):
    """
    Handles CIF parsing errors arising from unrecognizable species.

    Every site of the "standardized" data block whose type symbol contains 'OH' is recorded (symbol,
    fractional coordinates, occupancy). The CIF is then re-serialized block by block, parsed into a
    structure, and each recorded site is appended to that structure as a dummy species 'X' carrying the
    original symbol in its "molecule" site property.

    :param cif_string: (str) cif file
    :return: pymatgen structure object with appended unparsable species
    :raises ValueError: if no data block whose name contains 'standardized' exists in the CIF
    """
    cif = CifFile.from_string(cif_string).data
    for block_name in cif:
        if 'standardized' in block_name:
            cif_stdblock = cif[block_name]
            break
    else:
        # Without this guard the code below would fail with an opaque UnboundLocalError.
        raise ValueError("No data block with 'standardized' in its name was found in the CIF string")

    symbols = []
    coords = []
    occupancies = []
    for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
        if 'OH' not in sym:
            continue
        symbols.append(sym)
        # Coordinate/occupancy columns are only looked up for matching sites, so a CIF without them
        # still works as long as it has no 'OH' sites.
        coords.append([float(cif_stdblock[axis][i])
                       for axis in ('_atom_site_fract_x', '_atom_site_fract_y', '_atom_site_fract_z')])
        occupancies.append(float(cif_stdblock['_atom_site_occupancy'][i]))

    # Re-serialize every block, each followed by a blank line, before handing the text to the parser.
    cif_string_new = ''.join(str(cif[key]) + '\n\n' for key in cif)
    new_struct = CifParser.from_string(cif_string_new).get_structures()[0]
    for sym, frac_coords, occupancy in zip(symbols, coords, occupancies):
        new_struct.append({DummySpecie('X'): occupancy}, frac_coords,
                          properties={"molecule": [sym]})
    return new_struct
def get_pmg_dict(cifstring: str):
    """
    Parse a CIF string with pymatgen's CifFile and return the first data block as a dict.

    Only the first data block is used; a warning is issued when the file holds more than one block
    or none at all. If the block has no '_atom_site_label' column, labels are synthesized from
    '_atom_site_type_symbol' as '<symbol><index>' and stored in the returned dict.

    :param cifstring: (str) content of the cif file
    :return: tuple (identifier of the first data block, data dict of that block)
    :raises CifFileError: if the parsed file contains no data block
    """
    blocks = CifFile.from_string(cifstring).data
    block_names = list(blocks.keys())
    n_blocks = len(block_names)
    if n_blocks == 0:
        warnings.warn('W: no structure found by pymatgen parser!')
    elif n_blocks > 1:
        warnings.warn('W: find more than 1 structures in this cif file!')

    try:
        identifier = block_names[0]
    except IndexError:
        raise CifFileError('no identifier found in the ciffile!')
    pymatgen_dict = blocks[identifier].data

    # jmol writes '_atom_site_type_symbol', but not '_atom_site_label'
    if '_atom_site_label' not in pymatgen_dict:
        warnings.warn('W: _atom_site_label not found in parsed dict')
        type_symbols = pymatgen_dict['_atom_site_type_symbol']
        pymatgen_dict['_atom_site_label'] = [
            '{}{}'.format(type_symbols[idx], idx) for idx in range(len(type_symbols))
        ]
    return identifier, pymatgen_dict
Beispiel #3
0
def handle_unparsablespecies(cif_string):
    """
    Handles CIF parsing errors arising from unrecognizable species.

    Every site of the "standardized" data block whose type symbol contains 'OH' is recorded (symbol,
    fractional coordinates, occupancy). The CIF is then re-serialized block by block, parsed into a
    structure, and each recorded site is appended to that structure as a dummy species 'X' carrying the
    original symbol in its "molecule" site property.

    :param cif_string: (str) cif file
    :return: pymatgen structure object with appended unparsable species
    :raises ValueError: if no data block whose name contains 'standardized' exists in the CIF
    """
    cif = CifFile.from_string(cif_string).data
    for block_name in cif:
        if 'standardized' in block_name:
            cif_stdblock = cif[block_name]
            break
    else:
        # Without this guard the code below would fail with an opaque UnboundLocalError.
        raise ValueError("No data block with 'standardized' in its name was found in the CIF string")

    symbols = []
    coords = []
    occupancies = []
    for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
        if 'OH' not in sym:
            continue
        symbols.append(sym)
        # Coordinate/occupancy columns are only looked up for matching sites, so a CIF without them
        # still works as long as it has no 'OH' sites.
        coords.append([
            float(cif_stdblock['_atom_site_fract_x'][i]),
            float(cif_stdblock['_atom_site_fract_y'][i]),
            float(cif_stdblock['_atom_site_fract_z'][i]),
        ])
        occupancies.append(float(cif_stdblock['_atom_site_occupancy'][i]))

    # Re-serialize every block, each followed by a blank line, before handing the text to the parser.
    cif_string_new = ''.join(str(cif[key]) + '\n\n' for key in cif)
    new_struct = CifParser.from_string(cif_string_new).get_structures()[0]
    for sym, frac_coords, occupancy in zip(symbols, coords, occupancies):
        new_struct.append({DummySpecie('X'): occupancy},
                          frac_coords,
                          properties={"molecule": [sym]})
    return new_struct
Beispiel #4
0
    def __init__(self, filename, occupancy_tolerance=1., site_tolerance=1e-4):
        """
        Load a CIF, record heuristic feature flags and sanitize every data block.

        :param filename: a string, which is read with CifFile.from_file, or any other object, whose
            ``read()`` result is parsed with CifFile.from_string
        :param occupancy_tolerance: stored unchanged as ``self._occupancy_tolerance``
        :param site_tolerance: stored unchanged as ``self._site_tolerance``
        """
        self._occupancy_tolerance = occupancy_tolerance
        self._site_tolerance = site_tolerance
        if not isinstance(filename, six.string_types):
            self._cif = CifFile.from_string(filename.read())
        else:
            self._cif = CifFile.from_file(filename)

        # store if CIF contains features from non-core CIF dictionaries
        # e.g. magCIF
        self.feature_flags = {}
        self.errors = []

        def any_dataname_contains(fragments):
            """
            Return True if any dataname of any data block contains one of
            the given fragments as a substring.
            """
            for cif_block in self._cif.data.values():
                for dataname in cif_block.data.keys():
                    if any(fragment in dataname for fragment in fragments):
                        return True
            return False

        # There doesn't seem to be a canonical way to test whether a file is
        # magCIF, so check for magnetic symmetry datanames instead (heuristic).
        looks_like_magcif = any_dataname_contains([
            '_space_group_magn',
            '_atom_site_moment',
            '_space_group_symop_magn',
        ])
        self.feature_flags['magcif'] = looks_like_magcif

        # Likewise there is no canonical test for an incommensurate magnetic
        # structure, so look for common datanames; only magCIF files qualify.
        if looks_like_magcif:
            incommensurate = any_dataname_contains(
                ['_cell_modulation_dimension', '_cell_wave_vector'])
        else:
            incommensurate = False
        self.feature_flags['magcif_incommensurate'] = incommensurate

        # pass individual CifBlocks to _sanitize_data
        for block_key in self._cif.data.keys():
            sanitized = self._sanitize_data(self._cif.data[block_key])
            self._cif.data[block_key] = sanitized
Beispiel #5
0
def get_pmg_dict(cifstring):
    """
    Parse a CIF string with pymatgen's CifFile and return its first data block.

    A warning is issued when the file holds more than one data block or none at all; only the
    first block is returned.

    :param cifstring: (str) content of the cif file
    :return: tuple (identifier of the first data block, data dict of that block)
    :raises IndexError: if the parsed file contains no data block
    """
    blocks = CifFile.from_string(cifstring).data
    block_names = list(blocks.keys())
    n_blocks = len(block_names)
    if n_blocks == 0:
        warnings.warn('W: no structure found by pymatgen parser!')
    elif n_blocks > 1:
        warnings.warn('W: find more than 1 structures in this cif file!')
    identifier = block_names[0]
    return identifier, blocks[identifier].data
Beispiel #6
0
def fix_incorrectlyparsedstructures_symbols(cif_string):
    """
    Fixes already parsed CIF files whose structures have incorrect site labels. In the "standardized"
    data block, every '_atom_site_label' entry that differs from the '_atom_site_type_symbol' entry at
    the same position is replaced with that symbol.

    :param cif_string: (str) cif file
    :return: corrected cif string (all data blocks re-serialized, each followed by a blank line)
    :raises ValueError: if no data block whose name contains 'standardized' exists in the CIF
    """
    cif = CifFile.from_string(cif_string).data
    for block_name in cif:
        if 'standardized' in block_name:
            cif_stdblock = cif[block_name]
            break
    else:
        # Without this guard the code below would fail with an opaque UnboundLocalError.
        raise ValueError("No data block with 'standardized' in its name was found in the CIF string")

    for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
        labels = cif_stdblock['_atom_site_label']
        if labels[i] != sym:
            labels[i] = sym  # mutates the block's label list in place

    return ''.join(str(cif[key]) + '\n\n' for key in cif)
def fix_incorrectlyparsedstructures_symbols(cif_string):
    """
    Fixes already parsed CIF files whose structures have incorrect site labels. In the "standardized"
    data block, every '_atom_site_label' entry that differs from the '_atom_site_type_symbol' entry at
    the same position is replaced with that symbol.

    :param cif_string: (str) cif file
    :return: corrected cif string (all data blocks re-serialized, each followed by a blank line)
    :raises ValueError: if no data block whose name contains 'standardized' exists in the CIF
    """
    cif = CifFile.from_string(cif_string).data
    for block_name in cif:
        if 'standardized' in block_name:
            cif_stdblock = cif[block_name]
            break
    else:
        # Without this guard the code below would fail with an opaque UnboundLocalError.
        raise ValueError("No data block with 'standardized' in its name was found in the CIF string")

    for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
        labels = cif_stdblock['_atom_site_label']
        if labels[i] != sym:
            labels[i] = sym  # mutates the block's label list in place

    return ''.join(str(cif[key]) + '\n\n' for key in cif)
Beispiel #8
0
def fix_incorrectlyparsedstructures_manually(cif_string):
    """
    Fixes already parsed CIF files with random errors in them (used for last 8 incorrectly parsed structures).

    In the "standardized" data block the occupancy of every site is overwritten according to its type
    symbol: Mn -> 0.76, Ti -> 0.12, V -> 0.12. Sites with any other symbol are left untouched.

    :param cif_string: (str) cif file
    :return: corrected cif string (all data blocks re-serialized, each followed by a blank line)
    :raises ValueError: if no data block whose name contains 'standardized' exists in the CIF
    """
    # Hand-picked occupancies keyed by type symbol.
    manual_occupancies = {'Mn': 0.76, 'Ti': 0.12, 'V': 0.12}

    cif = CifFile.from_string(cif_string).data
    for block_name in cif:
        if 'standardized' in block_name:
            cif_stdblock = cif[block_name]
            break
    else:
        # Without this guard the code below would fail with an opaque UnboundLocalError.
        raise ValueError("No data block with 'standardized' in its name was found in the CIF string")

    for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
        if sym in manual_occupancies:
            cif_stdblock['_atom_site_occupancy'][i] = manual_occupancies[sym]

    return ''.join(str(cif[key]) + '\n\n' for key in cif)
def fix_incorrectlyparsedstructures_manually(cif_string):
    """
    Fixes already parsed CIF files with random errors in them (used for last 8 incorrectly parsed structures).

    In the "standardized" data block the occupancy of every site is overwritten according to its type
    symbol: Mn -> 0.76, Ti -> 0.12, V -> 0.12. Sites with any other symbol are left untouched.

    :param cif_string: (str) cif file
    :return: corrected cif string (all data blocks re-serialized, each followed by a blank line)
    :raises ValueError: if no data block whose name contains 'standardized' exists in the CIF
    """
    # Hand-picked occupancies keyed by type symbol.
    manual_occupancies = {'Mn': 0.76, 'Ti': 0.12, 'V': 0.12}

    cif = CifFile.from_string(cif_string).data
    for block_name in cif:
        if 'standardized' in block_name:
            cif_stdblock = cif[block_name]
            break
    else:
        # Without this guard the code below would fail with an opaque UnboundLocalError.
        raise ValueError("No data block with 'standardized' in its name was found in the CIF string")

    for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
        if sym in manual_occupancies:
            cif_stdblock['_atom_site_occupancy'][i] = manual_occupancies[sym]

    return ''.join(str(cif[key]) + '\n\n' for key in cif)
# Module-level database handles used by the script below.
# NOTE(review): `client` is defined outside this section -- presumably a pymongo MongoClient; confirm.
db = client.springer
# Collection that the main loop below iterates over with find().
coll = db['pauling_file_unique_Parse']
# Handle to the 'incorrect_labels' collection of the same database.
newcoll = db['incorrect_labels']


if __name__ == '__main__':
    d = 0
    remove_keys = []
    for doc in coll.find({'key': 'sd_1903187'}).batch_size(75).sort('_id', pymongo.ASCENDING).skip(d).limit(500):
        d += 1
        print '#########################'
        print 'On record # {} and key {}'.format(d, doc['key'])
        # new_cif_string = fix_incorrectlyparsedstructures_symbols(doc['cif_string'])
        if 'structure' in doc:
            print Structure.from_dict(doc['structure']).composition
            cif = CifFile.from_string(doc['cif_string']).data
            for block in cif:
                if 'standardized' in block:
                    cif_stdblock = cif[block]
                    break
            # print cif_stdblock['_atom_site_label']
            # print cif_stdblock['_atom_site_type_symbol']
            incorrect_symbol = False
            for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
                if sym not in cif_stdblock['_atom_site_label'][i] and ' + ' not in sym:
                    # print sym, cif_stdblock['_atom_site_label'][i]
                    cif_stdblock['_atom_site_label'][i] = sym
                    incorrect_symbol = True
            if incorrect_symbol:
                cif_string_new = ''
                for key in cif: