def __init__(self, filename, occupancy_tolerance=1., site_tolerance=1e-4):
    """
    Load a CIF source and record which non-core CIF features it uses.

    Args:
        filename: A path when given as a string (parsed with
            ``CifFile.from_file``); any other object is treated as
            file-like and the text returned by its ``read()`` is parsed
            with ``CifFile.from_string``.
        occupancy_tolerance: Stored unchanged as
            ``self._occupancy_tolerance``.
        site_tolerance: Stored unchanged as ``self._site_tolerance``.
    """
    self._occupancy_tolerance = occupancy_tolerance
    self._site_tolerance = site_tolerance

    if not isinstance(filename, six.string_types):
        self._cif = CifFile.from_string(filename.read())
    else:
        self._cif = CifFile.from_file(filename)

    # Flags for features that come from non-core CIF dictionaries
    # (e.g. magCIF), plus a list for errors.
    self.feature_flags = {}
    self.errors = []

    def any_dataname_contains(fragments):
        """Return True if any dataname in any block contains a fragment."""
        return any(fragment in dataname
                   for cif_block in self._cif.data.values()
                   for dataname in cif_block.data.keys()
                   for fragment in fragments)

    # Heuristic: there does not seem to be a canonical way to test whether
    # a file is magCIF, so look for magnetic symmetry datanames instead.
    self.feature_flags['magcif'] = any_dataname_contains([
        '_space_group_magn',
        '_atom_site_moment',
        '_space_group_symop_magn',
    ])

    # Heuristic: likewise no canonical test for an incommensurate magnetic
    # structure, so look for common datanames -- only in magCIF files.
    self.feature_flags['magcif_incommensurate'] = (
        self.feature_flags["magcif"]
        and any_dataname_contains(['_cell_modulation_dimension',
                                   '_cell_wave_vector']))

    # Replace every CifBlock by the result of _sanitize_data.
    blocks = self._cif.data
    for block_name in blocks.keys():
        blocks[block_name] = self._sanitize_data(blocks[block_name])
def get_pmg_dict(cifstring: str):
    """
    Parse a CIF string with ``CifFile`` and return its first data block.

    Only one structure per file is handled: a warning is issued when the
    file holds more than one block, and only the first one is returned.
    When the block has no ``_atom_site_label`` column (jmol writes
    ``_atom_site_type_symbol`` but not ``_atom_site_label``), labels are
    generated as ``<type symbol><row index>`` and stored in the dict.

    :param cifstring: text of the CIF file
    :return: ``(identifier, pymatgen_dict)`` -- name and data dict of the
        first block
    :raises CifFileError: if the parsed file contains no block
    """
    parsed_blocks = CifFile.from_string(cifstring).data
    block_names = list(parsed_blocks.keys())

    n_blocks = len(block_names)
    if n_blocks == 0:
        warnings.warn('W: no structure found by pymatgen parser!')
    elif n_blocks > 1:
        warnings.warn('W: find more than 1 structures in this cif file!')

    try:
        identifier = block_names[0]
    except IndexError:
        raise CifFileError('no identifier found in the ciffile!')

    pymatgen_dict = parsed_blocks[identifier].data

    # jmol writes '_atom_site_type_symbol', but not '_atom_site_label'
    if '_atom_site_label' not in pymatgen_dict:
        warnings.warn('W: _atom_site_label not found in parsed dict')
        type_symbols = pymatgen_dict['_atom_site_type_symbol']
        pymatgen_dict['_atom_site_label'] = [
            '{}{}'.format(symbol, row)
            for row, symbol in enumerate(type_symbols)
        ]
    return identifier, pymatgen_dict
def handle_unparsablespecies(cif_string):
    """
    Handles CIF parsing errors arising from unrecognizable species.

    Rows of the 'standardized' block whose ``_atom_site_type_symbol``
    contains 'OH' are collected (symbol, fractional coordinates,
    occupancy). The CIF is then re-serialised block by block and parsed
    with ``CifParser``, and each collected row is appended to the first
    structure as ``DummySpecie('X')`` with the original symbol stored
    under the ``molecule`` site property.

    :param cif_string: (str) cif file
    :return: pymatgen structure object with appended unparsable species
    :raises ValueError: if no block name contains 'standardized'
    """
    cif = CifFile.from_string(cif_string).data

    # First block whose name mentions 'standardized'. Fail loudly instead
    # of hitting an unbound local further down when there is none.
    cif_stdblock = next(
        (cif[name] for name in cif if 'standardized' in name), None)
    if cif_stdblock is None:
        raise ValueError("no 'standardized' data block found in CIF string")

    symbols = []
    coords = []
    occupancies = []
    for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
        if 'OH' in sym:
            symbols.append(sym)
            coords.append([
                float(cif_stdblock['_atom_site_fract_x'][i]),
                float(cif_stdblock['_atom_site_fract_y'][i]),
                float(cif_stdblock['_atom_site_fract_z'][i]),
            ])
            occupancies.append(float(cif_stdblock['_atom_site_occupancy'][i]))

    # Every block is followed by a blank line.
    cif_string_new = ''.join(str(cif[key]) + '\n\n' for key in cif)

    new_struct = CifParser.from_string(cif_string_new).get_structures()[0]
    for sym, xyz, occu in zip(symbols, coords, occupancies):
        new_struct.append({DummySpecie('X'): occu}, xyz,
                          properties={"molecule": [sym]})
    return new_struct
def handle_unparsablespecies(cif_string):
    """
    Handles CIF parsing errors arising from unrecognizable species.

    Every row of the 'standardized' block whose type symbol contains 'OH'
    is remembered, the CIF is re-serialised and parsed with ``CifParser``,
    and the remembered rows are appended to the first parsed structure as
    ``DummySpecie('X')`` sites carrying the original symbol in the
    ``molecule`` property.

    :param cif_string: (str) cif file
    :return: pymatgen structure object with appended unparsable species
    :raises ValueError: if no block name contains 'standardized'
    """
    cif = CifFile.from_string(cif_string).data

    # Locate the standardized block; without this guard a missing block
    # surfaces later as an UnboundLocalError.
    cif_stdblock = next(
        (cif[name] for name in cif if 'standardized' in name), None)
    if cif_stdblock is None:
        raise ValueError("no 'standardized' data block found in CIF string")

    # (symbol, [x, y, z], occupancy) for every unparsable row
    unparsable = []
    for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
        if 'OH' not in sym:
            continue
        frac_coords = [float(cif_stdblock['_atom_site_fract_x'][i]),
                       float(cif_stdblock['_atom_site_fract_y'][i]),
                       float(cif_stdblock['_atom_site_fract_z'][i])]
        occupancy = float(cif_stdblock['_atom_site_occupancy'][i])
        unparsable.append((sym, frac_coords, occupancy))

    # Every block is followed by a blank line.
    cif_string_new = ''.join(str(cif[key]) + '\n\n' for key in cif)

    new_struct = CifParser.from_string(cif_string_new).get_structures()[0]
    for sym, frac_coords, occupancy in unparsable:
        new_struct.append({DummySpecie('X'): occupancy}, frac_coords,
                          properties={"molecule": [sym]})
    return new_struct
def get_pmg_dict(cifstring):
    """
    Parse a CIF string with ``CifFile`` and return its first data block.

    A warning is issued when the file holds more than one block; only the
    first one is returned.

    :param cifstring: text of the CIF file
    :return: ``(identifier, pymatgen_dict)`` -- name and data dict of the
        first block
    :raises IndexError: if the parsed file contains no block
    """
    cifdata = CifFile.from_string(cifstring).data
    identifiers = list(cifdata.keys())

    if not identifiers:
        warnings.warn('W: no structure found by pymatgen parser!')
        # Same exception type as the bare list lookup used to raise, but
        # with a message that says what went wrong.
        raise IndexError('no identifier found in the ciffile!')
    if len(identifiers) > 1:
        warnings.warn('W: find more than 1 structures in this cif file!')

    identifier = identifiers[0]
    pymatgen_dict = cifdata[identifier].data
    return identifier, pymatgen_dict
def make_cif(self):
    """
    Build a pymatgen CifFile object from the structure info parsed out of
    SPuDS output.txt.

    Returns:
        cf: pymatgen CifFile object holding one CifBlock stored under
            ``self.formula``
    """
    # Structure info reported by SPuDS
    site_list, a_lat, b_lat, c_lat, alp, bet, gam = parse_spuds_out(self)

    # Lattice parameters mapped for .cif compatibility
    a, b, c, alpha, beta, gamma = map_lattice_menu_1(
        self, a_lat, b_lat, c_lat, alp, bet, gam)

    # Symmetry data for the selected entry of the symops dict
    symd = self.symops_dict[self.symops_key]

    # Cell, symmetry and atom-type .cif parameters
    data = {
        '_cell_length_a': a,
        '_cell_length_b': b,
        '_cell_length_c': c,
        '_cell_angle_alpha': alpha,
        '_cell_angle_beta': beta,
        '_cell_angle_gamma': gamma,
        '_space_group_name_H-M_alt': symd['name'],
        '_symmetry_Int_tables_number': symd['number'],
        '_symmetry_cell_setting': symd['latsym'],
        '_space_group_symop_operation_xyz': symd['symops'],
        '_atom_type_symbol': self.ellist,
        '_atom_type_oxidation_number': self.oxilist,
    }

    # Atom-site columns, in the same order as the fields of each entry of
    # site_list.
    # NOTE(review): the CIF core dictionary spells the Wyckoff data name
    # '_atom_site_Wyckoff_symbol'; the spelling below is kept unchanged to
    # preserve the generated output -- confirm before renaming.
    site_columns = [
        '_atom_site_label',
        '_atom_site_type_symbol',
        '_atom_site_symmetry_multiplicity',
        '_atom_site_Wycoff_symbol',
        '_atom_site_fract_x',
        '_atom_site_fract_y',
        '_atom_site_fract_z',
        '_atom_site_occupancy',
    ]
    for column, dataname in enumerate(site_columns):
        data[dataname] = [site[column] for site in site_list]

    # .cif file loops
    cif_loops = [
        ['_space_group_symop_operation_xyz'],
        ['_atom_type_symbol', '_atom_type_oxidation_number'],
        list(site_columns),
    ]

    # .cif file header
    cif_header = 'SPuDS'

    # Wrap the single block in a CifFile
    blocks = OrderedDict()
    blocks[self.formula] = CifBlock(data, cif_loops, cif_header)
    return CifFile(blocks)
def fix_incorrectlyparsedstructures_symbols(cif_string):
    """
    Fixes already parsed CIF files whose structures have incorrect atom
    site labels. Every ``_atom_site_label`` entry of the 'standardized'
    block that differs from the matching ``_atom_site_type_symbol`` entry
    is replaced by that symbol.

    :param cif_string: (str) cif file
    :return: corrected cif string
    :raises ValueError: if no block name contains 'standardized'
    """
    cif = CifFile.from_string(cif_string).data

    # First block whose name mentions 'standardized'. Fail loudly instead
    # of hitting an unbound local further down when there is none.
    cif_stdblock = next(
        (cif[name] for name in cif if 'standardized' in name), None)
    if cif_stdblock is None:
        raise ValueError("no 'standardized' data block found in CIF string")

    for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
        if sym != cif_stdblock['_atom_site_label'][i]:
            cif_stdblock['_atom_site_label'][i] = sym

    # Every block is followed by a blank line.
    return ''.join(str(cif[key]) + '\n\n' for key in cif)
def fix_incorrectlyparsedstructures_manually(cif_string):
    """
    Fixes already parsed CIF files with random errors in them (used for
    last 8 incorrectly parsed structures). The occupancy of every Mn, Ti
    and V row of the 'standardized' block is overwritten with a fixed
    value.

    :param cif_string: (str) cif file
    :return: corrected cif string
    :raises ValueError: if no block name contains 'standardized'
    """
    # Hand-picked occupancies, keyed by atom site type symbol.
    corrected_occupancy = {'Mn': 0.76, 'Ti': 0.12, 'V': 0.12}

    cif = CifFile.from_string(cif_string).data

    # First block whose name mentions 'standardized'. Fail loudly instead
    # of hitting an unbound local further down when there is none.
    cif_stdblock = next(
        (cif[name] for name in cif if 'standardized' in name), None)
    if cif_stdblock is None:
        raise ValueError("no 'standardized' data block found in CIF string")

    for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']):
        if sym in corrected_occupancy:
            cif_stdblock['_atom_site_occupancy'][i] = corrected_occupancy[sym]

    # Every block is followed by a blank line.
    return ''.join(str(cif[key]) + '\n\n' for key in cif)
db = client.springer coll = db['pauling_file_unique_Parse'] newcoll = db['incorrect_labels'] if __name__ == '__main__': d = 0 remove_keys = [] for doc in coll.find({'key': 'sd_1903187'}).batch_size(75).sort('_id', pymongo.ASCENDING).skip(d).limit(500): d += 1 print '#########################' print 'On record # {} and key {}'.format(d, doc['key']) # new_cif_string = fix_incorrectlyparsedstructures_symbols(doc['cif_string']) if 'structure' in doc: print Structure.from_dict(doc['structure']).composition cif = CifFile.from_string(doc['cif_string']).data for block in cif: if 'standardized' in block: cif_stdblock = cif[block] break # print cif_stdblock['_atom_site_label'] # print cif_stdblock['_atom_site_type_symbol'] incorrect_symbol = False for i, sym in enumerate(cif_stdblock['_atom_site_type_symbol']): if sym not in cif_stdblock['_atom_site_label'][i] and ' + ' not in sym: # print sym, cif_stdblock['_atom_site_label'][i] cif_stdblock['_atom_site_label'][i] = sym incorrect_symbol = True if incorrect_symbol: cif_string_new = '' for key in cif:
def __init__(self, structure):
    """
    Run the Isotropy findsym tools on a structure and load the result.

    The constructor copies ``structure``, calls ``self.write_cif_input()``,
    runs ``findsym_cifinput`` on the mcif input and writes everything it
    prints to ``findsym_in.in``, runs ``findsym`` on that file, then reads
    ``findsym.cif`` and builds ``self.magnetic_space_group`` from its
    ``_symmetry_Int_Tables_number`` entry.

    Args:
        structure: object providing ``copy()``; the copy is kept as
            ``self.input_structure``.

    Raises:
        Exception: if starting either findsym executable raises
            ``FileNotFoundError``.
    """
    self.input_structure = structure.copy()

    # The 'iso/' directory next to this module holds the executables and
    # the cif input/output files.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    iso_location = os.path.join(module_dir, 'iso/')
    self.input_filename_cif = os.path.join(iso_location,
                                           'findsym_input.mcif')
    self.output_filename_cif = os.path.join(iso_location, 'findsym.cif')
    # Bare file name, i.e. not joined onto iso_location.
    self.input_filename_findsym = 'findsym_in.in'

    self.write_cif_input()

    # Step 1: turn the mcif file into findsym input.
    cifinput_command = 'findsym_cifinput ' + self.input_filename_cif
    logger.debug("""starting isotropy session in {} using isotropy in: {}""".format(
        os.getcwd(), iso_location))
    self.findsym_cifinput_process = Command(
        os.path.join(iso_location, cifinput_command),
        stdout=Capture(buffer_size=1),
        env={"ISODATA": iso_location})
    try:
        self.findsym_cifinput_process.run(input=PIPE, async_=False)
    except FileNotFoundError:
        raise Exception(
            "Couldn't find Isotropy (findsym) for Linux, see installation instructions"
        )

    # Copy the converter's output line by line into the findsym input
    # file; an empty read marks the end of the output.
    with open(self.input_filename_findsym, "w") as io_file:
        while True:
            this_line = self.findsym_cifinput_process.stdout.readline(
            ).decode()
            if not this_line:
                break
            logger.debug("isotropy: {}".format(this_line))
            io_file.write(this_line)

    # Step 2: run findsym on the file written above.
    run_command = 'findsym ' + self.input_filename_findsym
    self.findsym_process = Command(
        os.path.join(iso_location, run_command),
        stdout=Capture(buffer_size=1),
        env={"ISODATA": iso_location})
    try:
        self.findsym_process.run(input=PIPE, async_=False)
    except FileNotFoundError:
        raise Exception(
            "Couldn't find Isotropy (findsym) for Linux, see installation instructions"
        )

    # Step 3: read the space group number back from findsym's cif output.
    self.output_cif_file = CifFile.from_file(self.output_filename_cif)
    findsym_block = self.output_cif_file.data['findsym-output']
    msg_int_symbol = int(findsym_block.data['_symmetry_Int_Tables_number'])
    self.magnetic_space_group = MagneticSpaceGroup(msg_int_symbol)
def __init__(self, struct, symprec=None, charges=None):
    """
    A wrapper around CifFile to write CIF files from pymatgen structures.

    Args:
        struct (Structure): structure to write
        symprec (float): If not none, finds the symmetry of the structure
            and writes the cif with symmetry information. Passes symprec
            to the SpacegroupAnalyzer
        charges (list): Optional values written as the
            ``_atom_site_charge`` column of the atom-site loop. When None
            (the default) the column is left out.
            NOTE(review): one row is written per species per site, and
            with symprec set the rows come from the refined,
            symmetry-reduced structure, so ``charges`` presumably has to
            be ordered to match those rows -- confirm with callers.
    """
    format_str = "{:.8f}"

    block = OrderedDict()
    loops = []
    spacegroup = ("P 1", 1)
    if symprec is not None:
        sf = SpacegroupAnalyzer(struct, symprec)
        spacegroup = (sf.get_space_group_symbol(),
                      sf.get_space_group_number())
        # Needs the refined structure when using symprec. This converts
        # primitive to conventional structures, the standard for CIF.
        struct = sf.get_refined_structure()

    latt = struct.lattice
    comp = struct.composition
    no_oxi_comp = comp.element_composition
    block["_symmetry_space_group_name_H-M"] = spacegroup[0]
    for cell_attr in ['a', 'b', 'c']:
        block["_cell_length_" + cell_attr] = format_str.format(
            getattr(latt, cell_attr))
    for cell_attr in ['alpha', 'beta', 'gamma']:
        block["_cell_angle_" + cell_attr] = format_str.format(
            getattr(latt, cell_attr))
    block["_symmetry_Int_Tables_number"] = spacegroup[1]
    block["_chemical_formula_structural"] = no_oxi_comp.reduced_formula
    block["_chemical_formula_sum"] = no_oxi_comp.formula
    block["_cell_volume"] = "%.8f" % latt.volume

    _, fu = no_oxi_comp.get_reduced_composition_and_factor()
    block["_cell_formula_units_Z"] = str(int(fu))

    if symprec is None:
        block["_symmetry_equiv_pos_site_id"] = ["1"]
        block["_symmetry_equiv_pos_as_xyz"] = ["x, y, z"]
    else:
        # Analyse the refined structure (struct was replaced above).
        sf = SpacegroupAnalyzer(struct, symprec)
        ops = [
            SymmOp.from_rotation_and_translation(
                op.rotation_matrix, op.translation_vector).as_xyz_string()
            for op in sf.get_symmetry_operations()
        ]
        block["_symmetry_equiv_pos_site_id"] = \
            ["%d" % i for i in range(1, len(ops) + 1)]
        block["_symmetry_equiv_pos_as_xyz"] = ops

    loops.append(
        ["_symmetry_equiv_pos_site_id", "_symmetry_equiv_pos_as_xyz"])

    try:
        symbol_to_oxinum = OrderedDict(
            [(str(el), float(el.oxi_state))
             for el in sorted(comp.elements)])
    except (TypeError, AttributeError):
        # No usable oxidation states: the atom-type loop is left out.
        pass
    else:
        block["_atom_type_symbol"] = list(symbol_to_oxinum.keys())
        block["_atom_type_oxidation_number"] = \
            list(symbol_to_oxinum.values())
        loops.append(["_atom_type_symbol", "_atom_type_oxidation_number"])

    # One row per (species, site): (species, occupancy, site, multiplicity)
    rows = []
    if symprec is None:
        for site in struct:
            for sp, occu in sorted(site.species_and_occu.items()):
                rows.append((sp, occu, site, "1"))
    else:
        # The following just presents a deterministic ordering.
        unique_sites = [
            (sorted(sites,
                    key=lambda s: tuple([abs(x) for x in s.frac_coords]))[0],
             len(sites))
            for sites in sf.get_symmetrized_structure().equivalent_sites
        ]
        for site, mult in sorted(
                unique_sites,
                key=lambda t: (t[0].species_and_occu.average_electroneg,
                               -t[1], t[0].a, t[0].b, t[0].c)):
            for sp, occu in site.species_and_occu.items():
                rows.append((sp, occu, site, "%d" % mult))

    block["_atom_site_type_symbol"] = [str(sp) for sp, _, _, _ in rows]
    # Labels are the element symbol followed by a running 1-based counter.
    block["_atom_site_label"] = [
        "{}{}".format(sp.symbol, count)
        for count, (sp, _, _, _) in enumerate(rows, 1)
    ]
    block["_atom_site_symmetry_multiplicity"] = \
        [mult for _, _, _, mult in rows]
    block["_atom_site_fract_x"] = \
        ["{0:f}".format(site.a) for _, _, site, _ in rows]
    block["_atom_site_fract_y"] = \
        ["{0:f}".format(site.b) for _, _, site, _ in rows]
    block["_atom_site_fract_z"] = \
        ["{0:f}".format(site.c) for _, _, site, _ in rows]
    block["_atom_site_occupancy"] = [str(occu) for _, occu, _, _ in rows]

    atom_site_loop = [
        "_atom_site_type_symbol",
        "_atom_site_label",
        "_atom_site_symmetry_multiplicity",
        "_atom_site_fract_x",
        "_atom_site_fract_y",
        "_atom_site_fract_z",
        "_atom_site_occupancy",
    ]
    # Only add the charge column when charges were supplied: a None entry
    # inside a loop is not a column and cannot be written row by row.
    if charges is not None:
        block["_atom_site_charge"] = charges
        atom_site_loop.append("_atom_site_charge")
    loops.append(atom_site_loop)

    d = OrderedDict()
    d[comp.reduced_formula] = CifBlock(block, loops, comp.reduced_formula)
    self._cf = CifFile(d)