def is_dupe(self, doc, sim_tol=5e-2, extra_pdfs=None):
    """ Decide whether `doc` duplicates an already-known structure, judged
    by the PDF similarity distance to structures of matching stoichiometry.

    Parameters:
        doc (dict): structure to compare.

    Keyword Arguments:
        sim_tol (float): a comparison counts as a duplicate when the
            similarity distance is strictly below this value.
        extra_pdfs (list(dict)): additional structures to compare against;
            each entry is read via its "pdf" and "stoichiometry" keys.

    Returns:
        bool: True as soon as one comparison falls below `sim_tol`,
            False if none does.

    """
    candidate_pdf = PDF(doc, projected=True)

    # first pass: the PDFs already held by this object
    for position, known_pdf in enumerate(self.pdfs):
        if sorted(doc["stoichiometry"]) != self._stoichs[position]:
            continue
        if candidate_pdf.get_sim_distance(known_pdf, projected=True) < sim_tol:
            return True

    if extra_pdfs is None:
        return False

    # second pass: externally supplied structures
    for extra_doc in extra_pdfs:
        extra_pdf = extra_doc["pdf"]
        if sorted(doc["stoichiometry"]) != sorted(extra_doc["stoichiometry"]):
            continue
        # the comparison mode follows the `projected` attribute of the extra PDF
        distance = candidate_pdf.get_sim_distance(extra_pdf, projected=extra_pdf.projected)
        if distance < sim_tol:
            return True

    return False
def test_pdf_primitive_vs_supercell(self):
    """ The PDFs of the KP primitive cell and the KP supercell files must
    overlap with a similarity distance between 0 and 1e-3. """

    def _load_cell(filename, label):
        # read a res file and attach the derived lattice quantities
        cell, _ = res2dict(REAL_PATH + filename, db=False)
        cell["text_id"] = [label, "cell"]
        cell["lattice_cart"] = abc2cart(cell["lattice_abc"])
        cell["cell_volume"] = cart2volume(cell["lattice_cart"])
        return cell

    test_doc = _load_cell("data/KP_primitive.res", "primitive")
    supercell_doc = _load_cell("data/KP_supercell.res", "supercell")

    # identical settings for both cells
    pdf_settings = dict(dr=0.01, low_mem=True, rmax=10, num_images="auto", debug=DEBUG)
    test_doc["pdf"] = PDF(test_doc, **pdf_settings)
    supercell_doc["pdf"] = PDF(supercell_doc, **pdf_settings)

    overlap = PDFOverlap(test_doc["pdf"], supercell_doc["pdf"])
    self.assertLessEqual(overlap.similarity_distance, 1e-3)
    self.assertGreaterEqual(overlap.similarity_distance, 0.0)
def pdf_sim_dist(doc_test, doc_supercell):
    """ Return the PDF similarity distance between two structures.

    Both documents have their "text_id" overwritten (["test", "cell"] and
    ["super", "cell"] respectively) before their PDFs are computed.

    Parameters:
        doc_test (dict): first structure.
        doc_supercell (dict): second structure.

    Returns:
        the `similarity_distance` attribute of the resulting PDFOverlap.

    """
    for document, prefix in ((doc_test, "test"), (doc_supercell, "super")):
        document["text_id"] = [prefix, "cell"]

    fingerprints = [PDF(document, low_mem=True) for document in (doc_test, doc_supercell)]
    return PDFOverlap(*fingerprints).similarity_distance
def test_pdf_from_projected(self):
    """ Compute two PDFs of the same structure and check that their `gr`
    arrays agree.

    NOTE(review): both PDFs are constructed with identical arguments (only
    the document's "text_id" differs), so no `projected` option is actually
    exercised here -- confirm whether one call was meant to request it.
    """
    doc, _ = res2dict(REAL_PATH + "data/LiPZn-r57des.res")
    doc["lattice_cart"] = abc2cart(doc["lattice_abc"])

    labelled_keys = (
        ("unprojected", "pdf_unprojected"),
        ("projected", "pdf_projected"),
    )
    for label, key in labelled_keys:
        doc["text_id"] = [label, "test"]
        doc[key] = PDF(doc, dr=0.01, debug=False)

    np.testing.assert_array_almost_equal(
        doc["pdf_unprojected"].gr,
        doc["pdf_projected"].gr,
    )
def test_single_atom_pdf(self):
    """ For one atom in a cubic box of side 20, every PDF variant
    (histogram, smear, low-memory smear) must have non-negligible `gr`
    only in the bins at 20, sqrt(2)*20, sqrt(3)*20 and 40. """
    from math import ceil

    box_size = 20
    rmax = 41
    dr = 0.1
    num_images = "auto"

    doc = {
        "positions_frac": [[0.5, 0.5, 0.5]],
        "atom_types": ["C"],
        "lattice_cart": np.asarray([[box_size, 0, 0],
                                    [0, box_size, 0],
                                    [0, 0, box_size]]),
        "cell_volume": box_size**3,
    }

    hist_settings = dict(
        num_images=num_images, dr=dr, rmax=rmax,
        lazy=True, style="histogram", debug=True,
    )
    smear_settings = dict(
        num_images=num_images, gaussian_width=0.01, dr=0.1, rmax=rmax,
        lazy=True, style="smear", debug=True,
    )
    # (key to store under, text_id to set beforehand, PDF keyword arguments)
    variants = (
        ("pdf", ["hist", "ogram"], hist_settings),
        ("pdf_smear", ["smear"], smear_settings),
        ("pdf_low", ["low"], dict(low_mem=True, **smear_settings)),
    )
    for key, text_id, settings in variants:
        doc["text_id"] = text_id
        # PDFs are created lazily, so the calculation is triggered by hand
        doc[key] = PDF(doc, **settings)
        doc[key].calc_pdf()

    peaks = [20, np.sqrt(2) * 20, np.sqrt(3) * 20, 40]
    expected_bins = [ceil(distance / dr) for distance in peaks]

    for key in ("pdf_low", "pdf", "pdf_smear"):
        occupied_bins = np.where(doc[key].gr > 1e-8)[0].tolist()
        self.assertListEqual(occupied_bins, expected_bins)
def calc_pdfs(self):
    """ Rebuild `self._pdfs` and `self._stoichs` from `self.populace`.

    For every structure in the populace a PDF (with `projected=True`) is
    appended to `self._pdfs` and the sorted "stoichiometry" entry to
    `self._stoichs`, so the two lists share their indexing.

    """
    pdfs, stoichs = [], []
    self._pdfs = pdfs
    self._stoichs = stoichs

    for member in self.populace:
        pdfs.append(PDF(member, projected=True))
        stoichs.append(sorted(member["stoichiometry"]))
def test_concurrent_pdf(self):
    """ PDFs produced by PDFFactory for up to 24 res files must match, to
    6 decimals, the PDFs computed one at a time on a deep copy of the same
    cursor. Timings of both routes are printed. """
    import glob
    import numpy as np
    import time
    from copy import deepcopy

    res_files = glob.glob(REAL_PATH + "data/hull-KPSn-KP/*.res")[0:24]
    cursor = [res2dict(res_file, db=False)[0] for res_file in res_files]
    serial_cursor = deepcopy(cursor)

    pdf_args = dict(
        dr=0.1,
        num_images="auto",
        gaussian_width=0.1,
        lazy=False,
        projected=False,
    )

    factory_started = time.time()
    pdf_factory = PDFFactory(cursor, **pdf_args)
    factory_elapsed = time.time() - factory_started

    serial_started = time.time()
    for serial_doc in serial_cursor:
        serial_doc["pdf"] = PDF(serial_doc, **pdf_args, timing=False)
    serial_elapsed = time.time() - serial_started

    timing_line = "{:.2f} s over {} processes vs {:.2f} s in serial".format(
        factory_elapsed, pdf_factory.nprocs, serial_elapsed)
    print(timing_line)
    speedup_line = "Corresponding to a speedup of {:.1f} vs ideal {:.1f}".format(
        serial_elapsed / factory_elapsed, pdf_factory.nprocs)
    print(speedup_line)

    # the factory stores its results on the documents of `cursor`
    for position, serial_doc in enumerate(serial_cursor):
        factory_doc = cursor[position]
        np.testing.assert_array_almost_equal(
            serial_doc["pdf"].gr, factory_doc["pdf"].gr, decimal=6)
def calculate_pdf(self, **kwargs):
    """ Return the PDF stored for this structure, computing it if absent.

    When no 'pdf' entry exists in `self._data`, a PDF is built from
    `self._data` with the passed keyword arguments (labelled with
    `self.formula_tex`) and stored under 'pdf'. When an entry already
    exists it is returned unchanged and `kwargs` are not used.

    """
    from matador.fingerprints.pdf import PDF

    if 'pdf' in self._data:
        return self._data['pdf']

    self._data['pdf'] = PDF(self._data, label=self.formula_tex, **kwargs)
    return self._data['pdf']
def pdf(self):
    """ Return the PDF object (pair distribution function) for the
    structure.

    The PDF is computed with default PDF settings the first time it is
    requested and stored under the 'pdf' key of `self._data`; later calls
    return the stored object.

    """
    from matador.fingerprints.pdf import PDF

    if 'pdf' in self._data:
        return self._data['pdf']

    self._data['pdf'] = PDF(self._data, label=self.formula_tex)
    return self._data['pdf']
def test_overlap_smear_vs_hist(self):
    """ A smeared PDF (dr=0.01) and a histogram PDF (dr=0.1) of the same
    structure must have a similarity distance in (0, 0.02]. """
    doc, _ = res2dict(REAL_PATH + "data/LiPZn-r57des.res")
    doc["lattice_cart"] = abc2cart(doc["lattice_abc"])

    smear_settings = dict(
        num_images=3,
        dr=0.01,
        gaussian_width=0.1,
        projected=False,
        style="smear",
        low_mem=True,
    )
    hist_settings = dict(num_images=3, dr=0.1, projected=False, style="histogram")

    doc["text_id"] = ["smear", "test"]
    doc["pdf_smear"] = PDF(doc, **smear_settings)

    doc["text_id"] = ["hist", "test"]
    doc["pdf_hist"] = PDF(doc, **hist_settings)

    overlap = PDFOverlap(doc["pdf_smear"], doc["pdf_hist"])
    self.assertLessEqual(overlap.similarity_distance, 0.02)
    self.assertGreater(overlap.similarity_distance, 0.0)
def test_identity_overlap(self):
    """ Overlapping a PDF with itself must give a similarity distance of
    exactly 0.0. """
    doc, _ = res2dict(REAL_PATH + "data/LiPZn-r57des.res")
    doc["lattice_cart"] = abc2cart(doc["lattice_abc"])
    doc["text_id"] = ["pdf", "test"]

    smear_settings = dict(
        num_images=3,
        dr=0.001,
        gaussian_width=0.1,
        style="smear",
        debug=False,
        low_mem=True,
    )
    doc["pdf_smear"] = PDF(doc, **smear_settings)

    # the same object is passed on both sides of the overlap
    self_overlap = PDFOverlap(doc["pdf_smear"], doc["pdf_smear"])
    self.assertEqual(self_overlap.similarity_distance, 0.0)
def test_auto_images_vs_large(self):
    """ A PDF computed with `num_images="auto"` must agree with one
    computed with a fixed `num_images=5`, all other settings being equal. """
    doc, _ = res2dict(REAL_PATH + "data/LiPZn-r57des.res")
    doc["lattice_cart"] = abc2cart(doc["lattice_abc"])
    doc["text_id"] = ["pdf", "test"]

    image_choices = (
        ("pdf_num_images", 5),
        ("pdf_auto_images", "auto"),
    )
    for key, image_setting in image_choices:
        doc[key] = PDF(
            doc,
            low_mem=True,
            num_images=image_setting,
            rmax=15,
            dr=0.1,
            debug=True,
            projected=False,
        )

    np.testing.assert_array_almost_equal(
        doc["pdf_num_images"].gr,
        doc["pdf_auto_images"].gr,
    )
def test_concurrent_pdf_stoichs(self):
    """ PDFs produced by PDFFactory on the cursor of a K-Sn-P convex hull
    must match, to 6 decimals, the PDFs computed one at a time on a deep
    copy of that cursor. Timings of both routes are printed. """
    import glob
    import numpy as np
    import time
    from copy import deepcopy
    from matador.hull import QueryConvexHull

    res_files = glob.glob(REAL_PATH + "data/hull-KPSn-KP/*.res")
    cursor = [res2dict(res_file, db=True)[0] for res_file in res_files]

    hull = QueryConvexHull(
        cursor=cursor,
        no_plot=True,
        hull_cutoff=0.5,
        summary=True,
        elements=["K", "Sn", "P"],
        quiet=True,
    )
    # copy taken before the factory runs on hull.cursor
    serial_cursor = deepcopy(hull.cursor)

    pdf_args = dict(
        dr=0.1,
        num_images="auto",
        gaussian_width=0.1,
        lazy=False,
        projected=False,
    )

    factory_started = time.time()
    pdf_factory = PDFFactory(hull.cursor, **pdf_args)
    factory_elapsed = time.time() - factory_started

    serial_started = time.time()
    for serial_doc in serial_cursor:
        serial_doc["pdf"] = PDF(serial_doc, **pdf_args, timing=False)
    serial_elapsed = time.time() - serial_started

    timing_line = "{:.2f} s over {} processes vs {:.2f} s in serial".format(
        factory_elapsed, pdf_factory.nprocs, serial_elapsed)
    print(timing_line)
    speedup_line = "Corresponding to a speedup of {:.1f} vs ideal {:.1f}".format(
        serial_elapsed / factory_elapsed, pdf_factory.nprocs)
    print(speedup_line)

    for position, serial_doc in enumerate(serial_cursor):
        factory_doc = hull.cursor[position]
        np.testing.assert_array_almost_equal(
            serial_doc["pdf"].gr, factory_doc["pdf"].gr, decimal=6)
def test_ideal_gas_pdf(self, retry=0):
    """ DEPRECATED. Slow, and not very useful.

    Places `num_atoms` carbon atoms at uniformly random fractional
    positions in a cubic box, computes the histogram PDF, and checks that
    the mean of `gr` from bin 50 onwards equals 1.0 to one decimal place.

    Keyword Arguments:
        retry (int): attempt counter; the test fails immediately unless it
            is below `max_retries`.

    """
    max_retries = 1
    self.assertLess(
        retry, max_retries,
        msg="After {} attempts, PDF still failed.".format(retry))

    num_atoms = 100
    box_size = 10
    num_samples = 1
    rmax = 15
    dr = 0.01
    num_images = "auto"

    # create fake matador doc
    doc = dict()
    doc["atom_types"] = num_atoms * ["C"]
    doc["lattice_cart"] = np.asarray([[box_size, 0, 0],
                                      [0, box_size, 0],
                                      [0, 0, box_size]])
    doc["cell_volume"] = box_size**3
    doc["text_id"] = ["ideal", "gas"]

    # bounded loop: exactly `num_samples` random configurations are drawn
    # and each one is checked, so the test always terminates
    for _ in range(num_samples):
        doc["positions_frac"] = np.random.rand(num_atoms, 3)
        doc["text_id"] = "hist"
        doc["pdf"] = PDF(
            doc,
            num_images=num_images,
            dr=dr,
            rmax=rmax,
            lazy=True,
            style="histogram",
            debug=True,
        )
        # the PDF is created lazily, so trigger the calculation explicitly
        doc["pdf"].calc_pdf()
        self.assertAlmostEqual(np.mean(doc["pdf"].gr[50:]), 1.0, places=1)
def _cif_set_unreduced_sites(doc):
    """ Expands sites by symmetry operations found under the key
    `_symmetry_equiv_pos_as_xyz` in the cif_dict.

    Parameters:
        doc (dict): matador document to modify. Must contain symops
            under doc['_cif']['_symmetry_equiv_pos_as_xyz']. This doc
            is updated with new `positions_frac`, `num_atoms`, `atom_types`
            and `site_occupancy`.

    Raises:
        RuntimeError: if a symmetry element contains a character outside
            the permitted set, or if the expanded positions, occupancies
            and atom types end up with different lengths.

    """
    from matador.utils.cell_utils import wrap_frac_coords
    from matador.utils.cell_utils import calc_pairwise_distances_pbc
    from matador.fingerprints.pdf import PDF

    species_sites = dict()
    species_occ = dict()

    symmetry_functions = []

    def _apply_sym_op(x=None, y=None, z=None, symmetry=None):
        """ Returns the site after the applied symmetry operation, in string representation. """
        # cannot use a listcomp here due to interplay with functools
        # NOTE: the expressions are evaluated with this function's x, y, z
        # arguments in scope; eval is unsafe in general, so every element is
        # checked against the whitelist below before it can reach this point
        return [eval(symmetry[0]), eval(symmetry[1]), eval(symmetry[2])]

    # the only characters a symmetry element may contain
    allowed_chars = frozenset('xyz./+-0123456789')

    for symmetry in doc['_cif']['_symmetry_equiv_pos_as_xyz']:
        symmetry = tuple(elem.strip() for elem in symmetry.strip('\'').split(','))
        # check the element before doing an eval, as it is so unsafe
        for element in symmetry:
            if not allowed_chars.issuperset(element):
                raise RuntimeError('You are trying to do something naughty with the symmetry element {}'
                                   .format(element))

        symmetry_functions.append(functools.partial(_apply_sym_op, symmetry=symmetry))

    # apply every symmetry operation to every site, grouped by species
    for ind, site in enumerate(doc['positions_frac']):
        species = doc['atom_types'][ind]
        occupancy = doc['site_occupancy'][ind]
        if species not in species_sites:
            species_sites[species] = []
            species_occ[species] = []
        for symmetry in symmetry_functions:
            x, y, z = site
            new_site = symmetry(x=x, y=y, z=z)
            new_site = wrap_frac_coords([new_site])[0]
            species_sites[species].append(new_site)
            species_occ[species].append(occupancy)

    unreduced_sites = []
    unreduced_occupancies = []
    unreduced_species = []

    # this loop assumes that no symmetry operation can map 2 unlike sites upon one another
    for species, sites in species_sites.items():
        unreduced_sites.extend(sites)
        unreduced_occupancies.extend(species_occ[species])
        unreduced_species.extend(len(sites) * [species])

    # check that the symmetry procedure has not generated overlapping atoms
    # this can happen for certain symmetries/cells if positions are not
    # reported to sufficient precision
    images = PDF._get_image_trans_vectors_auto(
        doc['lattice_cart'], 0.1, 0.01, max_num_images=1,
    )

    poscarts = frac2cart(doc['lattice_cart'], unreduced_sites)
    # NOTE(review): the positional 0.01 is presumably the distance cutoff
    # below which two sites count as overlapping -- confirm against
    # calc_pairwise_distances_pbc
    distances = calc_pairwise_distances_pbc(
        poscarts, images, doc['lattice_cart'], 0.01,
        compress=False, filter_zero=False, per_image=True
    )

    dupe_set = set()
    for img in distances:
        try:
            i_s, j_s = np.where(~img.mask)
        except ValueError:
            # ValueError will be raised if there is only one atom as i_s, j_s cannot be unpacked
            continue
        for i, j in zip(i_s, j_s):
            if i == j:
                continue
            # sites can overlap if they have partial occupancy
            if i not in dupe_set and unreduced_species[i] == unreduced_species[j]:
                dupe_set.add(j)

    def _without_dupes(values):
        """ Returns the entries of `values` whose index was not flagged as a duplicate. """
        return [value for ind, value in enumerate(values) if ind not in dupe_set]

    doc['site_occupancy'] = _without_dupes(unreduced_occupancies)
    doc['atom_types'] = _without_dupes(unreduced_species)
    doc['positions_frac'] = _without_dupes(unreduced_sites)

    # store an integer atom count when the occupancies sum to (almost) a whole number
    _num_atoms = np.sum(doc['site_occupancy'])
    if abs(_num_atoms - round(_num_atoms, 0)) < EPS:
        _num_atoms = int(round(_num_atoms, 0))
    doc['num_atoms'] = _num_atoms

    if len(doc['site_occupancy']) != len(doc['positions_frac']):
        raise RuntimeError('Size mismatch between positions and occs, {} vs {}'
                           .format(len(doc['site_occupancy']), len(doc['positions_frac'])))
    if len(doc['positions_frac']) != len(doc['atom_types']):
        raise RuntimeError('Size mismatch between positions and types')