Example #1
    def is_dupe(self, doc, sim_tol=5e-2, extra_pdfs=None):
        """ Compare doc with all other structures at same stoichiometry via PDF overlap.

        Parameters:
            doc (dict): structure to compare.

        Keyword Arguments:
            sim_tol (float): similarity distance below which two
                structures are considered duplicates.
            extra_pdfs (list of dict): extra structures to compare against,
                each with a precomputed PDF stored under its `pdf` key.

        """
        new_pdf = PDF(doc, projected=True)
        for ind, pdf in enumerate(self.pdfs):
            if sorted(doc["stoichiometry"]) == self._stoichs[ind]:
                dist = new_pdf.get_sim_distance(pdf, projected=True)
                if dist < sim_tol:
                    return True
        if extra_pdfs is not None:
            for ind, _doc in enumerate(extra_pdfs):
                pdf = _doc["pdf"]
                if sorted(doc["stoichiometry"]) == sorted(
                        _doc["stoichiometry"]):
                    dist = new_pdf.get_sim_distance(pdf,
                                                    projected=pdf.projected)
                    if dist < sim_tol:
                        return True
        return False
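
A minimal sketch of performing the same comparison directly on two structures, assuming `doc_a` and `doc_b` are matador documents (hypothetical names); the threshold mirrors the default `sim_tol` above, and `is_dupe` additionally restricts the comparison to matching stoichiometries:

# hypothetical direct comparison of two structures via their projected PDFs
pdf_a = PDF(doc_a, projected=True)
pdf_b = PDF(doc_b, projected=True)
if pdf_a.get_sim_distance(pdf_b, projected=True) < 5e-2:
    print("likely duplicates")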
Example #2
 def test_pdf_primitive_vs_supercell(self):
     test_doc, success = res2dict(REAL_PATH + "data/KP_primitive.res",
                                  db=False)
     test_doc["text_id"] = ["primitive", "cell"]
     test_doc["lattice_cart"] = abc2cart(test_doc["lattice_abc"])
     test_doc["cell_volume"] = cart2volume(test_doc["lattice_cart"])
     supercell_doc, success = res2dict(REAL_PATH + "data/KP_supercell.res",
                                       db=False)
     supercell_doc["text_id"] = ["supercell", "cell"]
     supercell_doc["lattice_cart"] = abc2cart(supercell_doc["lattice_abc"])
     supercell_doc["cell_volume"] = cart2volume(
         supercell_doc["lattice_cart"])
     test_doc["pdf"] = PDF(test_doc,
                           dr=0.01,
                           low_mem=True,
                           rmax=10,
                           num_images="auto",
                           debug=DEBUG)
     supercell_doc["pdf"] = PDF(
         supercell_doc,
         dr=0.01,
         low_mem=True,
         rmax=10,
         num_images="auto",
         debug=DEBUG,
     )
     overlap = PDFOverlap(test_doc["pdf"], supercell_doc["pdf"])
     self.assertLessEqual(overlap.similarity_distance, 1e-3)
     self.assertGreaterEqual(overlap.similarity_distance, 0.0)
Example #3
def pdf_sim_dist(doc_test, doc_supercell):
    doc_test["text_id"] = ["test", "cell"]
    doc_supercell["text_id"] = ["super", "cell"]
    pdf_test = PDF(doc_test, low_mem=True)
    pdf_supercell = PDF(doc_supercell, low_mem=True)
    overlap = PDFOverlap(pdf_test, pdf_supercell)
    return overlap.similarity_distance
Example #4
 def test_pdf_from_projected(self):
     doc, success = res2dict(REAL_PATH + "data/LiPZn-r57des.res")
     doc["lattice_cart"] = abc2cart(doc["lattice_abc"])
     doc["text_id"] = ["unprojected", "test"]
     doc["pdf_unprojected"] = PDF(doc, dr=0.01, **{"debug": False})
     doc["text_id"] = ["projected", "test"]
     doc["pdf_projected"] = PDF(doc, dr=0.01, **{"debug": False})
     np.testing.assert_array_almost_equal(doc["pdf_unprojected"].gr,
                                          doc["pdf_projected"].gr)
Example #5
    def test_single_atom_pdf(self):
        from math import ceil

        doc = dict()
        box_size = 20
        rmax = 41
        dr = 0.1
        num_images = "auto"
        doc["positions_frac"] = [[0.5, 0.5, 0.5]]
        doc["atom_types"] = ["C"]
        doc["lattice_cart"] = np.asarray([[box_size, 0, 0], [0, box_size, 0],
                                          [0, 0, box_size]])
        doc["cell_volume"] = box_size**3
        doc["text_id"] = ["hist", "ogram"]
        doc["pdf"] = PDF(
            doc,
            num_images=num_images,
            dr=dr,
            rmax=rmax,
            lazy=True,
            style="histogram",
            debug=True,
        )
        doc["pdf"].calc_pdf()
        doc["text_id"] = ["smear"]
        doc["pdf_smear"] = PDF(
            doc,
            num_images=num_images,
            gaussian_width=0.01,
            dr=0.1,
            rmax=rmax,
            lazy=True,
            style="smear",
            debug=True,
        )
        doc["pdf_smear"].calc_pdf()
        doc["text_id"] = ["low"]
        doc["pdf_low"] = PDF(
            doc,
            low_mem=True,
            num_images=num_images,
            gaussian_width=0.01,
            dr=0.1,
            rmax=rmax,
            lazy=True,
            style="smear",
            debug=True,
        )
        doc["pdf_low"].calc_pdf()
        peaks = [20, np.sqrt(2) * 20, np.sqrt(3) * 20, 40]
        indices = [ceil(peak / dr) for peak in peaks]
        self.assertListEqual(
            np.where(doc["pdf_low"].gr > 1e-8)[0].tolist(), indices)
        self.assertListEqual(
            np.where(doc["pdf"].gr > 1e-8)[0].tolist(), indices)
        self.assertListEqual(
            np.where(doc["pdf_smear"].gr > 1e-8)[0].tolist(), indices)
Example #6
 def calc_pdfs(self):
     """ Compute PDFs for each structure in the generation. """
     self._pdfs = []
     self._stoichs = []
     for structure in self.populace:
         self._pdfs.append(PDF(structure, projected=True))
         self._stoichs.append(sorted(structure["stoichiometry"]))
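
These cached lists are what `is_dupe` in Example #1 iterates over. A hedged sketch of combining the two methods when screening new structures; the `generation`, `candidates` and `unique` names are assumptions for illustration:

# hypothetical screening loop pairing calc_pdfs() with is_dupe()
generation.calc_pdfs()   # build the PDF and stoichiometry caches once
unique = [candidate for candidate in candidates
          if not generation.is_dupe(candidate, sim_tol=5e-2)]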
Example #7
    def test_concurrent_pdf(self):
        import glob
        import numpy as np
        import time
        from copy import deepcopy

        files = glob.glob(REAL_PATH + "data/hull-KPSn-KP/*.res")[0:24]
        cursor = [res2dict(file, db=False)[0] for file in files]
        serial_cursor = deepcopy(cursor)
        pdf_args = {
            "dr": 0.1,
            "num_images": "auto",
            "gaussian_width": 0.1,
            "lazy": False,
            "projected": False,
        }
        start = time.time()
        pdf_factory = PDFFactory(cursor, **pdf_args)
        factory_elapsed = time.time() - start
        start = time.time()
        for doc in serial_cursor:
            doc["pdf"] = PDF(doc, **pdf_args, timing=False)
        serial_elapsed = time.time() - start
        print("{:.2f} s over {} processes vs {:.2f} s in serial".format(
            factory_elapsed, pdf_factory.nprocs, serial_elapsed))
        print("Corresponding to a speedup of {:.1f} vs ideal {:.1f}".format(
            serial_elapsed / factory_elapsed, pdf_factory.nprocs))
        for ind, doc in enumerate(serial_cursor):
            np.testing.assert_array_almost_equal(doc["pdf"].gr,
                                                 cursor[ind]["pdf"].gr,
                                                 decimal=6)
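
As the final comparison shows, `PDFFactory` attaches the computed PDF to each document in place under the `pdf` key. A minimal standalone sketch with the same arguments, assuming `cursor` is any list of matador structure documents:

# hypothetical standalone use: computes doc["pdf"] for every structure in the cursor
factory = PDFFactory(cursor, dr=0.1, num_images="auto", gaussian_width=0.1, projected=False)
print("PDFs computed over {} processes".format(factory.nprocs))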
Example #8
 def calculate_pdf(self, **kwargs):
     """ Calculate and set the PDF with the passed parameters. """
     from matador.fingerprints.pdf import PDF
     if 'pdf' not in self._data:
         self._data['pdf'] = PDF(self._data,
                                 label=self.formula_tex,
                                 **kwargs)
     return self._data['pdf']
Example #9
    def pdf(self):
        """ Returns a PDF object (pair distribution function) for the
        structure, calculated with default PDF settings.

        """
        from matador.fingerprints.pdf import PDF
        if 'pdf' not in self._data:
            self._data['pdf'] = PDF(self._data, label=self.formula_tex)
        return self._data['pdf']
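
Both accessors cache their result under `self._data['pdf']`, so only the first call pays the computation cost. A sketch of how they might be used, assuming they belong to matador's Crystal class; the construction below is purely illustrative:

# hypothetical usage of the cached PDF accessors
crystal = Crystal(structure_doc)
pdf_custom = crystal.calculate_pdf(dr=0.01, rmax=10)  # explicit parameters, cached on first call
pdf_default = crystal.pdf                             # reuses the cached PDF if one exists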
Example #10
 def test_overlap_smear_vs_hist(self):
     doc, success = res2dict(REAL_PATH + "data/LiPZn-r57des.res")
     doc["lattice_cart"] = abc2cart(doc["lattice_abc"])
     doc["text_id"] = ["smear", "test"]
     doc["pdf_smear"] = PDF(
         doc,
         num_images=3,
         dr=0.01,
         gaussian_width=0.1,
         projected=False,
         style="smear",
         low_mem=True,
     )
     doc["text_id"] = ["hist", "test"]
     doc["pdf_hist"] = PDF(doc,
                           num_images=3,
                           dr=0.1,
                           projected=False,
                           style="histogram")
     overlap = PDFOverlap(doc["pdf_smear"], doc["pdf_hist"])
     self.assertLessEqual(overlap.similarity_distance, 0.02)
     self.assertGreater(overlap.similarity_distance, 0.0)
Example #11
 def test_identity_overlap(self):
     doc, success = res2dict(REAL_PATH + "data/LiPZn-r57des.res")
     doc["lattice_cart"] = abc2cart(doc["lattice_abc"])
     doc["text_id"] = ["pdf", "test"]
     doc["pdf_smear"] = PDF(
         doc,
         num_images=3,
         dr=0.001,
         gaussian_width=0.1,
         style="smear",
         debug=False,
         low_mem=True,
     )
     overlap = PDFOverlap(doc["pdf_smear"], doc["pdf_smear"])
     self.assertEqual(overlap.similarity_distance, 0.0)
Example #12
 def test_auto_images_vs_large(self):
     doc, success = res2dict(REAL_PATH + "data/LiPZn-r57des.res")
     doc["lattice_cart"] = abc2cart(doc["lattice_abc"])
     doc["text_id"] = ["pdf", "test"]
     doc["pdf_num_images"] = PDF(doc,
                                 low_mem=True,
                                 num_images=5,
                                 rmax=15,
                                 dr=0.1,
                                 **{
                                     "debug": True,
                                     "projected": False
                                 })
     doc["pdf_auto_images"] = PDF(doc,
                                  low_mem=True,
                                  num_images="auto",
                                  rmax=15,
                                  dr=0.1,
                                  **{
                                      "debug": True,
                                      "projected": False
                                  })
     np.testing.assert_array_almost_equal(doc["pdf_num_images"].gr,
                                          doc["pdf_auto_images"].gr)
Example #13
    def test_concurrent_pdf_stoichs(self):
        import glob
        import numpy as np
        import time
        from copy import deepcopy
        from matador.hull import QueryConvexHull

        files = glob.glob(REAL_PATH + "data/hull-KPSn-KP/*.res")
        cursor = [res2dict(file, db=True)[0] for file in files]
        hull = QueryConvexHull(
            cursor=cursor,
            no_plot=True,
            hull_cutoff=0.5,
            summary=True,
            elements=["K", "Sn", "P"],
            quiet=True,
        )
        serial_cursor = deepcopy(hull.cursor)

        pdf_args = {
            "dr": 0.1,
            "num_images": "auto",
            "gaussian_width": 0.1,
            "lazy": False,
            "projected": False,
        }
        start = time.time()
        pdf_factory = PDFFactory(hull.cursor, **pdf_args)
        factory_elapsed = time.time() - start
        start = time.time()
        for doc in serial_cursor:
            doc["pdf"] = PDF(doc, **pdf_args, timing=False)
        serial_elapsed = time.time() - start
        print("{:.2f} s over {} processes vs {:.2f} s in serial".format(
            factory_elapsed, pdf_factory.nprocs, serial_elapsed))
        print("Corresponding to a speedup of {:.1f} vs ideal {:.1f}".format(
            serial_elapsed / factory_elapsed, pdf_factory.nprocs))
        for ind, doc in enumerate(serial_cursor):
            np.testing.assert_array_almost_equal(doc["pdf"].gr,
                                                 hull.cursor[ind]["pdf"].gr,
                                                 decimal=6)
Example #14
    def test_ideal_gas_pdf(self, retry=0):
        """ DEPRECATED.

        Slow, and not very useful.

        """
        # create fake matador doc
        doc = dict()
        max_retries = 1
        self.assertLess(
            retry,
            max_retries,
            msg="After {} attempts, PDF still failed.".format(retry))
        num_atoms = 100
        box_size = 10
        num_samples = 1
        rmax = 15
        dr = 0.01
        num_images = "auto"
        i = 0
        doc["atom_types"] = num_atoms * ["C"]
        doc["lattice_cart"] = np.asarray([[box_size, 0, 0], [0, box_size, 0],
                                          [0, 0, box_size]])
        doc["cell_volume"] = box_size**3
        doc["text_id"] = ["ideal", "gas"]
        while i < num_samples:
            doc["positions_frac"] = np.random.rand(num_atoms, 3)
            doc["text_id"] = "hist"
            doc["pdf"] = PDF(
                doc,
                num_images=num_images,
                dr=dr,
                rmax=rmax,
                lazy=True,
                style="histogram",
                debug=True,
            )
            doc["pdf"].calc_pdf()
            self.assertAlmostEqual(np.mean(doc["pdf"].gr[50:]), 1.0, places=1)
            i += 1
Example #15
def _cif_set_unreduced_sites(doc):
    """ Expands sites by symmetry operations found under the key
    `symemtry_equiv_pos_as_xyz` in the cif_dict.

    Parameters:
        doc (dict): matador document to modify. Must contain symops
            under doc['_cif']['_symmetry_equiv_pos_as_xyz']. This doc
            is updated with new `positions_frac`, `num_atoms`, `atom_types`
            and `site_occupancy`.

    """
    from matador.utils.cell_utils import wrap_frac_coords
    from matador.utils.cell_utils import calc_pairwise_distances_pbc
    from matador.fingerprints.pdf import PDF

    species_sites = dict()
    species_occ = dict()

    symmetry_ops = []
    symmetry_functions = []

    def _apply_sym_op(x=None, y=None, z=None, symmetry=None):
        """ Returns the site after the applied symmetry operation, in string representation. """
        # cannot use a listcomp here due to interplay with functools
        return [eval(symmetry[0]), eval(symmetry[1]), eval(symmetry[2])]

    for symmetry in doc['_cif']['_symmetry_equiv_pos_as_xyz']:
        symmetry = tuple(elem.strip() for elem in symmetry.strip('\'').split(','))
        # check the element before doing an eval, as it is so unsafe
        allowed_chars = ['x', 'y', 'z', '.', '/', '+', '-',
                         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        for element in symmetry:
            for character in element:
                if character not in allowed_chars:
                    raise RuntimeError('You are trying to do something naughty with the symmetry element {}'
                                       .format(element))

        symmetry_ops.append(symmetry)
        symmetry_functions.append(functools.partial(_apply_sym_op, symmetry=symmetry))

    for ind, site in enumerate(doc['positions_frac']):
        species = doc['atom_types'][ind]
        occupancy = doc['site_occupancy'][ind]
        if doc['atom_types'][ind] not in species_sites:
            species_sites[species] = []
            species_occ[species] = []
        for symmetry in symmetry_functions:
            x, y, z = site
            new_site = symmetry(x=x, y=y, z=z)
            new_site = wrap_frac_coords([new_site])[0]
            species_sites[species].append(new_site)
            species_occ[species].append(occupancy)

    unreduced_sites = []
    unreduced_occupancies = []
    unreduced_species = []

    # this loop assumes that no symmetry operation can map 2 unlike sites upon one another
    for species in species_sites:
        unreduced_sites.extend(species_sites[species])
        unreduced_occupancies.extend(species_occ[species])
        unreduced_species.extend(len(species_sites[species]) * [species])

    # check that the symmetry procedure has not generated overlapping atoms
    # this can happen for certain symmetries/cells if positions are not
    # reported to sufficient precision
    images = PDF._get_image_trans_vectors_auto(
        doc['lattice_cart'],
        0.1, 0.01, max_num_images=1,
    )

    poscarts = frac2cart(doc['lattice_cart'], unreduced_sites)
    distances = calc_pairwise_distances_pbc(
        poscarts,
        images,
        doc['lattice_cart'],
        0.01,
        compress=False,
        filter_zero=False,
        per_image=True
    )

    dupe_set = set()
    for img in distances:
        try:
            i_s, j_s = np.where(~img.mask)
        except ValueError:
            # ValueError will be raised if there is only one atom as i_s, j_s cannot be unpacked
            continue
        for i, j in zip(i_s, j_s):
            if i == j:
                continue
            else:
                # sites can overlap if they have partial occupancy
                if i not in dupe_set and unreduced_species[i] == unreduced_species[j]:
                    dupe_set.add(j)

    # keep only the sites that were not flagged as duplicates
    doc['site_occupancy'] = [
        atom for ind, atom in enumerate(unreduced_occupancies) if ind not in dupe_set
    ]
    doc['atom_types'] = [
        atom for ind, atom in enumerate(unreduced_species) if ind not in dupe_set
    ]
    doc['positions_frac'] = [
        atom for ind, atom in enumerate(unreduced_sites) if ind not in dupe_set
    ]

    _num_atoms = np.sum(doc['site_occupancy'])
    if abs(_num_atoms - round(_num_atoms, 0)) < EPS:
        _num_atoms = int(round(_num_atoms, 0))
    doc['num_atoms'] = _num_atoms

    if len(doc['site_occupancy']) != len(doc['positions_frac']):
        raise RuntimeError('Size mismatch between positions and occs, {} vs {}'
                           .format(len(doc['site_occupancy']), len(doc['positions_frac'])))
    if len(doc['positions_frac']) != len(doc['atom_types']):
        raise RuntimeError('Size mismatch between positions and types')
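
To make the eval-based expansion concrete, here is a standalone sketch of what `_apply_sym_op` does for one parsed symmetry element; the coordinates are an arbitrary illustrative site:

# illustrative expansion of a single symmetry element, mirroring _apply_sym_op above
symmetry = ("x", "-y", "z+1/2")   # parsed from the CIF string 'x, -y, z+1/2'
x, y, z = 0.25, 0.10, 0.40        # an arbitrary fractional site
new_site = [eval(symmetry[0]), eval(symmetry[1]), eval(symmetry[2])]
# new_site == [0.25, -0.1, 0.9]; wrap_frac_coords([new_site]) maps -0.1 back into [0, 1)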