def test_spg_standardize(self):
    """ Standardizing a cell should leave its PDF essentially unchanged
    (similarity distance below 0.01), both for plain dictionaries and
    for Crystal objects, while the AgBiI CIF structure is expected to
    raise a RuntimeError.

    """
    from matador.utils.cell_utils import standardize_doc_cell
    from matador.scrapers import cif2dict
    import glob

    tolerance = 0.01
    castep_fname = REAL_PATH + "data/Na3Zn4-swap-ReOs-OQMD_759599.castep"

    # dictionary scraped from a CASTEP output file
    castep_doc, _ = castep2dict(castep_fname)
    standardized = standardize_doc_cell(castep_doc)
    self.assertLess(pdf_sim_dist(castep_doc, standardized), tolerance)

    # dictionaries scraped from res files, which need a volume adding first
    for res_fname in glob.glob(REAL_PATH + "data/bs_test/*.res"):
        res_doc, _ = res2dict(res_fname, db=False)
        res_doc["cell_volume"] = cart2volume(res_doc["lattice_cart"])
        standardized = standardize_doc_cell(res_doc)
        self.assertLess(pdf_sim_dist(res_doc, standardized), tolerance)

    # the same CASTEP structure, this time wrapped as a Crystal
    crystal = Crystal(castep2dict(castep_fname)[0])
    standardized = standardize_doc_cell(crystal)
    self.assertLess(pdf_sim_dist(crystal, standardized), tolerance)

    # this structure must be rejected rather than standardized
    cif_crystal = Crystal(cif2dict(REAL_PATH + "data/cif_files/AgBiI.cif")[0])
    with self.assertRaises(RuntimeError):
        standardized = standardize_doc_cell(cif_crystal)
def construct_thermodynamics(self, doc: Crystal) -> MatadorThermodynamics:
    """ Build the thermodynamics model from the per-atom quantities
    stored on the crystal.

    Note that the underlying `doc._data` is modified in place: the
    `enthalpy`, `total_energy` and `formation_energy` keys are (re)set
    from their `*_per_atom` counterparts before the whole mapping is
    passed to the model.

    Parameters:
        doc (Crystal): the structure to extract the data from.

    Returns:
        MatadorThermodynamics: model built from all of `doc._data`.

    """
    data = doc._data

    data["enthalpy"] = data["enthalpy_per_atom"]
    data["total_energy"] = data["total_energy_per_atom"]

    # prefer the formation energy, falling back to the formation enthalpy
    formation_energy = data.get("formation_energy_per_atom")
    if formation_energy is None:
        formation_energy = data.get("formation_enthalpy_per_atom")
    data["formation_energy"] = formation_energy

    return MatadorThermodynamics(**data)
def _find_and_sort(self, query_filter=None, as_list=False, **kwargs):
    """ Query `self.repo` using Pymongo arguments/kwargs, sorting the
    results by ascending `enthalpy_per_atom`.

    Keyword arguments:
        query_filter (dict): the query to use. If None, perform a blank query.
        as_list (bool): whether to return a list or a pm.cursor.Cursor object.
        **kwargs: passed on to both `count_documents` and `find`.

    Returns:
        list/pm.cursor.Cursor: the results of the query. A list of Crystals
            if `self.args['as_crystal']` is set; a list of documents if
            `as_list` is set or there are fewer than `self.cursor_min_limit`
            results; otherwise the sorted cursor itself.
        int: the number of results in the query.

    """
    from matador.crystal import Crystal

    query = {} if query_filter is None else query_filter

    num_results = self.repo.count_documents(query, **kwargs)
    sorted_cursor = self.repo.find(query, **kwargs).sort('enthalpy_per_atom', pm.ASCENDING)

    if self.args.get('as_crystal'):
        crystals = [Crystal(entry) for entry in sorted_cursor]
        return crystals, num_results

    # small result sets are materialised even if a list was not requested
    if num_results < self.cursor_min_limit or as_list:
        return list(sorted_cursor), num_results

    return sorted_cursor, num_results
def construct_dft_hamiltonian(self, doc: Crystal) -> MatadorHamiltonian:
    """ Build the DFT Hamiltonian model from the calculation parameters
    stored on the crystal.

    The underlying `doc._data` is modified in place before being passed
    to the model:

    - `pseudopotentials` is filled in via `self.construct_pseudopotentials`
      if it is missing;
    - `spin_treatment` is set to "none" unless the calculation is flagged
      as `spin_polarized`, in which case any stored value is kept;
    - `external_pressure` is expanded to a full 3x3 nested list;
    - `kpoint_spacing` is copied from `kpoints_mp_spacing`.

    Parameters:
        doc (Crystal): the structure to extract the data from.

    Returns:
        MatadorHamiltonian: model built from all of `doc._data`.

    Raises:
        KeyError: if `external_pressure` or `kpoints_mp_spacing` is missing.
        ValueError: if a row of `external_pressure` does not have between
            one and three entries.

    """
    if "pseudopotentials" not in doc._data:
        doc._data["pseudopotentials"] = self.construct_pseudopotentials(doc)

    # a stored spin treatment is only meaningful for spin-polarized runs
    spin_treatment = "none"
    if doc._data.get("spin_polarized", False):
        spin_treatment = doc._data.get("spin_treatment", "none")
    doc._data["spin_treatment"] = spin_treatment

    # NOTE(review): the pressure is assumed to be stored CASTEP-style, as
    # ragged upper-triangular rows [[xx, xy, xz], [yy, yz], [zz]] -- confirm
    # against the scrapers. Each row is right-aligned so that row i starts
    # on the diagonal; the lower triangle is left as zeros. A row that
    # already has three entries is copied through unchanged, which keeps
    # this method safe to call twice on the same document.
    ext_pressure = np.zeros((3, 3))
    for i in range(3):
        row = doc._data["external_pressure"][i]
        if not 1 <= len(row) <= 3:
            raise ValueError(
                "Row {} of external_pressure has {} entries, expected between 1 and 3."
                .format(i, len(row)))
        offset = 3 - len(row)
        for j, value in enumerate(row):
            ext_pressure[i][offset + j] = value
    doc._data["external_pressure"] = ext_pressure.tolist()

    doc._data["kpoint_spacing"] = doc._data["kpoints_mp_spacing"]

    return MatadorHamiltonian(**doc._data)
def __init__(
    self,
    structures: list = None,
    references: list = None,
):
    """ Convert the passed structures into OPTIMADE structure entries,
    link each of them to all of the passed references, and insert both
    into the `references` and `structures` entry collections.

    Keyword arguments:
        structures (list): matador documents to convert and insert.
            Each is wrapped as a Crystal before conversion. If None,
            no structures are inserted.
        references (list): reference dictionaries, each with at least
            an `id` key. They are stamped in place with a `last_modified`
            time and validated as ReferenceResources before anything is
            inserted. If None, no references are inserted or linked.

    """
    self.wlines = WORDS
    self.nlines = NOUNS
    self.num_words = len(self.wlines)
    self.num_nouns = len(self.nlines)

    # both arguments default to None; treat that as "nothing to add"
    documents = structures if structures is not None else []
    references = references if references is not None else []

    for ref in references:
        ref["last_modified"] = datetime.datetime.now()

    # check that all refs can be validated as ReferenceResources
    for ref in references:
        ReferenceResource(
            id=ref["id"],
            attributes={key: ref[key] for key in ref if key != "id"},
        )

    # new IDs continue from the most recently inserted structure; the
    # cursor is materialised once, as it cannot be indexed after iteration
    last_id = 0
    latest = list(
        ENTRY_COLLECTIONS["structures"].collection.find({}).sort(
            "_id", pm.DESCENDING).limit(1))
    if latest:
        last_id = int(latest[0]["id"].split("odbx_")[1])

    out_cursor = []
    for ind, doc in tqdm.tqdm(enumerate(documents)):
        crys_doc = Crystal(doc)
        structure = self.create_optimade_structure(crys_doc, last_id + 1 + ind)
        structure["relationships"] = {
            "references": {
                "data": [{
                    "id": ref["id"],
                    "type": "references"
                } for ref in references]
            }
        }
        out_cursor.append(structure)

    # insert_many rejects empty lists, so only insert when there is data
    if references:
        ENTRY_COLLECTIONS["references"].collection.insert_many(references)
    if out_cursor:
        ENTRY_COLLECTIONS["structures"].collection.insert_many(out_cursor)
def ase2dict(atoms, as_model=False) -> Union[dict, Crystal]:
    """ Convert an `ase.Atoms` object into a matador document, with
    the atoms sorted by chemical symbol.

    Parameters:
        atoms (ase.Atoms): input structure.

    Keyword arguments:
        as_model (bool): if `True`, return a Crystal instead of a dictionary.

    Returns:
        Union[dict, Crystal]: matador output.

    """
    from matador.utils.cell_utils import cart2abc

    # work out the (stable) ordering that sorts the atoms by species...
    symbols = atoms.get_chemical_symbols()
    order = sorted(range(len(symbols)), key=lambda ind: symbols[ind])

    # ...then apply it to both the positions and the species labels
    scaled_positions = atoms.get_scaled_positions().tolist()

    doc = {}
    doc['atom_types'] = [symbols[ind] for ind in order]
    doc['positions_frac'] = [scaled_positions[ind] for ind in order]

    # the cell may or may not expose `tolist` directly
    try:
        doc['lattice_cart'] = atoms.get_cell().tolist()
    except AttributeError:
        doc['lattice_cart'] = atoms.get_cell().array.tolist()
    doc['lattice_abc'] = cart2abc(doc['lattice_cart'])

    doc['num_atoms'] = len(doc['atom_types'])
    doc['stoichiometry'] = get_stoich(doc['atom_types'])
    doc['cell_volume'] = atoms.get_volume()
    doc['elems'] = set(doc['atom_types'])

    atoms_per_fu = int(sum(entry[1] for entry in doc['stoichiometry']))
    doc['num_fu'] = doc['num_atoms'] / atoms_per_fu

    doc['space_group'] = get_spacegroup_spg(doc, symprec=0.001)

    if atoms.info:
        doc["ase_info"] = copy.deepcopy(atoms.info)

    return Crystal(doc) if as_model else doc
def test_with_crystals(self):
    """ With a similarity tolerance of zero, uniqueness filtering of a
    list of Crystals should not remove any structure.

    """
    from matador.crystal import Crystal
    import glob

    pdf_kwargs = {"dr": 0.1, "gaussian_width": 0.1}

    res_files = glob.glob(REAL_PATH + "data/uniqueness_hierarchy/*.res")
    crystals = [Crystal(res2dict(fname)[0]) for fname in res_files]

    unique_indices, _, _, _ = get_uniq_cursor(
        crystals,
        sim_tol=0,
        energy_tol=1e20,
        projected=True,
        debug=True,
        **pdf_kwargs
    )

    unique_crystals = [crystals[index] for index in unique_indices]
    self.assertEqual(len(unique_crystals), len(crystals))
def test_with_skips(self):
    """ Check the effect of the energy tolerance on uniqueness filtering.

    With `energy_tol=0` every Crystal is expected to survive. The ten
    lowest-enthalpy documents are then given enthalpies of 0, -1, ..., -7
    with the last two overwritten as -5.0 and -5.0001, and filtering
    with `energy_tol=0.003` is expected to leave eight structures.

    """
    from matador.crystal import Crystal
    from matador.utils.cursor_utils import filter_unique_structures
    import glob

    res_files = glob.glob(REAL_PATH + "data/uniqueness_hierarchy/*.res")

    # first pass: Crystals with a zero energy window
    crystals = [Crystal(res2dict(fname)[0]) for fname in res_files]
    unique_crystals = filter_unique_structures(crystals, energy_tol=0)
    self.assertEqual(len(unique_crystals), len(crystals))

    # second pass: plain documents with hand-crafted enthalpies
    documents = [res2dict(fname)[0] for fname in res_files]
    documents.sort(key=lambda entry: entry["enthalpy_per_atom"])
    lowest = documents[0:10]
    for position, entry in enumerate(lowest):
        entry["enthalpy_per_atom"] = float(-position)
    lowest[8]["enthalpy_per_atom"] = -5.0
    lowest[9]["enthalpy_per_atom"] = -5.0001

    unique_documents = filter_unique_structures(lowest, energy_tol=0.003)
    self.assertEqual(len(unique_documents), 8)
def standardize_doc_cell(doc, primitive=True, symprec=1e-2):
    """ Return standardized cell data from matador doc.

    The input is not modified: the returned document is a deep copy of
    the original data in which the lattice, positions, atom types,
    site occupancies (all reset to 1), cell volume and space group have
    been replaced by those of the spglib-standardized cell.

    Parameters:
        doc (dict or :class:`Crystal`): matador document or Crystal object.

    Keyword arguments:
        primitive (bool): whether to reduce cell to primitive.
        symprec (float): spglib symmetry tolerance.

    Raises:
        RuntimeError: if spglib is unable to standardize the cell.

    Returns:
        dict or :class:`Crystal`: matador document containing the
            standardized cell, returned as a Crystal if one was passed in.

    """
    import spglib as spg
    from matador.crystal import Crystal
    from matador.utils.chem_utils import get_atomic_symbol
    from copy import deepcopy

    spg_cell = doc2spg(doc)
    spg_standardized = spg.standardize_cell(spg_cell,
                                            to_primitive=primitive,
                                            symprec=symprec)

    # spglib signals failure by returning None rather than raising
    if spg_standardized is None:
        raise RuntimeError(
            "spglib was unable to standardize the cell with symprec={}."
            .format(symprec))

    std_lattice, std_positions, std_numbers = spg_standardized

    is_crystal = isinstance(doc, Crystal)
    std_doc = deepcopy(doc._data) if is_crystal else deepcopy(doc)

    std_doc['lattice_cart'] = [list(vec) for vec in std_lattice]
    std_doc['lattice_abc'] = cart2abc(std_doc['lattice_cart'])
    std_doc['positions_frac'] = [list(atom) for atom in std_positions]
    std_doc['atom_types'] = [get_atomic_symbol(atom) for atom in std_numbers]
    # the number of sites may have changed, so occupancies are reset
    std_doc['site_occupancy'] = len(std_doc['positions_frac']) * [1]
    std_doc['cell_volume'] = cart2volume(std_doc['lattice_cart'])
    std_doc['space_group'] = get_spacegroup_spg(std_doc, symprec=symprec)

    # if the original document was a crystal, return a new one
    if is_crystal:
        std_doc = Crystal(std_doc)

    return std_doc
def construct_spacegroup(self, doc: Crystal, tolerance=1e-3) -> MatadorSpaceGroup:
    """ Build the space group model for the crystal, recording both the
    symbol found at the given symmetry tolerance and the tolerance itself.

    Parameters:
        doc (Crystal): the structure to compute the space group of.

    Keyword arguments:
        tolerance (float): passed as `symprec` to `doc.get_space_group`.

    Returns:
        MatadorSpaceGroup: the symbol and the tolerance used to find it.

    """
    symbol = doc.get_space_group(symprec=tolerance)
    return MatadorSpaceGroup(symbol=symbol, spglib_tolerance=tolerance)
def plot_magres(
    magres: Union[List[Crystal], Crystal],
    species: str,
    magres_key: str = "chemical_shielding_iso",
    xlabel: str = None,
    broadening_width: float = 1,
    text_offset: float = 0.1,
    ax=None,
    figsize: Tuple[float] = None,
    show: bool = False,
    savefig: Optional[str] = None,
    signal_labels: Optional[Union[str, List[str]]] = None,
    signal_limits: Tuple[float] = None,
    line_kwargs: Optional[Union[Dict, List[Dict]]] = None,
):
    """ Plot the magres signal of one species for one or more structures.

    The per-site values stored under `magres_key` are histogrammed on a
    common grid, optionally broadened, normalised to a maximum of 1 and
    drawn as one curve per structure, each offset vertically by 1.1.

    Parameters:
        magres (Union[Crystal, List[Crystal]]): list of :class:`Crystal` containing
            magres data. Plain dictionaries are converted to Crystals.
        species (str): the species to plot the shifts of.

    Keyword arguments:
        ax (matplotlib.axes.Axes): an existing axis on which to plot.
        magres_key (str): the data key for which the magres site data is stored under.
        show (bool): whether to show plot in an X window.
        figsize (Tuple[float]): overrides the default size for the matplotlib figure.
        broadening_width (float): the Lorentzian width to apply to the shifts.
            If not positive, the raw histogram is plotted instead.
        text_offset (float): vertical offset of each label above the
            baseline of its curve.
        xlabel (str): a custom label for the x-axis.
        savefig (str): filename to use to save the plot.
        signal_labels (list): optional list of labels for the curves in the magres list.
            Defaults to the formula of each structure.
        signal_limits (Tuple[float]): values at which to clip the magres signals. Defaults
            to the maximum and minimum shifts across all passed structures.
        line_kwargs (list or dict): parameters to pass to the curve plotter,
            if a list then the line kwargs will be passed to each line individually.

    Raises:
        RuntimeError: if no structures or no species label were passed,
            if the number of labels or line kwargs does not match the
            number of structures, if no site of the species is found in
            any structure, or if the structures disagree on the units.

    Returns:
        matplotlib.axes.Axes: the axis that was plotted on.

    """
    import matplotlib.pyplot as plt

    # validate the arguments before any figure is created
    if species is None:
        raise RuntimeError("You must provide a species label for plotting.")

    if not isinstance(magres, list):
        magres = [magres]
    if not magres:
        raise RuntimeError("No magres data was passed for plotting.")

    if signal_labels is not None and not isinstance(signal_labels, list):
        signal_labels = [signal_labels]
    if line_kwargs is not None and not isinstance(line_kwargs, list):
        line_kwargs = len(magres) * [line_kwargs]

    if signal_labels is not None and len(signal_labels) != len(magres):
        raise RuntimeError(
            "Wrong number of labels passed for number of magres: {} vs {}".
            format(len(signal_labels), len(magres)))
    if line_kwargs is not None and len(line_kwargs) != len(magres):
        raise RuntimeError(
            "Wrong number of line kwargs passed for number of magres: {} vs {}"
            .format(len(line_kwargs), len(magres)))

    if figsize is None:
        _user_default_figsize = plt.rcParams.get('figure.figsize', (8, 6))
        height = len(magres) * max(
            0.5, _user_default_figsize[1] / 1.5 / len(magres))
        figsize = (_user_default_figsize[0], height)

    if ax is None:
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(111)

    # every later step works on Crystals, so convert any dictionaries once
    _magres = [Crystal(doc) if isinstance(doc, dict) else doc for doc in magres]

    if signal_limits is not None:
        min_shielding, max_shielding = signal_limits
    else:
        min_shielding, max_shielding = (1e20, -1e20)
        for doc in _magres:
            shielding = [atom[magres_key] for atom in doc if atom.species == species]
            if shielding:
                min_shielding = min(np.min(shielding), min_shielding)
                max_shielding = max(np.max(shielding), max_shielding)

    # the sentinel limits are only untouched if no site was ever found
    if min_shielding > 1e19 and max_shielding < -1e19:
        raise RuntimeError(
            f"No sites of {species} found in any of the passed crystals.")

    _buffer = 0.2 * np.abs(min_shielding - max_shielding)
    s_space = np.linspace(min_shielding - _buffer,
                          max_shielding + _buffer,
                          num=1000)
    # np.histogram returns one count per bin, i.e. one fewer than grid points
    bin_centres = s_space[:-1] + (s_space[1] - s_space[0]) / 2

    # repeat the colour cycle until there is one colour per structure
    _padded_colours = list(plt.rcParams["axes.prop_cycle"].by_key()["color"])
    _padded_colours = (1 + (len(_magres) // len(_padded_colours))) * _padded_colours

    for ind, doc in enumerate(_magres):
        if signal_labels is not None:
            _label = signal_labels[ind]
        else:
            _label = doc.formula_tex

        _line_kwargs = {'c': _padded_colours[ind]}
        if line_kwargs is not None:
            _line_kwargs.update(line_kwargs[ind])

        shifts = [site[magres_key] for site in doc if site.species == species]
        if not shifts:
            print(
                f"No sites of {species} found in {doc.root_source}, signal will be empty."
            )
            signal = np.zeros_like(s_space)
        else:
            hist, _ = np.histogram(shifts, bins=s_space)
            if broadening_width > 0:
                signal = Fingerprint._broadening_unrolled(
                    hist,
                    s_space,
                    broadening_width,
                    broadening_type="lorentzian")
            else:
                signal = np.array(hist, dtype=np.float64)

        peak = np.max(signal)
        if peak > 1e-10:
            signal /= peak
        else:
            signal *= 0

        # The shared grid is never overwritten, so every structure is
        # binned on the same bins; a histogram-shaped signal is drawn
        # at the bin centres, anything else on the grid itself.
        x_values = s_space if len(signal) == len(s_space) else bin_centres
        ax.plot(x_values, signal + (ind * 1.1), **_line_kwargs)

        if _label is not None:
            ax.text(0.95, (ind * 1.1) + text_offset,
                    _label,
                    transform=ax.get_yaxis_transform(),
                    horizontalalignment='right')

    if xlabel is None:
        unit = set(
            doc.get("magres_units", {}).get("ms", "ppm") for doc in _magres)
        if len(unit) > 1:
            raise RuntimeError(
                f"Multiple incompatible units found for chemical shift: {unit}"
            )
        unit = list(unit)[0]

        if magres_key == "chemical_shielding_iso":
            xlabel = f"{species}: Isotropic chemical shielding $\\sigma_\\mathrm{{iso}}$ ({unit})"
        elif magres_key == "chemical_shift_iso":
            xlabel = f"{species}: Isotropic chemical shift $\\sigma_\\mathrm{{iso}}$ ({unit})"
        elif magres_key == "chemical_shift_aniso":
            xlabel = f"{species}: Anisotropic chemical shift $\\sigma_\\mathrm{{iso}}$ ({unit})"
        elif magres_key == "chemical_shift_asymmetry":
            xlabel = f"{species}: Chemical shift asymmetry, $\\eta$"

    ax.set_xlabel(xlabel)
    ax.set_ylabel("Intensity (arb. units)")

    # the intensity scale is only meaningful for a single, unstacked curve
    if len(_magres) > 1:
        ax.set_yticks([])
    else:
        ax.set_yticks(np.linspace(0, 1, 5, endpoint=True))

    ax.set_ylim(-0.1, 1.1 * len(_magres))

    if savefig:
        plt.savefig(savefig)
        print('Wrote {}'.format(savefig))
    elif show:
        plt.show()

    return ax
def __init__(self,
             doc,
             wavelength: float = 1.5406,
             lorentzian_width: float = 0.03,
             two_theta_resolution: float = 0.01,
             two_theta_bounds: Tuple[float, float] = (0, 90),
             theta_m: float = 0.0,
             scattering_factors: str = "RASPA",
             lazy=False,
             plot=False,
             progress=False,
             *args,
             **kwargs):
    """ Store the PXRD settings and structure, then calculate the
    pattern unless `lazy` is set.

    Parameters:
        doc (dict/Crystal): matador document to compute PXRD for.

    Keyword arguments:
        lorentzian_width (float): width of Lorentzians for broadening (DEFAULT: 0.03)
        wavelength (float): incident X-ray wavelength
            (DEFAULT: CuKa, 1.5406).
        theta_m (float): the monochromator angle in degrees (DEFAULT: 0)
        two_theta_resolution (float): resolution of grid 2θ used for plotting.
        two_theta_bounds (tuple of float): values between which to compute
            the PXRD pattern. The lower bound is raised to `THETA_TOL` if
            it falls below it; `None` is treated as (0, 90).
        scattering_factors (str): either "GSAS" or "RASPA" (default),
            which set of atomic scattering factors to use.
        lazy (bool): whether to compute PXRD or just set it up.
        plot (bool): whether to display PXRD as a plot once computed.
        progress (bool): flag stored on the instance as `self.progress`.

    Raises:
        RuntimeError: if `scattering_factors` is neither "GSAS" nor "RASPA".

    """
    self.wavelength = wavelength
    self.lorentzian_width = lorentzian_width
    self.two_theta_resolution = two_theta_resolution
    self.theta_m = theta_m
    self.scattering_factors = scattering_factors
    self.progress = progress

    # keep a mutable copy of the bounds so that the lower one can be clamped
    bounds = [0, 90] if two_theta_bounds is None else list(two_theta_bounds)
    if bounds[0] < THETA_TOL:
        bounds[0] = THETA_TOL
    self.two_theta_bounds = bounds

    # only fully-occupied structures are standardized before use
    partially_occupied = np.min(doc.get('site_occupancy', [1.0])) < 1.0
    if partially_occupied:
        print("System has partial occupancy, not refining with spglib.")
        structure = doc
    else:
        structure = standardize_doc_cell(doc, primitive=True)
    self.doc = Crystal(structure)

    self.formula = get_formula_from_stoich(self.doc['stoichiometry'], tex=True)
    self.spg = self.doc['space_group']

    unique_species = list(set(self.doc['atom_types']))

    if self.scattering_factors not in ("GSAS", "RASPA"):
        raise RuntimeError(
            "No set of scattering factors matched: {}. Please use 'GSAS' or 'RASPA'."
            .format(self.scattering_factors))

    # this could be cached across PXRD objects but is much faster than the XRD calculation itself
    if self.scattering_factors == "GSAS":
        from matador.data.atomic_scattering import GSAS_ATOMIC_SCATTERING_COEFFS as coeff_table
    else:
        from matador.data.atomic_scattering import RASPA_ATOMIC_SCATTERING_COEFFS as coeff_table

    self.atomic_scattering_coeffs = {
        spec: coeff_table[spec] for spec in unique_species
    }

    if not lazy:
        self.calculate()
        if plot:
            self.plot()