def symmetry_filter(x, config, symmetry):
    """
    Return True if any sublattice configuration of the candidate dataset
    matches the configuration of interest once both are canonicalized with
    the phase model symmetry.

    Parameters
    ----------
    x : dict
        the candidate dataset 'solver' dict. Must contain the "mode" key and,
        for 'manual' mode, the "sublattice_configurations" key.
    config : list
        the configuration of interest: e.g. ['AL', ['AL', 'NI'], 'VA']
    symmetry : list
        tuple of tuples where each inner tuple is a group of equivalent
        sublattices. A value of ((0, 1), (2, 3, 4)) means that sublattices
        at indices 0 and 1 are symmetrically equivalent to each other and
        sublattices at indices 2, 3, and 4 are symmetrically equivalent to
        each other. Passed through unchanged to ``canonicalize``.

    Returns
    -------
    bool
        False when the solver mode is not 'manual', when the dataset lists no
        configurations, or when the number of sublattices differs from
        ``config``; otherwise whether at least one configuration matches.

    """
    if x['mode'] != 'manual':
        return False

    data_configs = x['sublattice_configurations']
    # A dataset without any configuration cannot match; bail out before the
    # [0] lookup below, which would otherwise raise IndexError.
    if len(data_configs) == 0:
        return False
    # Only the first configuration is inspected for the sublattice count.
    if len(config) != len(data_configs[0]):
        return False

    # The canonical form of the configuration of interest does not depend on
    # the dataset configuration, so compute it once.
    target = canonicalize(config, symmetry)
    # If even one matches, it's a match
    # We do more filtering downstream
    return any(
        target == canonicalize(data_config, symmetry)
        for data_config in data_configs
    )
def get_data(comps, phase_name, configuration, symmetry, datasets, prop):
    """
    Return list of cleaned single phase datasets matching the passed arguments.

    The datasets are searched by output property, components, phase and
    sublattice configuration. Each hit is deep-copied and then trimmed so that
    it only contains the matching sublattice configurations and temperatures
    of at least 298.15 K.

    Parameters
    ----------
    comps : list
        List of string component names
    phase_name : str
        Name of phase
    configuration : tuple
        Sublattice configuration as a tuple, e.g. ("CU", ("CU", "MG"))
    symmetry : list of lists
        List of sublattice indices with symmetry
    datasets : espei.utils.PickleableTinyDB
        Database of datasets to search for data
    prop : list
        String name of the property of interest. A dataset is selected when
        its 'output' value is contained in ``prop``.

    Returns
    -------
    list
        List of datasets matching the arguments.

    """
    desired_data = datasets.search(
        (tinydb.where('output').test(lambda x: x in prop)) &
        (tinydb.where('components').test(lambda x: set(x).issubset(comps))) &
        (tinydb.where('solver').test(symmetry_filter, configuration,
                                     recursive_tuplify(symmetry) if symmetry else symmetry)) &
        (tinydb.where('phases') == [phase_name])
    )
    # This seems to be necessary because the 'values' member does not modify 'datasets'
    # But everything else does!
    desired_data = copy.deepcopy(desired_data)

    # The canonical form of the requested configuration is the same for every
    # dataset, so compute it once.
    target_config = canonicalize(configuration, symmetry)

    for data in desired_data:
        solver = data['solver']
        configs = solver['sublattice_configurations']

        # Filter output values to only contain data for matching sublattice configurations.
        # The explicit boolean dtype keeps an empty configuration list usable as a mask.
        is_match = np.array(
            [canonicalize(sblconf, symmetry) == target_config for sblconf in configs],
            dtype=np.bool_)
        matching_configs = np.flatnonzero(is_match)

        # Rewrite output values with filtered data.
        # np.float64 / np.object_ replace the np.float / np.object aliases,
        # which no longer exist in current NumPy releases.
        values = np.array(data['values'], dtype=np.float64)[..., matching_configs]
        solver['sublattice_configurations'] = recursive_tuplify(
            np.array(configs, dtype=np.object_)[matching_configs].tolist())
        # Occupancies are optional in a dataset.
        if 'sublattice_occupancies' in solver:
            solver['sublattice_occupancies'] = np.array(
                solver['sublattice_occupancies'], dtype=np.object_)[matching_configs].tolist()

        # Filter out temperatures below 298.15 K (for now, until better refstates exist)
        temperatures = np.atleast_1d(data['conditions']['T'])
        temp_filter = temperatures >= 298.15
        data['conditions']['T'] = temperatures[temp_filter]
        # Temperature is indexed on the second-to-last axis of the values,
        # configurations on the last one.
        data['values'] = values[..., temp_filter, :]

    return desired_data
def filter_sublattice_configurations(desired_data: List[Dataset], subl_model) -> List[Dataset]:  # TODO: symmetry support
    """
    Modify the desired_data to remove any configurations that cannot be
    represented by the sublattice model.

    A configuration is kept when it has as many sublattices as ``subl_model``
    and the constituents on each sublattice are all contained in the
    corresponding sublattice of the model. The datasets are modified in place.

    Parameters
    ----------
    desired_data : List[Dataset]
        Datasets whose 'values', 'sublattice_configurations' and (if present)
        'sublattice_occupancies' entries are filtered.
    subl_model : sequence of sequences
        Constituents allowed on each sublattice of the model.

    Returns
    -------
    List[Dataset]
        The same list object that was passed in.

    """
    subl_model_sets = [set(subl) for subl in subl_model]
    for data in desired_data:
        solver = data['solver']

        # Per-configuration flag: can the sublattice model represent it?
        representable = []
        for config in solver['sublattice_configurations']:
            config = recursive_tuplify(canonicalize(config, None))
            representable.append(
                len(config) == len(subl_model)
                and all(subl.issuperset(tuplify(config_subl))
                        for subl, config_subl in zip(subl_model_sets, config))
            )
        matching_configs = np.asarray(representable, dtype=np.bool_)

        # Rewrite output values with filtered data; the configurations are
        # indexed on the last axis of the values. np.float64 is used because
        # the np.float_ alias no longer exists in NumPy 2.
        data['values'] = np.array(data['values'], dtype=np.float64)[..., matching_configs]
        solver['sublattice_configurations'] = np.array(
            solver['sublattice_configurations'], dtype=np.object_)[matching_configs].tolist()
        # Occupancies are optional in a dataset.
        if 'sublattice_occupancies' in solver:
            solver['sublattice_occupancies'] = np.array(
                solver['sublattice_occupancies'], dtype=np.object_)[matching_configs].tolist()
    return desired_data
def filter_configurations(desired_data: List[Dataset], configuration, symmetry) -> List[Dataset]:
    """
    Return non-equilibrium thermochemical datasets with invalid configurations removed.

    A sublattice configuration of a dataset is kept when its canonical form
    (with respect to ``symmetry``) equals the canonical form of
    ``configuration``. The datasets are modified in place.

    Parameters
    ----------
    desired_data : List[Dataset]
        List of non-equilibrium thermochemical datasets
    configuration : tuple
        Sublattice configuration as a tuple, e.g. ("CU", ("CU", "MG"))
    symmetry : list of lists
        List of sublattice indices with symmetry

    Returns
    -------
    List[Dataset]
        The same list object that was passed in.

    """
    # The canonical form of the requested configuration is the same for every
    # dataset, so compute it once.
    target_config = canonicalize(configuration, symmetry)

    for data in desired_data:
        solver = data['solver']
        configs = solver['sublattice_configurations']

        # Filter output values to only contain data for matching sublattice configurations.
        # The explicit boolean dtype matters for an empty configuration list:
        # np.array([]) would be a float array, which is not a valid index.
        is_match = np.array(
            [canonicalize(sblconf, symmetry) == target_config for sblconf in configs],
            dtype=np.bool_)
        matching_configs = np.flatnonzero(is_match)

        # Rewrite output values with filtered data; the configurations are
        # indexed on the last axis of the values. np.float64 is used because
        # the np.float_ alias no longer exists in NumPy 2.
        data['values'] = np.array(data['values'], dtype=np.float64)[..., matching_configs]
        solver['sublattice_configurations'] = recursive_tuplify(
            np.array(configs, dtype=np.object_)[matching_configs].tolist())
        # Occupancies are optional in a dataset.
        if 'sublattice_occupancies' in solver:
            solver['sublattice_occupancies'] = np.array(
                solver['sublattice_occupancies'], dtype=np.object_)[matching_configs].tolist()
    return desired_data