def test_pickelable_tinydb_can_be_pickled_and_unpickled():
    """PickleableTinyDB should be able to be pickled and unpickled."""
    payload = {'test_key': ['test', 'values']}
    original_db = PickleableTinyDB(storage=MemoryStorage)
    original_db.insert(payload)
    # Round-trip through pickle and verify the stored document survives.
    restored_db = pickle.loads(pickle.dumps(original_db))
    results = restored_db.search(where('test_key').exists())
    assert results[0] == payload
def get_equilibrium_thermochemical_data(
    dbf: Database,
    comps: Sequence[str],
    phases: Sequence[str],
    datasets: PickleableTinyDB,
    model: Optional[Dict[str, Model]] = None,
    parameters: Optional[Dict[str, float]] = None,
    data_weight_dict: Optional[Dict[str, float]] = None,
) -> Sequence[EqPropData]:
    """
    Get all the EqPropData for each matching equilibrium thermochemical dataset in the datasets

    Parameters
    ----------
    dbf : Database
        Database with parameters to fit
    comps : Sequence[str]
        List of pure element components used to find matching datasets.
    phases : Sequence[str]
        List of phases used to search for matching datasets.
    datasets : PickleableTinyDB
        Datasets that contain single phase data
    model : Optional[Dict[str, Type[Model]]]
        Dictionary phase names to pycalphad Model classes.
    parameters : Optional[Dict[str, float]]
        Mapping of parameter symbols to values.
    data_weight_dict : Optional[Dict[str, float]]
        Mapping of a data type (e.g. `HM` or `SM`) to a weight.

    Notes
    -----
    Found datasets will be subsets of the components and phases. Equilibrium
    thermochemical data is assumed to be any data that does not have the
    `solver` key, and does not have an output of `ZPF` or `ACR` (which
    correspond to different data types than can be calculated here.)

    Returns
    -------
    Sequence[EqPropData]
    """
    # Equilibrium thermochemical data: anything that is not ZPF output, has no
    # 'solver' key (i.e. not non-equilibrium data), and is not activity data.
    query = (
        (where('output') != 'ZPF') &
        (~where('solver').exists()) &
        (where('output').test(lambda x: 'ACR' not in x)) &  # activity data not supported yet
        (where('components').test(lambda x: set(x).issubset(comps))) &
        (where('phases').test(lambda x: set(x).issubset(set(phases))))
    )
    matching_datasets = datasets.search(query)
    # Build one EqPropData per matching dataset (1:1 correspondence).
    return [
        build_eqpropdata(ds, dbf, model=model, parameters=parameters,
                         data_weight_dict=data_weight_dict)
        for ds in matching_datasets
    ]
def apply_tags(datasets: PickleableTinyDB, tags):
    """
    Modify datasets using the tags system

    Parameters
    ----------
    datasets : PickleableTinyDB
        Datasets to modify
    tags : dict
        Dictionary of {tag: update_dict}

    Returns
    -------
    None

    Notes
    -----
    In general, everything replaces or is additive. We use the following update rules:
    1. If the update value is a list, extend the existing list (empty list if key does not exist)
    2. If the update value is scalar, override the previous (deleting any old value, if present)
    3. If the update value is a dict, update the exist dict (empty dict if dict does not exist)
    4. Otherwise, the value is updated, overriding the previous

    Examples
    --------
    >>> from espei.utils import PickleableTinyDB
    >>> from tinydb.storages import MemoryStorage
    >>> ds = PickleableTinyDB(storage=MemoryStorage)
    >>> doc_id = ds.insert({'tags': ['dft'], 'excluded_model_contributions': ['contrib']})
    >>> my_tags = {'dft': {'excluded_model_contributions': ['idmix', 'mag'], 'weight': 5.0}}
    >>> from espei.datasets import apply_tags
    >>> apply_tags(ds, my_tags)
    >>> all_data = ds.all()
    >>> all(d['excluded_model_contributions'] == ['contrib', 'idmix', 'mag'] for d in all_data)
    True
    >>> all(d['weight'] == 5.0 for d in all_data)
    True

    """
    for tag, update_dict in tags.items():
        # The lambda is evaluated eagerly inside search(), so binding `tag`
        # from the loop variable here is safe.
        tagged_docs = datasets.search(where("tags").test(lambda x: tag in x))
        for key, value in update_dict.items():
            for doc in tagged_docs:
                if isinstance(value, list):
                    # Rule 1: extend the existing list (or start from empty).
                    doc[key] = doc.get(key, []) + value
                elif np.isscalar(value):
                    # Rule 2: scalars override outright.
                    doc[key] = value
                elif isinstance(value, dict):
                    # Rule 3: merge into the existing dict (or a fresh one).
                    merged = doc.get(key, dict())
                    merged.update(value)
                    doc[key] = merged
                else:
                    # Rule 4: anything else overrides the previous value.
                    doc[key] = value
                # Persist the mutated document back into the database.
                datasets.update(doc, doc_ids=[doc.doc_id])
def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], datasets: PickleableTinyDB, parameters: Dict[str, float], model: Optional[Dict[str, Type[Model]]] = None):
    """
    Return the ZPF data used in the calculation of ZPF error

    Parameters
    ----------
    dbf : Database
        Database with the relevant active phases and species
    comps : list
        List of active component names
    phases : list
        List of phases to consider
    datasets : espei.utils.PickleableTinyDB
        Datasets that contain single phase data
    parameters : dict
        Dictionary mapping symbols to optimize to their initial values
    model : Optional[Dict[str, Type[Model]]]
        Dictionary phase names to pycalphad Model classes.

    Returns
    -------
    list
        List of data dictionaries with keys ``weight``, ``phase_regions``
        and ``dataset_references``.

    """
    # NOTE(review): another ``get_zpf_data`` definition appears later in this
    # file — if both live in the same module the later one shadows this one;
    # confirm which variant is intended to be active.
    # Match only ZPF datasets whose components are a subset of the active
    # components and which share at least one phase with the active phases.
    desired_data = datasets.search(
        (tinydb.where('output') == 'ZPF') &
        (tinydb.where('components').test(lambda x: set(x).issubset(comps))) &
        (tinydb.where('phases').test(lambda x: len(set(phases).intersection(x)) > 0)))

    zpf_data = []  # 1:1 correspondence with each dataset
    for data in desired_data:
        # VA (vacancy) is always added as a component.
        data_comps = list(set(data['components']).union({'VA'}))
        species = sorted(unpack_components(dbf, data_comps), key=str)
        data_phases = filter_phases(dbf, species, candidate_phases=phases)
        models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters)
        # assumed N, P, T state variables
        phase_recs = build_phase_records(dbf, species, data_phases, {v.N, v.P, v.T}, models, parameters=parameters, build_gradients=True, build_hessians=True)
        # Pre-sample candidate constitution points for every phase once, so
        # each tie-line vertex below can subsample from this shared pool.
        all_phase_points = {phase_name: _sample_phase_constitution(models[phase_name], point_sample, True, 50) for phase_name in data_phases}
        all_regions = data['values']
        conditions = data['conditions']
        phase_regions = []
        # Each phase_region is one set of phases in equilibrium (on a tie-line),
        # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]]
        for idx, phase_region in enumerate(all_regions):
            # Extract the conditions for entire phase region
            pot_conds = _extract_pot_conds(conditions, idx)
            pot_conds.setdefault(v.N, 1.0)  # Add v.N condition, if missing
            # Extract all the phases and compositions from the tie-line points
            vertices = []
            for vertex in phase_region:
                phase_name, comp_conds, disordered_flag = _extract_phases_comps(vertex)
                # Construct single-phase points satisfying the conditions for each phase in the region
                mod = models[phase_name]
                composition = _compute_vertex_composition(data_comps, comp_conds)
                if np.any(np.isnan(composition)):
                    # We can't construct points because we don't have a known composition
                    has_missing_comp_cond = True
                    phase_points = None
                elif _phase_is_stoichiometric(mod):
                    # Stoichiometric phases have a fixed constitution; no sampling needed.
                    has_missing_comp_cond = False
                    phase_points = None
                else:
                    has_missing_comp_cond = False
                    # Only sample points that have an average mass residual within tol
                    tol = 0.02
                    phase_points = _subsample_phase_points(phase_recs[phase_name], all_phase_points[phase_name], composition, tol)
                    assert phase_points.shape[0] > 0, f"phase {phase_name} must have at least one set of points within the target tolerance {pot_conds} {comp_conds}"
                vtx = RegionVertex(phase_name, composition, comp_conds, phase_points, phase_recs, disordered_flag, has_missing_comp_cond)
                vertices.append(vtx)
            region = PhaseRegion(vertices, pot_conds, species, data_phases, models)
            phase_regions.append(region)

        data_dict = {
            'weight': data.get('weight', 1.0),
            'phase_regions': phase_regions,
            'dataset_reference': data['reference']
        }
        zpf_data.append(data_dict)
    return zpf_data
def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], datasets: PickleableTinyDB, parameters: Dict[str, float]):
    """
    Return the ZPF data used in the calculation of ZPF error

    Parameters
    ----------
    dbf : Database
        Database with the relevant active phases and species
    comps : list
        List of active component names
    phases : list
        List of phases to consider
    datasets : espei.utils.PickleableTinyDB
        Datasets that contain single phase data
    parameters : dict
        Dictionary mapping symbols to optimize to their initial values

    Returns
    -------
    list
        List of data dictionaries with keys ``weight``, ``data_comps`` and
        ``phase_regions``. ``data_comps`` are the components for the data in
        question. ``phase_regions`` are the ZPF phases, state variables and compositions.
    """
    # NOTE(review): this duplicates the name of an earlier ``get_zpf_data``
    # in this file — if both are in the same module, this definition shadows
    # the earlier one; confirm which variant is intended to be active.
    # Match only ZPF datasets whose components are a subset of the active
    # components and which share at least one phase with the active phases.
    desired_data = datasets.search(
        (tinydb.where('output') == 'ZPF') &
        (tinydb.where('components').test(lambda x: set(x).issubset(comps))) &
        (tinydb.where('phases').test(lambda x: len(set(phases).intersection(x)) > 0)))

    zpf_data = []  # 1:1 correspondence with each dataset
    for data in desired_data:
        # VA (vacancy) is always added as a component.
        data_comps = list(set(data['components']).union({'VA'}))
        species = sorted(unpack_components(dbf, data_comps), key=str)
        data_phases = filter_phases(dbf, species, candidate_phases=phases)
        models = instantiate_models(dbf, species, data_phases, parameters=parameters)
        all_regions = data['values']
        conditions = data['conditions']
        phase_regions = []
        # Each phase_region is one set of phases in equilibrium (on a tie-line),
        # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]]
        for idx, phase_region in enumerate(all_regions):
            # We need to construct a PhaseRegion by matching up phases/compositions to the conditions
            if len(phase_region) < 2:
                # Skip single-phase regions for fitting purposes
                continue
            # Extract the conditions for entire phase region
            region_potential_conds = extract_conditions(conditions, idx)
            # `or 1.0` also replaces an explicit None/0 value, not only a
            # missing key — presumably intentional for unset conditions.
            region_potential_conds[v.N] = region_potential_conds.get(v.N) or 1.0  # Add v.N condition, if missing
            # Extract all the phases and compositions from the tie-line points
            region_phases, region_comp_conds, phase_flags = extract_phases_comps(phase_region)
            # One set of phase records per tie-line vertex, built from the
            # merged potential + composition conditions of that vertex.
            region_phase_records = [build_phase_records(dbf, species, data_phases, {**region_potential_conds, **comp_conds}, models, parameters=parameters, build_gradients=True, build_hessians=True) for comp_conds in region_comp_conds]
            phase_regions.append(PhaseRegion(region_phases, region_potential_conds, region_comp_conds, phase_flags, dbf, species, data_phases, models, region_phase_records))

        data_dict = {
            'weight': data.get('weight', 1.0),
            'data_comps': data_comps,
            'phase_regions': phase_regions,
            'dataset_reference': data['reference']
        }
        zpf_data.append(data_dict)
    return zpf_data