Ejemplo n.º 1
0
def test_pickelable_tinydb_can_be_pickled_and_unpickled():
    """A PickleableTinyDB must survive a pickle round trip with its documents intact."""
    expected_doc = {'test_key': ['test', 'values']}
    original_db = PickleableTinyDB(storage=MemoryStorage)
    original_db.insert(expected_doc)

    # Serialize and immediately restore; the restored database is the one queried below.
    serialized = pickle.dumps(original_db)
    restored_db = pickle.loads(serialized)

    matches = restored_db.search(where('test_key').exists())
    assert matches[0] == expected_doc
def get_equilibrium_thermochemical_data(
    dbf: Database,
    comps: Sequence[str],
    phases: Sequence[str],
    datasets: PickleableTinyDB,
    model: Optional[Dict[str, Model]] = None,
    parameters: Optional[Dict[str, float]] = None,
    data_weight_dict: Optional[Dict[str, float]] = None,
) -> Sequence[EqPropData]:
    """
    Build an EqPropData for every matching equilibrium thermochemical dataset.

    Parameters
    ----------
    dbf : Database
        Database with parameters to fit
    comps : Sequence[str]
        List of pure element components used to find matching datasets.
    phases : Sequence[str]
        List of phases used to search for matching datasets.
    datasets : PickleableTinyDB
        Datasets to search for equilibrium thermochemical data.
    model : Optional[Dict[str, Type[Model]]]
        Dictionary phase names to pycalphad Model classes.
    parameters : Optional[Dict[str, float]]
        Mapping of parameter symbols to values.
    data_weight_dict : Optional[Dict[str, float]]
        Mapping of a data type (e.g. `HM` or `SM`) to a weight.

    Notes
    -----
    A dataset matches when its components are a subset of ``comps`` and its
    phases are a subset of ``phases``. Equilibrium thermochemical data is
    taken to be any dataset that has no `solver` key, whose output is not
    `ZPF`, and whose output does not contain `ACR` (those correspond to data
    types that are not calculated here).

    Returns
    -------
    Sequence[EqPropData]
        One entry per matching dataset, in the order returned by the search.
    """
    # Each clause of the query is named so the selection criteria read top to bottom.
    is_not_zpf = where('output') != 'ZPF'
    has_no_solver = ~where('solver').exists()
    # activity data not supported yet
    is_not_activity = where('output').test(lambda x: 'ACR' not in x)
    comps_are_subset = where('components').test(lambda x: set(x).issubset(comps))
    phases_are_subset = where('phases').test(lambda x: set(x).issubset(set(phases)))

    matching_datasets = datasets.search(
        is_not_zpf & has_no_solver & is_not_activity & comps_are_subset & phases_are_subset
    )

    # 1:1 correspondence with each dataset
    return [
        build_eqpropdata(
            dataset,
            dbf,
            model=model,
            parameters=parameters,
            data_weight_dict=data_weight_dict,
        )
        for dataset in matching_datasets
    ]
Ejemplo n.º 3
0
def apply_tags(datasets: PickleableTinyDB, tags):
    """
    Modify datasets using the tags system

    Parameters
    ----------
    datasets : PickleableTinyDB
        Datasets to modify
    tags : dict
        Dictionary of {tag: update_dict}

    Returns
    -------
    None

    Notes
    -----
    Every dataset whose ``tags`` entry contains a tag is updated with that
    tag's ``update_dict``. In general, everything replaces or is additive.
    We use the following update rules:
    1. If the update value is a list, extend the existing list (empty list if key does not exist)
    2. If the update value is a dict, update the existing dict (empty dict if key does not exist)
    3. Otherwise (scalars and any other type), the value overrides the previous one

    Tags with an empty update dict are skipped. Each matching dataset is
    written back to the database once, after all of its keys have been
    updated.

    Examples
    --------
    >>> from espei.utils import PickleableTinyDB
    >>> from tinydb.storages import MemoryStorage
    >>> ds = PickleableTinyDB(storage=MemoryStorage)
    >>> doc_id = ds.insert({'tags': ['dft'], 'excluded_model_contributions': ['contrib']})
    >>> my_tags = {'dft': {'excluded_model_contributions': ['idmix', 'mag'], 'weight': 5.0}}
    >>> from espei.datasets import apply_tags
    >>> apply_tags(ds, my_tags)
    >>> all_data = ds.all()
    >>> all(d['excluded_model_contributions'] == ['contrib', 'idmix', 'mag'] for d in all_data)
    True
    >>> all(d['weight'] == 5.0 for d in all_data)
    True

    """
    for tag, update_dict in tags.items():
        if not update_dict:
            # Nothing to apply for this tag, so avoid a pointless search and write.
            continue
        # The lambda is evaluated by `search` right away, so closing over the
        # loop variable `tag` is safe here.
        matching_datasets = datasets.search(where("tags").test(lambda x: tag in x))
        for match in matching_datasets:
            for newkey, newval in update_dict.items():
                if isinstance(newval, list):
                    # Concatenation builds a new list; `newval` itself is never mutated.
                    match[newkey] = match.get(newkey, []) + newval
                elif isinstance(newval, dict):
                    merged = match.get(newkey, {})
                    merged.update(newval)
                    match[newkey] = merged
                else:
                    # Scalars and every remaining type simply replace the old value.
                    match[newkey] = newval
            # Persist once per dataset rather than once per updated key.
            datasets.update(match, doc_ids=[match.doc_id])
Ejemplo n.º 4
0
def get_zpf_data(dbf: Database,
                 comps: Sequence[str],
                 phases: Sequence[str],
                 datasets: PickleableTinyDB,
                 parameters: Dict[str, float],
                 model: Optional[Dict[str, Type[Model]]] = None):
    """
    Return the ZPF data used in the calculation of ZPF error

    Datasets are selected when their ``output`` is ``'ZPF'``, their components
    are a subset of ``comps`` and they share at least one phase with
    ``phases``. For every selected dataset, models and phase records are built
    once and each entry of the dataset's ``values`` is converted into a
    ``PhaseRegion`` made of one ``RegionVertex`` per phase in that entry.

    Parameters
    ----------
    dbf : Database
        Database passed to the component, phase, model and phase record helpers.
    comps : list
        List of active component names
    phases : list
        List of phases to consider
    datasets : espei.utils.PickleableTinyDB
        Datasets to search for ZPF data
    parameters : dict
        Dictionary mapping symbols to optimize to their initial values
    model : Optional[Dict[str, Type[Model]]]
        Dictionary phase names to pycalphad Model classes.

    Returns
    -------
    list
        List of data dictionaries with keys ``weight``, ``phase_regions`` and
        ``dataset_reference``. ``weight`` defaults to 1.0 when the dataset
        does not provide one.

    Raises
    ------
    AssertionError
        If no sampled points of a non-stoichiometric phase with a known
        composition fall within the tolerance used for subsampling.

    Notes
    -----
    The dataset's ``reference`` entry is read without a default, so datasets
    are expected to provide it.
    """
    desired_data = datasets.search(
        (tinydb.where('output') == 'ZPF')
        & (tinydb.where('components').test(lambda x: set(x).issubset(comps)))
        & (tinydb.where('phases').test(
            lambda x: len(set(phases).intersection(x)) > 0)))

    zpf_data = []  # 1:1 correspondence with each dataset
    for data in desired_data:
        # 'VA' is always added to the dataset's components before unpacking species
        data_comps = list(set(data['components']).union({'VA'}))
        species = sorted(unpack_components(dbf, data_comps), key=str)
        data_phases = filter_phases(dbf, species, candidate_phases=phases)
        models = instantiate_models(dbf,
                                    species,
                                    data_phases,
                                    model=model,
                                    parameters=parameters)
        # assumed N, P, T state variables
        phase_recs = build_phase_records(dbf,
                                         species,
                                         data_phases, {v.N, v.P, v.T},
                                         models,
                                         parameters=parameters,
                                         build_gradients=True,
                                         build_hessians=True)
        # Candidate points are sampled once per phase for the whole dataset and
        # subsampled per vertex below.
        # NOTE(review): the trailing `True, 50` arguments are presumably a
        # fixed-grid flag and a point density - confirm against
        # `_sample_phase_constitution`.
        all_phase_points = {
            phase_name: _sample_phase_constitution(models[phase_name],
                                                   point_sample, True, 50)
            for phase_name in data_phases
        }
        all_regions = data['values']
        conditions = data['conditions']
        phase_regions = []
        # Each phase_region is one set of phases in equilibrium (on a tie-line),
        # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]]
        for idx, phase_region in enumerate(all_regions):
            # Extract the conditions for entire phase region
            pot_conds = _extract_pot_conds(conditions, idx)
            pot_conds.setdefault(v.N, 1.0)  # Add v.N condition, if missing
            # Extract all the phases and compositions from the tie-line points
            vertices = []
            for vertex in phase_region:
                phase_name, comp_conds, disordered_flag = _extract_phases_comps(
                    vertex)
                # Construct single-phase points satisfying the conditions for each phase in the region
                mod = models[phase_name]
                composition = _compute_vertex_composition(
                    data_comps, comp_conds)
                if np.any(np.isnan(composition)):
                    # We can't construct points because we don't have a known composition
                    has_missing_comp_cond = True
                    phase_points = None
                elif _phase_is_stoichiometric(mod):
                    # No points are subsampled for stoichiometric phases
                    has_missing_comp_cond = False
                    phase_points = None
                else:
                    has_missing_comp_cond = False
                    # Only sample points that have an average mass residual within tol
                    tol = 0.02
                    phase_points = _subsample_phase_points(
                        phase_recs[phase_name], all_phase_points[phase_name],
                        composition, tol)
                    # An empty subsample is treated as a hard failure for this dataset
                    assert phase_points.shape[
                        0] > 0, f"phase {phase_name} must have at least one set of points within the target tolerance {pot_conds} {comp_conds}"
                # Note that the vertex receives the full `phase_recs` mapping,
                # not only the record for its own phase.
                vtx = RegionVertex(phase_name, composition, comp_conds,
                                   phase_points, phase_recs, disordered_flag,
                                   has_missing_comp_cond)
                vertices.append(vtx)
            region = PhaseRegion(vertices, pot_conds, species, data_phases,
                                 models)
            phase_regions.append(region)

        data_dict = {
            'weight': data.get('weight', 1.0),
            'phase_regions': phase_regions,
            'dataset_reference': data['reference']
        }
        zpf_data.append(data_dict)
    return zpf_data
Ejemplo n.º 5
0
def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str],
                 datasets: PickleableTinyDB, parameters: Dict[str, float]):
    """
    Return the ZPF data used in the calculation of ZPF error

    Datasets are selected when their ``output`` is ``'ZPF'``, their components
    are a subset of ``comps`` and they share at least one phase with
    ``phases``. Entries of a dataset's ``values`` with fewer than two phases
    are skipped.

    Parameters
    ----------
    dbf : Database
        Database passed to the component, phase, model and phase record helpers.
    comps : list
        List of active component names
    phases : list
        List of phases to consider
    datasets : espei.utils.PickleableTinyDB
        Datasets to search for ZPF data
    parameters : dict
        Dictionary mapping symbols to optimize to their initial values

    Returns
    -------
    list
        List of data dictionaries with keys ``weight``, ``data_comps``,
        ``phase_regions`` and ``dataset_reference``. ``data_comps`` are the
        components for the data in question (always including ``'VA'``).
        ``phase_regions`` are the ZPF phases, state variables and compositions.
    """
    # Name each clause of the query so the selection criteria are explicit.
    is_zpf = tinydb.where('output') == 'ZPF'
    comps_are_subset = tinydb.where('components').test(
        lambda x: set(x).issubset(comps))
    shares_a_phase = tinydb.where('phases').test(
        lambda x: len(set(phases).intersection(x)) > 0)
    zpf_datasets = datasets.search(is_zpf & comps_are_subset & shares_a_phase)

    results = []  # 1:1 correspondence with each dataset
    for dataset in zpf_datasets:
        data_comps = list(set(dataset['components']).union({'VA'}))
        species = sorted(unpack_components(dbf, data_comps), key=str)
        data_phases = filter_phases(dbf, species, candidate_phases=phases)
        models = instantiate_models(
            dbf, species, data_phases, parameters=parameters)

        tie_lines = dataset['values']
        conditions = dataset['conditions']
        phase_regions = []
        # Each tie-line is one set of phases in equilibrium,
        # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]]
        for region_idx, tie_line in enumerate(tie_lines):
            if len(tie_line) < 2:
                # Single-phase regions are not used for fitting
                continue

            # Potential conditions shared by the whole region; v.N falls back
            # to 1.0 when it is missing or falsy.
            potential_conds = extract_conditions(conditions, region_idx)
            potential_conds[v.N] = potential_conds.get(v.N) or 1.0

            # Phases, composition conditions and flags of the tie-line points
            region_phases, region_comp_conds, phase_flags = extract_phases_comps(
                tie_line)

            # One set of phase records per composition condition
            phase_records = []
            for comp_conds in region_comp_conds:
                vertex_conds = {**potential_conds, **comp_conds}
                records = build_phase_records(dbf,
                                              species,
                                              data_phases,
                                              vertex_conds,
                                              models,
                                              parameters=parameters,
                                              build_gradients=True,
                                              build_hessians=True)
                phase_records.append(records)

            region = PhaseRegion(region_phases, potential_conds,
                                 region_comp_conds, phase_flags, dbf, species,
                                 data_phases, models, phase_records)
            phase_regions.append(region)

        results.append({
            'weight': dataset.get('weight', 1.0),
            'data_comps': data_comps,
            'phase_regions': phase_regions,
            'dataset_reference': dataset['reference'],
        })
    return results