Exemple #1
0
def test_pickelable_tinydb_can_be_pickled_and_unpickled():
    """PickleableTinyDB should be able to be pickled and unpickled."""
    test_dict = {'test_key': ['test', 'values']}
    db = PickleableTinyDB(storage=MemoryStorage)
    db.insert(test_dict)
    db = pickle.loads(pickle.dumps(db))
    assert db.search(where('test_key').exists())[0] == test_dict
Exemple #2
0
def load_datasets(dataset_filenames):
    """
    Create a PickelableTinyDB with the data from a list of filenames.

    Parameters
    ----------
    dataset_filenames : [str]
        List of filenames to load as datasets

    Returns
    -------
    PickleableTinyDB
    """
    ds_database = PickleableTinyDB(storage=MemoryStorage)
    for fname in dataset_filenames:
        with open(fname) as file_:
            try:
                d = json.load(file_)
                check_dataset(d)
                ds_database.insert(clean_dataset(d))
            except ValueError as e:
                raise ValueError('JSON Error in {}: {}'.format(fname, e))
            except DatasetError as e:
                raise DatasetError('Dataset Error in {}: {}'.format(fname, e))
    return ds_database
Exemple #3
0
def load_datasets(dataset_filenames, include_disabled=False) -> PickleableTinyDB:
    """
    Create a PickelableTinyDB with the data from a list of filenames.

    Parameters
    ----------
    dataset_filenames : [str]
        List of filenames to load as datasets

    Returns
    -------
    PickleableTinyDB
    """
    ds_database = PickleableTinyDB(storage=MemoryStorage)
    for fname in dataset_filenames:
        with open(fname) as file_:
            try:
                d = json.load(file_)
                if not include_disabled and d.get('disabled', False):
                    # The dataset is disabled and not included
                    continue
                check_dataset(d)
                ds_database.insert(clean_dataset(d))
            except ValueError as e:
                raise ValueError('JSON Error in {}: {}'.format(fname, e))
            except DatasetError as e:
                raise DatasetError('Dataset Error in {}: {}'.format(fname, e))
    return ds_database
Exemple #4
0
def apply_tags(datasets: PickleableTinyDB, tags):
    """
    Modify datasets using the tags system

    Parameters
    ----------
    datasets : PickleableTinyDB
        Datasets to modify
    tags : dict
        Dictionary of {tag: update_dict}

    Returns
    -------
    None

    Notes
    -----
    In general, everything replaces or is additive. We use the following update rules:
    1. If the update value is a list, extend the existing list (empty list if key does not exist)
    2. If the update value is scalar, override the previous (deleting any old value, if present)
    3. If the update value is a dict, update the exist dict (empty dict if dict does not exist)
    4. Otherwise, the value is updated, overriding the previous

    Examples
    --------
    >>> from espei.utils import PickleableTinyDB
    >>> from tinydb.storages import MemoryStorage
    >>> ds = PickleableTinyDB(storage=MemoryStorage)
    >>> doc_id = ds.insert({'tags': ['dft'], 'excluded_model_contributions': ['contrib']})
    >>> my_tags = {'dft': {'excluded_model_contributions': ['idmix', 'mag'], 'weight': 5.0}}
    >>> from espei.datasets import apply_tags
    >>> apply_tags(ds, my_tags)
    >>> all_data = ds.all()
    >>> all(d['excluded_model_contributions'] == ['contrib', 'idmix', 'mag'] for d in all_data)
    True
    >>> all(d['weight'] == 5.0 for d in all_data)
    True

    """
    for tag, update_dict in tags.items():
        matching_datasets = datasets.search(where("tags").test(lambda x: tag in x))
        for newkey, newval in update_dict.items():
            for match in matching_datasets:
                if isinstance(newval, list):
                    match[newkey] = match.get(newkey, []) + newval
                elif np.isscalar(newval):
                    match[newkey] = newval
                elif isinstance(newval, dict):
                    d = match.get(newkey, dict())
                    d.update(newval)
                    match[newkey] = d
                else:
                    match[newkey] = newval
                datasets.update(match, doc_ids=[match.doc_id])
Exemple #5
0
def test_dataplot_plots_binary_equilibria_types():
    """Dataplot should be able to reproduce a single boundary (null) equilibria, a tieline, and a 3 phase equilibria for a binary"""
    ds = PickleableTinyDB(storage=MemoryStorage)
    ds.insert(A_B_DATASET_BINARY_PHASE_EQUILIBRIA)

    comps = ['A', 'B']
    phases = ["PHASE_1", "PHASE_2", "PHASE_3"]
    conds = {v.P: 101325, v.T: (0, 400, 40), v.X('B'): (0, 1, 0.01)}

    ax = dataplot(comps, phases, conds, ds)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 400)
Exemple #6
0
def test_get_data_for_a_minimal_example():
    """Given a dataset and the congfiguration pertaining to that dataset, we should find the values."""
    SAMPLE_DATASET = {
        "components": ["CU", "MG", "VA"],
        "phases": ["LAVES_C15"],
        "solver": {
            "mode":
            "manual",
            "sublattice_site_ratios": [2, 1],
            "sublattice_configurations": [["CU", "MG"], ["MG", "CU"],
                                          ["MG", "MG"], ["CU", "CU"]]
        },
        "conditions": {
            "P": 101325,
            "T": 298.15
        },
        "output": "HM_FORM",
        "values": [[[-15720, 34720, 7000, 15500]]]
    }
    datasets = PickleableTinyDB(storage=MemoryStorage)
    datasets.insert(SAMPLE_DATASET)
    comps = ['CU', 'MG', 'VA']
    phase_name = 'LAVES_C15'
    configuration = ('MG', 'CU')
    symmetry = None
    desired_props = ['HM_FORM']

    # The following lines replace "get_data" in a more functional form
    solver_qry = (tinydb.where('solver').test(
        symmetry_filter, configuration,
        recursive_tuplify(symmetry) if symmetry else symmetry))
    desired_data = get_prop_data(comps,
                                 phase_name,
                                 desired_props,
                                 datasets,
                                 additional_query=solver_qry)
    desired_data = filter_configurations(desired_data, configuration, symmetry)
    desired_data = filter_temperatures(desired_data)

    assert len(desired_data) == 1
    desired_data = desired_data[0]
    assert desired_data['components'] == comps
    assert desired_data['phases'][0] == phase_name
    assert desired_data['solver']['sublattice_site_ratios'] == [2, 1]
    assert desired_data['solver']['sublattice_configurations'] == (('MG',
                                                                    'CU'), )
    assert desired_data['conditions']['P'] == 101325
    assert desired_data['conditions']['T'] == 298.15
    assert desired_data['output'] == 'HM_FORM'
    assert desired_data['values'] == np.array([[[34720.0]]])
def get_equilibrium_thermochemical_data(
    dbf: Database,
    comps: Sequence[str],
    phases: Sequence[str],
    datasets: PickleableTinyDB,
    model: Optional[Dict[str, Model]] = None,
    parameters: Optional[Dict[str, float]] = None,
    data_weight_dict: Optional[Dict[str, float]] = None,
) -> Sequence[EqPropData]:
    """
    Get all the EqPropData for each matching equilibrium thermochemical dataset in the datasets

    Parameters
    ----------
    dbf : Database
        Database with parameters to fit
    comps : Sequence[str]
        List of pure element components used to find matching datasets.
    phases : Sequence[str]
        List of phases used to search for matching datasets.
    datasets : PickleableTinyDB
        Datasets that contain single phase data
    model : Optional[Dict[str, Type[Model]]]
        Dictionary phase names to pycalphad Model classes.
    parameters : Optional[Dict[str, float]]
        Mapping of parameter symbols to values.
    data_weight_dict : Optional[Dict[str, float]]
        Mapping of a data type (e.g. `HM` or `SM`) to a weight.

    Notes
    -----
    Found datasets will be subsets of the components and phases. Equilibrium
    thermochemical data is assumed to be any data that does not have the
    `solver` key, and does not have an output of `ZPF` or `ACR` (which
    correspond to different data types than can be calculated here.)

    Returns
    -------
    Sequence[EqPropData]
    """

    desired_data = datasets.search(
        # data that isn't ZPF or non-equilibrium thermochemical
        (where('output') != 'ZPF') & (~where('solver').exists())
        & (where('output').test(lambda x: 'ACR' not in x))
        &  # activity data not supported yet
        (where('components').test(lambda x: set(x).issubset(comps)))
        & (where('phases').test(lambda x: set(x).issubset(set(phases)))))

    eq_thermochemical_data = []  # 1:1 correspondence with each dataset
    for data in desired_data:
        eq_thermochemical_data.append(
            build_eqpropdata(data,
                             dbf,
                             model=model,
                             parameters=parameters,
                             data_weight_dict=data_weight_dict))
    return eq_thermochemical_data
Exemple #8
0
def test_get_data_for_a_minimal_example():
    """Given a dataset and the congfiguration pertaining to that dataset, we should find the values."""
    SAMPLE_DATASET = {
        "components": ["CU", "MG", "VA"],
        "phases": ["LAVES_C15"],
        "solver": {
            "mode":
            "manual",
            "sublattice_site_ratios": [2, 1],
            "sublattice_configurations": [["CU", "MG"], ["MG", "CU"],
                                          ["MG", "MG"], ["CU", "CU"]]
        },
        "conditions": {
            "P": 101325,
            "T": 298.15
        },
        "output": "HM_FORM",
        "values": [[[-15720, 34720, 7000, 15500]]]
    }
    datasets = PickleableTinyDB(storage=MemoryStorage)
    datasets.insert(SAMPLE_DATASET)
    comps = ['CU', 'MG', 'VA']
    phase_name = 'LAVES_C15'
    configuration = ('MG', 'CU')
    symmetry = None
    desired_props = ['HM_FORM']

    desired_data = get_data(comps, phase_name, configuration, symmetry,
                            datasets, desired_props)
    assert len(desired_data) == 1
    desired_data = desired_data[0]
    assert desired_data['components'] == comps
    assert desired_data['phases'][0] == phase_name
    assert desired_data['solver']['sublattice_site_ratios'] == [2, 1]
    assert desired_data['solver']['sublattice_configurations'] == (('MG',
                                                                    'CU'), )
    assert desired_data['conditions']['P'] == 101325
    assert desired_data['conditions']['T'] == 298.15
    assert desired_data['output'] == 'HM_FORM'
    assert desired_data['values'] == np.array([[[34720.0]]])
def test_weighting_invariance():
    """Test that weights do not affect model selection using perfect L0 and L1 cases."""
    phase_models = {
        "components": ["AL", "B"],
        "phases": {
            "ALPHA": {
                "sublattice_model": [["AL", "B"]],
                "sublattice_site_ratios": [1]
            }
        }
    }

    L0_data = {
        "components": ["AL", "B"],
        "phases": ["ALPHA"],
        "solver": {
            "sublattice_site_ratios": [1],
            "sublattice_occupancies": [[[0.5, 0.5]]],
            "sublattice_configurations": [[["AL", "B"]]],
            "mode": "manual"
        },
        "conditions": {
            "P": 101325,
            "T": 298.15
        },
        "output": "HM_MIX",
        "values": [[[-1000]]]
    }

    L1_data = {
        "components": ["AL", "B"],
        "phases": ["ALPHA"],
        "solver": {
            "sublattice_site_ratios": [1],
            "sublattice_occupancies": [[[0.25, 0.75]], [[0.5, 0.5]],
                                       [[0.75, 0.25]]],
            "sublattice_configurations": [[["AL", "B"]], [["AL", "B"]],
                                          [["AL", "B"]]],
            "mode":
            "manual"
        },
        "conditions": {
            "P": 101325,
            "T": 298.15
        },
        "output": "HM_MIX",
        "values": [[[-1000.0, 0, 1000.0]]]
    }

    # Perfect L0, no weight
    datasets_db = PickleableTinyDB(storage=MemoryStorage)
    datasets_db.insert(L0_data)
    dbf = generate_parameters(phase_models, datasets_db, 'SGTE91', 'linear')
    datasets_db.close()
    params = dbf._parameters.search(where('parameter_type') == 'L')
    print([f"L{p['parameter_order']}: {p['parameter']}" for p in params])
    print({
        str(p['parameter']): dbf.symbols[str(p['parameter'])]
        for p in params
    })
    assert len(params) == 1
    assert dbf.symbols['VV0000'] == -4000

    # Perfect L0, with weight
    datasets_db = PickleableTinyDB(storage=MemoryStorage)
    L0_data['weight'] = 0.1  # lower weight
    datasets_db.insert(L0_data)
    dbf = generate_parameters(phase_models, datasets_db, 'SGTE91', 'linear')
    datasets_db.close()
    params = dbf._parameters.search(where('parameter_type') == 'L')
    print([f"L{p['parameter_order']}: {p['parameter']}" for p in params])
    print({
        str(p['parameter']): dbf.symbols[str(p['parameter'])]
        for p in params
    })
    assert len(params) == 1
    assert dbf.symbols['VV0000'] == -4000

    # Perfect L1, no weight
    datasets_db = PickleableTinyDB(storage=MemoryStorage)
    datasets_db.insert(L1_data)
    dbf = generate_parameters(phase_models, datasets_db, 'SGTE91', 'linear')
    datasets_db.close()
    params = dbf._parameters.search(where('parameter_type') == 'L')
    print([f"L{p['parameter_order']}: {p['parameter']}" for p in params])
    print({
        str(p['parameter']): dbf.symbols[str(p['parameter'])]
        for p in params
    })
    assert len(params) == 2
    assert np.isclose(dbf.symbols['VV0000'], 1000 * 32 / 3)  # L1
    assert np.isclose(dbf.symbols['VV0001'], 0)  # L0

    # Perfect L1, with weight
    datasets_db = PickleableTinyDB(storage=MemoryStorage)
    L1_data['weight'] = 0.1  # lower weight
    datasets_db.insert(L1_data)
    dbf = generate_parameters(phase_models, datasets_db, 'SGTE91', 'linear')
    datasets_db.close()
    params = dbf._parameters.search(where('parameter_type') == 'L')
    print([f"L{p['parameter_order']}: {p['parameter']}" for p in params])
    print({
        str(p['parameter']): dbf.symbols[str(p['parameter'])]
        for p in params
    })
    # TODO: sometimes the presence of L0 terms can be flaky
    # assert len(params) == 2
    assert np.isclose(dbf.symbols['VV0000'], 1000 * 32 / 3)  # L1
Exemple #10
0
def get_zpf_data(dbf: Database,
                 comps: Sequence[str],
                 phases: Sequence[str],
                 datasets: PickleableTinyDB,
                 parameters: Dict[str, float],
                 model: Optional[Dict[str, Type[Model]]] = None):
    """
    Return the ZPF data used in the calculation of ZPF error

    Parameters
    ----------
    comps : list
        List of active component names
    phases : list
        List of phases to consider
    datasets : espei.utils.PickleableTinyDB
        Datasets that contain single phase data
    parameters : dict
        Dictionary mapping symbols to optimize to their initial values
    model : Optional[Dict[str, Type[Model]]]
        Dictionary phase names to pycalphad Model classes.

    Returns
    -------
    list
        List of data dictionaries with keys ``weight``, ``phase_regions`` and ``dataset_references``.
    """
    desired_data = datasets.search(
        (tinydb.where('output') == 'ZPF')
        & (tinydb.where('components').test(lambda x: set(x).issubset(comps)))
        & (tinydb.where('phases').test(
            lambda x: len(set(phases).intersection(x)) > 0)))

    zpf_data = []  # 1:1 correspondence with each dataset
    for data in desired_data:
        data_comps = list(set(data['components']).union({'VA'}))
        species = sorted(unpack_components(dbf, data_comps), key=str)
        data_phases = filter_phases(dbf, species, candidate_phases=phases)
        models = instantiate_models(dbf,
                                    species,
                                    data_phases,
                                    model=model,
                                    parameters=parameters)
        # assumed N, P, T state variables
        phase_recs = build_phase_records(dbf,
                                         species,
                                         data_phases, {v.N, v.P, v.T},
                                         models,
                                         parameters=parameters,
                                         build_gradients=True,
                                         build_hessians=True)
        all_phase_points = {
            phase_name: _sample_phase_constitution(models[phase_name],
                                                   point_sample, True, 50)
            for phase_name in data_phases
        }
        all_regions = data['values']
        conditions = data['conditions']
        phase_regions = []
        # Each phase_region is one set of phases in equilibrium (on a tie-line),
        # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]]
        for idx, phase_region in enumerate(all_regions):
            # Extract the conditions for entire phase region
            pot_conds = _extract_pot_conds(conditions, idx)
            pot_conds.setdefault(v.N, 1.0)  # Add v.N condition, if missing
            # Extract all the phases and compositions from the tie-line points
            vertices = []
            for vertex in phase_region:
                phase_name, comp_conds, disordered_flag = _extract_phases_comps(
                    vertex)
                # Construct single-phase points satisfying the conditions for each phase in the region
                mod = models[phase_name]
                composition = _compute_vertex_composition(
                    data_comps, comp_conds)
                if np.any(np.isnan(composition)):
                    # We can't construct points because we don't have a known composition
                    has_missing_comp_cond = True
                    phase_points = None
                elif _phase_is_stoichiometric(mod):
                    has_missing_comp_cond = False
                    phase_points = None
                else:
                    has_missing_comp_cond = False
                    # Only sample points that have an average mass residual within tol
                    tol = 0.02
                    phase_points = _subsample_phase_points(
                        phase_recs[phase_name], all_phase_points[phase_name],
                        composition, tol)
                    assert phase_points.shape[
                        0] > 0, f"phase {phase_name} must have at least one set of points within the target tolerance {pot_conds} {comp_conds}"
                vtx = RegionVertex(phase_name, composition, comp_conds,
                                   phase_points, phase_recs, disordered_flag,
                                   has_missing_comp_cond)
                vertices.append(vtx)
            region = PhaseRegion(vertices, pot_conds, species, data_phases,
                                 models)
            phase_regions.append(region)

        data_dict = {
            'weight': data.get('weight', 1.0),
            'phase_regions': phase_regions,
            'dataset_reference': data['reference']
        }
        zpf_data.append(data_dict)
    return zpf_data
Exemple #11
0
def datasets_db():
    """Returns a clean instance of a PickleableTinyDB for datasets"""
    db = PickleableTinyDB(storage=MemoryStorage)
    yield db
    db.close()
Exemple #12
0
def generate_parameters(phase_models,
                        datasets,
                        ref_state,
                        excess_model,
                        ridge_alpha=None,
                        aicc_penalty_factor=None,
                        dbf=None):
    """Generate parameters from given phase models and datasets

    Parameters
    ----------
    phase_models : dict
        Dictionary of components and phases to fit.
    datasets : PickleableTinyDB
        database of single- and multi-phase to fit.
    ref_state : str
        String of the reference data to use, e.g. 'SGTE91' or 'SR2016'
    excess_model : str
        String of the type of excess model to fit to, e.g. 'linear'
    ridge_alpha : float
        Value of the :math:`\\alpha` hyperparameter used in ridge regression. Defaults
        to None, which falls back to ordinary least squares regression.
        For now, the parameter is applied to all features.
    aicc_penalty_factor : dict
        Map of phase name to feature to a multiplication factor for the AICc's parameter penalty.
    dbf : Database
        Initial pycalphad Database that can have parameters that would not be fit by ESPEI

    Returns
    -------
    pycalphad.Database

    """
    # Set NumPy print options so logged arrays print on one line. Reset at the end.
    np.set_printoptions(linewidth=sys.maxsize)
    _log.info('Generating parameters.')
    _log.trace('Found the following user reference states: %s',
               espei.refdata.INSERTED_USER_REFERENCE_STATES)
    refdata = getattr(espei.refdata, ref_state)
    aliases = extract_aliases(phase_models)
    dbf = initialize_database(phase_models, ref_state, dbf)
    # Fit phases in alphabetic order so the VV#### counter is constistent between runs
    for phase_name, phase_data in sorted(phase_models['phases'].items(),
                                         key=operator.itemgetter(0)):
        if phase_name in dbf.phases:
            symmetry = phase_data.get('equivalent_sublattices', None)
            # Filter datasets by thermochemical data for this phase
            dataset = tinydb.Query()
            phase_filtered_datasets = PickleableTinyDB(
                storage=tinydb.storages.MemoryStorage)
            single_phase_thermochemical_query = (
                (dataset.phases == [phase_name])  # TODO: aliases support
                & dataset.solver.exists())
            phase_filtered_datasets.insert_multiple(
                datasets.search(single_phase_thermochemical_query))
            phase_fit(dbf,
                      phase_name,
                      symmetry,
                      phase_filtered_datasets,
                      refdata,
                      ridge_alpha,
                      aicc_penalty=aicc_penalty_factor,
                      aliases=aliases)
    _log.info('Finished generating parameters.')
    np.set_printoptions(linewidth=75)
    return dbf
Exemple #13
0
def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str],
                 datasets: PickleableTinyDB, parameters: Dict[str, float]):
    """
    Return the ZPF data used in the calculation of ZPF error

    Parameters
    ----------
    comps : list
        List of active component names
    phases : list
        List of phases to consider
    datasets : espei.utils.PickleableTinyDB
        Datasets that contain single phase data
    parameters : dict
        Dictionary mapping symbols to optimize to their initial values

    Returns
    -------
    list
        List of data dictionaries with keys ``weight``, ``data_comps`` and
        ``phase_regions``. ``data_comps`` are the components for the data in
        question. ``phase_regions`` are the ZPF phases, state variables and compositions.
    """
    desired_data = datasets.search(
        (tinydb.where('output') == 'ZPF')
        & (tinydb.where('components').test(lambda x: set(x).issubset(comps)))
        & (tinydb.where('phases').test(
            lambda x: len(set(phases).intersection(x)) > 0)))

    zpf_data = []  # 1:1 correspondence with each dataset
    for data in desired_data:
        data_comps = list(set(data['components']).union({'VA'}))
        species = sorted(unpack_components(dbf, data_comps), key=str)
        data_phases = filter_phases(dbf, species, candidate_phases=phases)
        models = instantiate_models(dbf,
                                    species,
                                    data_phases,
                                    parameters=parameters)
        all_regions = data['values']
        conditions = data['conditions']
        phase_regions = []
        # Each phase_region is one set of phases in equilibrium (on a tie-line),
        # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]]
        for idx, phase_region in enumerate(all_regions):
            # We need to construct a PhaseRegion by matching up phases/compositions to the conditions
            if len(phase_region) < 2:
                # Skip single-phase regions for fitting purposes
                continue
            # Extract the conditions for entire phase region
            region_potential_conds = extract_conditions(conditions, idx)
            region_potential_conds[v.N] = region_potential_conds.get(
                v.N) or 1.0  # Add v.N condition, if missing
            # Extract all the phases and compositions from the tie-line points
            region_phases, region_comp_conds, phase_flags = extract_phases_comps(
                phase_region)
            region_phase_records = [
                build_phase_records(dbf,
                                    species,
                                    data_phases, {
                                        **region_potential_conds,
                                        **comp_conds
                                    },
                                    models,
                                    parameters=parameters,
                                    build_gradients=True,
                                    build_hessians=True)
                for comp_conds in region_comp_conds
            ]
            phase_regions.append(
                PhaseRegion(region_phases, region_potential_conds,
                            region_comp_conds, phase_flags, dbf, species,
                            data_phases, models, region_phase_records))

        data_dict = {
            'weight': data.get('weight', 1.0),
            'data_comps': data_comps,
            'phase_regions': phase_regions,
            'dataset_reference': data['reference']
        }
        zpf_data.append(data_dict)
    return zpf_data