def build_eqpropdata(
        data: tinydb.database.Document,
        dbf: Database,
        parameters: Optional[Dict[str, float]] = None,
        data_weight_dict: Optional[Dict[str, float]] = None) -> EqPropData:
    """
    Build EqPropData for the calculations corresponding to a single dataset.

    Parameters
    ----------
    data : tinydb.database.Document
        Document corresponding to a single ESPEI dataset. The keys
        ``components``, ``phases``, ``output``, ``values`` and ``conditions``
        are read; ``reference``, ``reference_states`` and ``weight`` are
        optional.
    dbf : Database
        Database that should be used to construct the `Model` and `PhaseRecord` objects.
    parameters : Optional[Dict[str, float]]
        Mapping of parameter symbols to values.
    data_weight_dict : Optional[Dict[str, float]]
        Mapping of a data type (e.g. `HM` or `SM`) to a weight.

    Returns
    -------
    EqPropData

    Notes
    -----
    The ``conditions`` mapping of ``data`` is not modified. The default of
    ``N = 1.0`` is applied to a local shallow copy so that building the
    calculation data has no side effect on the caller's dataset.
    """
    parameters = parameters if parameters is not None else {}
    data_weight_dict = data_weight_dict if data_weight_dict is not None else {}
    # Fallback standard deviations, keyed by the bare property name.
    property_std_deviation = {
        'HM': 500.0,  # J/mol
        'SM': 0.2,  # J/K-mol
        'CPM': 0.2,  # J/K-mol
    }

    params_keys, _ = extract_parameters(parameters)

    # 'VA' is always added to the components of the dataset.
    data_comps = list(set(data['components']).union({'VA'}))
    species = sorted(unpack_components(dbf, data_comps), key=str)
    data_phases = filter_phases(dbf, species, candidate_phases=data['phases'])
    models = instantiate_models(dbf,
                                species,
                                data_phases,
                                parameters=parameters)
    output = data['output']
    # property without _FORM, _MIX, etc.
    property_output = output.split('_')[0]
    samples = np.array(data['values']).flatten()
    reference = data.get('reference', '')

    # Models are now modified in response to the data from this data
    if 'reference_states' in data:
        # unreferenced model property so we can tell shift_reference_state
        # what to build.
        property_output = output[:-1] if output.endswith('R') else output
        reference_states = [
            ReferenceState(v.Species(el),
                           vals['phase'],
                           fixed_statevars=vals.get('fixed_state_variables'))
            for el, vals in data['reference_states'].items()
        ]
        for mod in models.values():
            mod.shift_reference_state(reference_states,
                                      dbf,
                                      output=(property_output, ))

    # Apply the default for N on a copy so the caller's dataset is untouched.
    # Nothing other than N = 1.0 is supported in pycalphad anyway.
    conditions = dict(data['conditions'])
    conditions.setdefault('N', 1.0)
    sorted_keys = sorted(conditions)
    # Keys starting with 'X_' are composition conditions; every other key is
    # looked up as an attribute of `v` and treated as a potential.
    pot_conds = OrderedDict([(getattr(v, key),
                              unpack_condition(conditions[key]))
                             for key in sorted_keys
                             if not key.startswith('X_')])
    comp_conds = OrderedDict([(v.X(key[2:]),
                               unpack_condition(conditions[key]))
                              for key in sorted_keys
                              if key.startswith('X_')])

    phase_records = build_phase_records(dbf,
                                        species,
                                        data_phases, {
                                            **pot_conds,
                                            **comp_conds
                                        },
                                        models,
                                        parameters=parameters,
                                        build_gradients=True,
                                        build_hessians=True)

    # Now we need to unravel the composition conditions
    # (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since the
    # composition conditions are only broadcast against the potentials, not
    # each other. Each individual composition needs to be computed
    # independently, since broadcasting over composition cannot be turned off
    # in pycalphad.
    rav_comp_conds = [
        OrderedDict(zip(comp_conds.keys(), pt_comps))
        for pt_comps in zip(*comp_conds.values())
    ]

    # Build weights, should be the same size as the values
    total_num_calculations = len(rav_comp_conds) * np.prod(
        [len(vals) for vals in pot_conds.values()])
    dataset_weights = np.array(data.get('weight',
                                        1.0)) * np.ones(total_num_calculations)
    # Standard deviation of the property, divided by the per-property weight
    # and by the per-dataset weight.
    weights = (property_std_deviation.get(property_output, 1.0) /
               data_weight_dict.get(property_output, 1.0) /
               dataset_weights).flatten()

    return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds,
                      models, params_keys, phase_records, output, samples,
                      weights, reference)
Exemple #2
0
def _compute_phase_values(components,
                          statevar_dict,
                          points,
                          phase_record,
                          output,
                          maximum_internal_dof,
                          broadcast=True,
                          parameters=None,
                          fake_points=False,
                          largest_energy=None):
    """
    Calculate output values for a particular phase.

    Parameters
    ----------
    components : list
        Components to consider in the calculation. Each entry must expose a
        ``constituents`` mapping whose keys are element names.
    statevar_dict : OrderedDict {str -> float or sequence}
        Mapping of state variables to desired values. This will broadcast if necessary.
    points : ndarray
        Inputs to 'func', except state variables. Columns should be in 'variables' order.
    phase_record : PhaseRecord
        Contains callable for energy and phase metadata.
    output : string
        Desired name of the output result in the Dataset.
    maximum_internal_dof : int
        Largest number of internal degrees of freedom of any phase. This is used
        to guarantee different phase's Datasets can be concatenated.
    broadcast : bool
        If True, broadcast state variables against each other to create a grid.
        If False, assume state variables are given as equal-length lists (or single-valued).
    parameters : OrderedDict {str -> float or sequence}, optional
        Maps SymPy symbols to a scalar or 1-D array. The arrays must be equal length.
        The corresponding PhaseRecord must have been initialized with the same parameters.
    fake_points : bool, optional (Default: False)
        If True, the first few points of the output surface will be fictitious
        points used to define an equilibrium hyperplane guaranteed to be above
        all the other points. This is used for convex hull computations.
    largest_energy : float, optional
        Output value assigned to the fictitious points. Only used when
        `fake_points` is True.

    Returns
    -------
    Dataset of the output attribute as a function of state variables

    Raises
    ------
    ValueError
        If `broadcast` is False and a state variable has a length that is
        neither 1 nor the number of given points.

    Examples
    --------
    None yet.
    """
    if broadcast:
        # Broadcast compositions and state variables along orthogonal axes
        # This lets us eliminate an expensive Python loop
        statevars = np.meshgrid(*itertools.chain(statevar_dict.values(),
                                                 [np.empty(points.shape[-2])]),
                                sparse=True,
                                indexing='ij')[:-1]
        points = broadcast_to(
            points,
            tuple(len(np.atleast_1d(x))
                  for x in statevar_dict.values()) + points.shape[-2:])
    else:
        statevars = []
        for statevar in (np.atleast_1d(x) for x in statevar_dict.values()):
            # Single values are repeated for every point; any other length
            # mismatch is an error.
            if (len(statevar) != len(points)) and (len(statevar) == 1):
                statevar = np.repeat(statevar, len(points))
            if (len(statevar) != len(points)) and (len(statevar) != 1):
                raise ValueError(
                    'Length of state variable list and number of given points must be equal when '
                    'broadcast=False.')
            statevars.append(statevar)
    # Sorted, upper-cased, de-duplicated element names, excluding 'VA'.
    pure_elements = sorted({
        el.upper()
        for comp in components
        for el in comp.constituents.keys()
    })
    pure_elements = [x for x in pure_elements if x != 'VA']
    # func may only have support for vectorization along a single axis (no broadcasting)
    # we need to force broadcasting and flatten the result before calling
    bc_statevars = np.ascontiguousarray(
        [broadcast_to(x, points.shape[:-1]).reshape(-1) for x in statevars])
    pts = points.reshape(-1, points.shape[-1])
    dof = np.ascontiguousarray(np.concatenate((bc_statevars.T, pts), axis=1))
    phase_compositions = np.zeros((dof.shape[0], len(pure_elements)),
                                  order='F')

    param_symbols, parameter_array = extract_parameters(parameters)
    parameter_array_length = parameter_array.shape[0]
    if parameter_array_length == 0:
        # No parameters specified
        phase_output = np.zeros(dof.shape[0], order='C')
        phase_record.obj_2d(phase_output, dof)
    else:
        # Vectorized parameter arrays
        phase_output = np.zeros((dof.shape[0], parameter_array_length),
                                order='C')
        phase_record.obj_parameters_2d(phase_output, dof, parameter_array)

    # The mass functions fill one column of phase_compositions per element.
    for el_idx in range(len(pure_elements)):
        phase_record.mass_obj_2d(phase_compositions[:, el_idx], dof, el_idx)

    max_tieline_vertices = len(pure_elements)
    if isinstance(phase_output, (float, int)):
        phase_output = broadcast_to(phase_output, points.shape[:-1])
    if isinstance(phase_compositions, (float, int)):
        phase_compositions = broadcast_to(
            phase_output, points.shape[:-1] + (len(pure_elements), ))
    # np.float was an alias of the builtin float and has been removed from
    # NumPy; np.float64 is the same dtype.
    phase_output = np.asarray(phase_output, dtype=np.float64)
    if parameter_array_length <= 1:
        phase_output.shape = points.shape[:-1]
    else:
        phase_output.shape = points.shape[:-1] + (parameter_array_length, )
    phase_compositions = np.asarray(phase_compositions, dtype=np.float64)
    phase_compositions.shape = points.shape[:-1] + (len(pure_elements), )

    name_dtype = 'U' + str(len(phase_record.phase_name))
    phase_names = np.full(points.shape[:-1],
                          phase_record.phase_name,
                          dtype=name_dtype)
    if fake_points:
        # Prepend one fictitious point per pure element to the output values,
        # the phase names and the compositions.
        fake_shape = points.shape[:-2] + (max_tieline_vertices, )
        output_shape = fake_shape
        if parameter_array_length > 1:
            output_shape = output_shape + (parameter_array_length, )
            concat_axis = -2
        else:
            concat_axis = -1
        phase_output = np.concatenate(
            (broadcast_to(largest_energy, output_shape), phase_output),
            axis=concat_axis)
        phase_names = np.concatenate(
            (broadcast_to('_FAKE_', fake_shape), phase_names), axis=-1)
        # The fictitious compositions are rows of the identity matrix.
        phase_compositions = np.concatenate(
            (np.broadcast_to(np.eye(len(pure_elements)),
                             fake_shape + (len(pure_elements), )),
             phase_compositions),
            axis=-2)

    coordinate_dict = {'component': pure_elements}
    # Resize 'points' so it has the same number of columns as the maximum
    # number of internal degrees of freedom of any phase in the calculation.
    # We do this so that everything is aligned for concat.
    # Waste of memory? Yes, but the alternatives are unclear.
    # In each case, first check if we need to do this...
    # It can be expensive for many points (~14s for 500M points)
    if fake_points:
        desired_shape = points.shape[:-2] + (
            max_tieline_vertices + points.shape[-2], maximum_internal_dof)
        expanded_points = np.full(desired_shape, np.nan)
        expanded_points[..., len(pure_elements):, :points.shape[-1]] = points
    else:
        desired_shape = points.shape[:-1] + (maximum_internal_dof, )
        if points.shape == desired_shape:
            expanded_points = points
        else:
            # TODO: most optimal solution would be to take pre-extended arrays as an argument and remove this
            # This still copies the array, but is more efficient than filling
            # an array with np.nan, then copying the existing points
            append_nans = np.full(
                desired_shape[:-1] + (desired_shape[-1] - points.shape[-1], ),
                np.nan)
            expanded_points = np.append(points, append_nans, axis=-1)
    if broadcast:
        coordinate_dict.update({
            key: np.atleast_1d(value)
            for key, value in statevar_dict.items()
        })
        output_columns = [str(x) for x in statevar_dict.keys()] + ['points']
    else:
        output_columns = ['points']
    if parameter_array_length > 1:
        parameter_column = ['samples']
        coordinate_dict['param_symbols'] = [str(x) for x in param_symbols]
    else:
        parameter_column = []
    # Any leading axes of phase_output that are not accounted for by the
    # output and parameter columns get generic 'dim_<i>' names.
    num_extra_dims = len(phase_output.shape) - (len(output_columns) +
                                                len(parameter_column))
    extra_dims = ['dim_' + str(i) for i in range(num_extra_dims)]
    data_arrays = {
        'X': (output_columns + ['component'], phase_compositions),
        'Phase': (output_columns, phase_names),
        'Y': (output_columns + ['internal_dof'], expanded_points),
        output: (extra_dims + output_columns + parameter_column, phase_output)
    }
    if not broadcast:
        # Add state variables as data variables rather than as coordinates
        for sym, vals in zip(statevar_dict.keys(), statevars):
            data_arrays.update({sym: (output_columns, vals)})
    if parameter_array_length > 1:
        data_arrays['param_values'] = (['samples',
                                        'param_symbols'], parameter_array)
    return LightDataset(data_arrays, coords=coordinate_dict)
Exemple #3
0
def build_phase_records(dbf,
                        comps,
                        phases,
                        conds,
                        models,
                        output='GM',
                        callables=None,
                        parameters=None,
                        verbose=False,
                        build_gradients=False,
                        build_hessians=False):
    """
    Create one PhaseRecord per phase from compiled callables and constraints.

    Parameters
    ----------
    dbf : Database
        A Database object
    comps : list
        List of component names
    phases : list
        List of phase names
    conds : dict or None
        Conditions for calculation
    models : dict
        Dictionary of {'phase_name': Model()}
    output : str
        Output property of the particular Model to sample
    callables : dict, optional
        Pre-computed callables, mapping
        {'output' -> {'function' -> {'phase_name' -> AutowrapFunction()}}.
        They are built here when there is no entry for `output`.
    parameters : dict, optional
        Maps SymPy Symbol to numbers, for overriding the values of parameters in the Database.
    verbose : bool, optional
        Print the name of each phase once its record has been created
    build_gradients : bool
        Whether or not to build gradient functions. Defaults to False. Only
        takes effect if callables are not passed.
    build_hessians : bool
        Whether or not to build Hessian functions. Defaults to False. Only
        takes effect if callables are not passed.

    Returns
    -------
    dict
        Dictionary mapping upper-cased phase names to PhaseRecord instances.

    Notes
    -----
    Callables that are passed in are used as-is and are not checked for
    compatibility. Users of build_callables are responsible for ensuring that
    the state variables, parameters and models used to construct the
    callables are compatible with the ones used to build the constraints and
    phase records.

    """
    if parameters is None:
        parameters = {}
    if callables is None:
        callables = {}

    state_variables = sorted(get_state_variables(models=models, conds=conds),
                             key=str)
    param_symbols, param_values = extract_parameters(parameters)

    # Only compile when nothing usable was supplied for this output.
    if callables.get(output) is None:
        callables = build_callables(dbf, comps, phases, models,
                                    parameter_symbols=parameters.keys(),
                                    output=output,
                                    additional_statevars=state_variables,
                                    build_gradients=build_gradients,
                                    build_hessians=build_hessians)

    records = {}
    for phase_name in phases:
        model = models[phase_name]
        site_fractions = model.site_fractions
        # Constraint functions over the state variables and site fractions.
        constraints = build_constraints(model,
                                        state_variables + site_fractions,
                                        conds,
                                        parameters=param_symbols)
        internal_func = constraints.internal_cons_func
        internal_jac = constraints.internal_cons_jac
        internal_hess = constraints.internal_cons_hess
        multiphase_func = constraints.multiphase_cons_func
        multiphase_jac = constraints.multiphase_cons_jac
        multiphase_hess = constraints.multiphase_cons_hess
        n_internal = constraints.num_internal_cons
        n_multiphase = constraints.num_multiphase_cons

        records[phase_name.upper()] = PhaseRecord(
            comps,
            state_variables,
            site_fractions,
            param_values,
            callables[output]['callables'][phase_name],
            callables[output]['grad_callables'][phase_name],
            callables[output]['hess_callables'][phase_name],
            callables[output]['massfuncs'][phase_name],
            callables[output]['massgradfuncs'][phase_name],
            callables[output]['masshessfuncs'][phase_name],
            internal_func,
            internal_jac,
            internal_hess,
            multiphase_func,
            multiphase_jac,
            multiphase_hess,
            n_internal,
            n_multiphase)

        if verbose:
            print(phase_name + ' ')
    return records
Exemple #4
0
def build_phase_records(dbf, comps, phases, conds, models, output='GM',
                        callables=None, parameters=None, verbose=False,
                        build_gradients=False, build_hessians=False
                        ):
    """
    Create one PhaseRecord per phase from compiled callables and constraints.

    Parameters
    ----------
    dbf : Database
        A Database object
    comps : list
        List of component names
    phases : list
        List of phase names
    conds : dict or None
        Conditions for calculation
    models : dict
        Dictionary of {'phase_name': Model()}
    output : str
        Output property of the particular Model to sample
    callables : dict, optional
        Pre-computed callables, mapping
        {'output' -> {'function' -> {'phase_name' -> AutowrapFunction()}}.
        They are built here when there is no entry for `output`.
    parameters : dict, optional
        Maps SymPy Symbol to numbers, for overriding the values of parameters in the Database.
    verbose : bool, optional
        Print the name of each phase once its record has been created
    build_gradients : bool
        Whether or not to build gradient functions. Defaults to False. Only
        takes effect if callables are not passed.
    build_hessians : bool
        Whether or not to build Hessian functions. Defaults to False. Only
        takes effect if callables are not passed.

    Returns
    -------
    dict
        Dictionary mapping upper-cased phase names to PhaseRecord instances.

    Notes
    -----
    Callables that are passed in are used as-is and are not checked for
    compatibility. Users of build_callables are responsible for ensuring that
    the state variables, parameters and models used to construct the
    callables are compatible with the ones used to build the constraints and
    phase records.

    """
    if parameters is None:
        parameters = {}
    if callables is None:
        callables = {}

    state_variables = sorted(get_state_variables(models=models, conds=conds), key=str)
    param_symbols, param_values = extract_parameters(parameters)

    # Only compile when nothing usable was supplied for this output.
    if callables.get(output) is None:
        callables = build_callables(
            dbf, comps, phases, models,
            parameter_symbols=parameters.keys(),
            output=output,
            additional_statevars=state_variables,
            build_gradients=build_gradients,
            build_hessians=build_hessians,
        )

    records = {}
    for phase_name in phases:
        model = models[phase_name]
        site_fractions = model.site_fractions
        # Constraint functions over the state variables and site fractions.
        constraints = build_constraints(model, state_variables + site_fractions,
                                        conds, parameters=param_symbols)
        internal_cons = constraints.internal_cons
        internal_jac = constraints.internal_jac
        internal_hess = constraints.internal_cons_hess
        multiphase_cons = constraints.multiphase_cons
        multiphase_jac = constraints.multiphase_jac
        n_internal = constraints.num_internal_cons
        n_multiphase = constraints.num_multiphase_cons

        records[phase_name.upper()] = PhaseRecord(
            comps,
            state_variables,
            site_fractions,
            param_values,
            callables[output]['callables'][phase_name],
            callables[output]['grad_callables'][phase_name],
            callables[output]['hess_callables'][phase_name],
            callables[output]['massfuncs'][phase_name],
            callables[output]['massgradfuncs'][phase_name],
            callables[output]['masshessfuncs'][phase_name],
            internal_cons,
            internal_jac,
            internal_hess,
            multiphase_cons,
            multiphase_jac,
            n_internal,
            n_multiphase,
        )

        if verbose:
            print(phase_name + ' ')
    return records
Exemple #5
0
def build_phase_records(dbf,
                        comps,
                        phases,
                        state_variables,
                        models,
                        output='GM',
                        callables=None,
                        parameters=None,
                        verbose=False,
                        build_gradients=True,
                        build_hessians=True):
    """
    Create one PhaseRecord per phase from compiled callables and constraints.

    Parameters
    ----------
    dbf : Database
        A Database object
    comps : List[Union[str, v.Species]]
        List of active pure elements or species. They are unpacked against
        `dbf` and sorted before use.
    phases : list
        List of phase names
    state_variables : Iterable[v.StateVariable]
        State variables used to produce the generated functions. They are
        passed to `get_state_variables` together with `models` and the
        result is sorted by string representation.
    models : Mapping[str, Model]
        Mapping of phase names to model instances
    output : str
        Output property of the particular Model to sample
    callables : dict, optional
        Pre-computed callables for `output`, mapping
        {'output' -> {'function' -> {'phase_name' -> AutowrapFunction()}}.
        They are built here, without gradients or Hessians, when there is no
        entry for `output`.
    parameters : dict, optional
        Maps SymEngine Symbol to numbers, for overriding the values of parameters in the Database.
    verbose : bool, optional
        Print the name of each phase once its record has been created
    build_gradients : bool
        Whether or not to build gradient functions for the 'G' callables.
        Defaults to True.
    build_hessians : bool
        Whether or not to build Hessian functions for the 'G' callables.
        Defaults to True.

    Returns
    -------
    dict
        Dictionary mapping upper-cased phase names to PhaseRecord instances.

    Notes
    -----
    Callables that are passed in are used as-is and are not checked for
    compatibility. Users of build_callables are responsible for ensuring that
    the state variables, parameters and models used to construct the
    callables are compatible with the ones used to build the constraints and
    phase records.

    The callables for the 'G' output are always built by this function,
    whether or not `callables` is given.

    """
    comps = sorted(unpack_components(dbf, comps))
    if parameters is None:
        parameters = {}
    if callables is None:
        callables = {}

    state_variables = sorted(
        get_state_variables(models=models, conds=state_variables), key=str)
    param_symbols, param_values = extract_parameters(parameters)

    # Only compile the requested output when nothing usable was supplied;
    # gradients and Hessians are never built for it.
    if callables.get(output) is None:
        callables = build_callables(dbf, comps, phases, models,
                                    parameter_symbols=parameters.keys(),
                                    output=output,
                                    additional_statevars=state_variables,
                                    build_gradients=False,
                                    build_hessians=False)
    # Temporary solution. PhaseRecord needs rework: https://github.com/pycalphad/pycalphad/pull/329#discussion_r634579356
    formulacallables = build_callables(dbf, comps, phases, models,
                                       parameter_symbols=parameters.keys(),
                                       output='G',
                                       additional_statevars=state_variables,
                                       build_gradients=build_gradients,
                                       build_hessians=build_hessians)

    # If a vector of parameters is specified, only pass the first row to the PhaseRecord
    # Future callers of PhaseRecord.obj_parameters_2d() can pass the full param_values array as an argument
    if len(param_values.shape) > 1:
        param_values = param_values[0]

    records = {}
    for phase_name in phases:
        model = models[phase_name]
        site_fractions = model.site_fractions
        # Constraint functions over the state variables and site fractions.
        constraints = build_constraints(model,
                                        state_variables + site_fractions,
                                        parameters=param_symbols)
        internal_func = constraints.internal_cons_func
        internal_jac = constraints.internal_cons_jac
        internal_hess = constraints.internal_cons_hess
        n_internal = constraints.num_internal_cons

        records[phase_name.upper()] = PhaseRecord(
            comps,
            state_variables,
            site_fractions,
            param_values,
            callables[output]['callables'][phase_name],
            formulacallables['G']['callables'][phase_name],
            formulacallables['G']['grad_callables'][phase_name],
            formulacallables['G']['hess_callables'][phase_name],
            callables[output]['massfuncs'][phase_name],
            formulacallables['G']['formulamolefuncs'][phase_name],
            formulacallables['G']['formulamolegradfuncs'][phase_name],
            formulacallables['G']['formulamolehessfuncs'][phase_name],
            internal_func,
            internal_jac,
            internal_hess,
            n_internal)

        if verbose:
            print(phase_name + ' ')
    return records