Example #1
def get_prop_samples(dbf, comps, phase_name, desired_data):
    """
    Return data values and the conditions to calculate them using
    pycalphad.calculate from the datasets

    Parameters
    ----------
    dbf : pycalphad.Database
        Database to consider
    comps : list
        List of active component names
    phase_name : str
        Name of the phase to consider from the Database
    desired_data : list
        List of dictionary datasets that contain the values to sample

    Returns
    -------
    dict
        Dictionary of condition kwargs for pycalphad's calculate and the expected values

    """
    # TODO: assumes T, P as conditions
    # sublattice constituents are Species objects, so we need to be doing intersections with those
    species_comps = unpack_components(dbf, comps)
    phase_constituents = dbf.phases[phase_name].constituents
    # phase constituents must be filtered to only active:
    phase_constituents = [
        [c.name for c in sorted(subl_constituents.intersection(set(species_comps)))]
        for subl_constituents in phase_constituents
    ]

    # calculate needs points, state variable lists, and values to compare to
    calculate_dict = {
        'P': np.array([]),
        'T': np.array([]),
        'points': np.atleast_2d([[]]).reshape(-1, sum([len(subl) for subl in phase_constituents])),
        'values': np.array([]),
    }

    for datum in desired_data:
        # extract the data we care about
        datum_T = datum['conditions']['T']
        datum_P = datum['conditions']['P']
        configurations = datum['solver']['sublattice_configurations']
        occupancies = datum['solver'].get('sublattice_occupancies')
        values = np.array(datum['values'])

        # broadcast and flatten the conditions arrays
        P, T = ravel_conditions(values, datum_P, datum_T)
        if occupancies is None:
            occupancies = [None] * len(configurations)

        # calculate the points arrays, should be 2d array of points arrays
        points = np.array([calculate_points_array(phase_constituents, config, occup) for config, occup in zip(configurations, occupancies)])

        # add everything to the calculate_dict
        calculate_dict['P'] = np.concatenate([calculate_dict['P'], P])
        calculate_dict['T'] = np.concatenate([calculate_dict['T'], T])
        # use an integer repeat count; a float would raise a TypeError in np.repeat
        calculate_dict['points'] = np.concatenate([calculate_dict['points'], np.repeat(points, len(T) // points.shape[0], axis=0)], axis=0)
        calculate_dict['values'] = np.concatenate([calculate_dict['values'], values.flatten()])

    return calculate_dict
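
The returned dictionary is meant to be passed straight to pycalphad's calculate and then compared against its 'values' entry. A minimal usage sketch, assuming a loaded pycalphad Database, a list of ESPEI-format dataset dictionaries named desired_data, and a mixing-enthalpy output; the file name, components, phase, and property are placeholders, not part of the example above:

from pycalphad import Database, calculate

dbf = Database('Cu-Mg.tdb')   # placeholder database file
comps = ['CU', 'MG', 'VA']    # placeholder components
samples = get_prop_samples(dbf, comps, 'LIQUID', desired_data)
# 'P', 'T', and 'points' map directly onto calculate's keyword arguments;
# broadcast=False pairs the i-th point with the i-th (P, T) condition.
result = calculate(dbf, comps, 'LIQUID', output='HM_MIX', broadcast=False,
                   P=samples['P'], T=samples['T'], points=samples['points'])
residuals = result['HM_MIX'].values.flatten() - samples['values']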
Example #2
def get_prop_samples(desired_data, constituents):
    """
    Return data values and the conditions to calculate them using pycalphad.calculate

    Parameters
    ----------
    desired_data : List[Dict[str, Any]]
        List of dataset dictionaries that contain the values to sample
    constituents : List[List[str]]
        Names of constituents in each sublattice.

    Returns
    -------
    Dict[str, Union[float, ArrayLike, List[float]]]
        Dictionary of condition kwargs for pycalphad's calculate and the expected values

    """
    # TODO: assumes T, P as conditions
    # calculate needs points, state variable lists, and values to compare to
    num_dof = sum(map(len, constituents))
    calculate_dict = {
        'P': np.array([]),
        'T': np.array([]),
        'points': np.atleast_2d([[]]).reshape(-1, num_dof),
        'values': np.array([]),
        'weights': [],
        'references': [],
    }

    for datum in desired_data:
        # extract the data we care about
        datum_T = datum['conditions']['T']
        datum_P = datum['conditions']['P']
        configurations = datum['solver']['sublattice_configurations']
        occupancies = datum['solver'].get('sublattice_occupancies')
        values = np.array(datum['values'])
        # Broadcast the weights to the shape of the values. This ensures that
        # the sizes of the weights and values are the same, which is important
        # because they are flattened later (so the shape information is lost).
        weights = np.broadcast_to(np.asarray(datum.get('weight', 1.0)),
                                  values.shape)

        # broadcast and flatten the conditions arrays
        P, T = ravel_conditions(values, datum_P, datum_T)
        if occupancies is None:
            occupancies = [None] * len(configurations)

        # calculate the points arrays, should be 2d array of points arrays
        points = np.array([
            calculate_points_array(constituents, config, occup)
            for config, occup in zip(configurations, occupancies)
        ])
        assert values.shape == weights.shape, f"Values data shape {values.shape} does not match weights shape {weights.shape}"

        # add everything to the calculate_dict
        calculate_dict['P'] = np.concatenate([calculate_dict['P'], P])
        calculate_dict['T'] = np.concatenate([calculate_dict['T'], T])
        calculate_dict['points'] = np.concatenate([
            calculate_dict['points'],
            np.tile(points, (values.shape[0] * values.shape[1], 1))
        ], axis=0)
        calculate_dict['values'] = np.concatenate([calculate_dict['values'], values.flatten()])
        calculate_dict['weights'].extend(weights.flatten())
        calculate_dict['references'].extend(
            [datum.get('reference', "") for _ in range(values.flatten().size)])
    return calculate_dict
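
Compared with Example #1, this variant takes the active sublattice constituents directly and also carries per-sample 'weights' and 'references' alongside the values. A sketch of how constituents might be built and how the flattened weights line up with the flattened values; the database, phase name, active components, and desired_data are placeholders, and the name-based filtering here only mirrors what Example #1 does with unpack_components:

from pycalphad import Database

dbf = Database('Cu-Mg.tdb')          # placeholder database file
active_names = {'CU', 'MG', 'VA'}    # placeholder active components
constituents = [
    sorted(sp.name for sp in sublattice if sp.name in active_names)
    for sublattice in dbf.phases['LIQUID'].constituents
]

samples = get_prop_samples(desired_data, constituents)
# every flattened value has a matching weight and reference string
assert len(samples['weights']) == samples['values'].size
assert len(samples['references']) == samples['values'].size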
Example #3
def calculate_activity_error(dbf,
                             comps,
                             phases,
                             datasets,
                             parameters=None,
                             phase_models=None,
                             callables=None,
                             grad_callables=None,
                             hess_callables=None,
                             massfuncs=None,
                             massgradfuncs=None):
    """
    Return the sum of squared errors from activity data

    Parameters
    ----------
    dbf : pycalphad.Database
        Database to consider
    comps : list
        List of active component names
    phases : list
        List of phases to consider
    datasets : espei.utils.PickleableTinyDB
        Datasets that contain activity data
    parameters : dict
        Dictionary of symbols that will be overridden in pycalphad.equilibrium
    phase_models : dict
        Phase models to pass to pycalphad calculations
    callables : dict
        Callables to pass to pycalphad
    grad_callables : dict
        Gradient callables to pass to pycalphad
    hess_callables : dict
        Hessian callables to pass to pycalphad
    massfuncs : dict
        Callables of mass derivatives to pass to pycalphad
    massgradfuncs : dict
        Gradient callables of mass derivatives to pass to pycalphad

    Returns
    -------
    float
        A single float of the sum of square errors

    Notes
    -----
    General procedure:
    1. Get the datasets
    2. For each dataset

        a. Calculate reference state equilibrium
        b. Calculate current chemical potentials
        c. Find the target chemical potentials
        d. Calculate error due to chemical potentials

    """
    if parameters is None:
        parameters = {}

    activity_datasets = datasets.search(
        (tinydb.where('output').test(lambda x: 'ACR' in x))
        & (tinydb.where('components').test(lambda x: set(x).issubset(comps))))

    error = 0
    if len(activity_datasets) == 0:
        return error

    for ds in activity_datasets:
        acr_component = ds['output'].split('_')[1]  # the component of interest
        # calculate the reference state equilibrium
        ref = ds['reference_state']
        ref_conditions = {
            _map_coord_to_variable(coord): val
            for coord, val in ref['conditions'].items()
        }
        ref_result = equilibrium(
            dbf,
            ds['components'],
            ref['phases'],
            ref_conditions,
            model=phase_models,
            parameters=parameters,
            massfuncs=massfuncs,
            massgradfuncs=massgradfuncs,
            callables=callables,
            grad_callables=grad_callables,
            hess_callables=hess_callables,
        )

        # calculate current chemical potentials
        # get the conditions
        conditions = {}
        # first make sure the conditions are paired
        # only get the compositions, P and T are special cased
        conds_list = [(cond, value)
                      for cond, value in ds['conditions'].items()
                      if cond not in ('P', 'T')]
        # ravel the conditions
        # we will ravel each composition individually, since they all must have the same shape
        for comp_name, comp_x in conds_list:
            P, T, X = ravel_conditions(ds['values'], ds['conditions']['P'],
                                       ds['conditions']['T'], comp_x)
            conditions[v.P] = P
            conditions[v.T] = T
            conditions[_map_coord_to_variable(comp_name)] = X
        # do the calculations
        # we cannot currently turn broadcasting off, so we have to do equilibrium one by one
        # invert the conditions dicts to make a list of condition dicts rather than a condition dict of lists
        # assume now that the ravelled conditions all have the same size
        conditions_list = [{c: conditions[c][i]
                            for c in conditions.keys()}
                           for i in range(len(conditions[v.T]))]
        current_chempots = []
        for conds in conditions_list:
            sample_eq_res = equilibrium(
                dbf,
                ds['components'],
                phases,
                conds,
                model=phase_models,
                parameters=parameters,
                massfuncs=massfuncs,
                massgradfuncs=massgradfuncs,
                callables=callables,
                grad_callables=grad_callables,
                hess_callables=hess_callables,
            )
            current_chempots.append(
                sample_eq_res.MU.sel(
                    component=acr_component).values.flatten()[0])
        current_chempots = np.array(current_chempots)

        # calculate target chempots
        target_chempots = target_chempots_from_activity(
            acr_component,
            np.array(ds['values']).flatten(), conditions[v.T], ref_result)
        # calculate the error
        error += chempot_error(current_chempots, target_chempots)
    # TODO: write a test for this
    # must coerce sympy.core.numbers.Float to float64
    if np.any(np.isnan(np.array([error], dtype=np.float64))):
        return -np.inf
    return error
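
A minimal call sketch for this function, assuming the datasets have already been loaded into an espei.utils.PickleableTinyDB (for example with espei.datasets.load_datasets); the file paths, components, and the parameter symbol below are placeholders:

from pycalphad import Database
from espei.datasets import load_datasets, recursive_glob

dbf = Database('Cu-Mg.tdb')                                # placeholder TDB
datasets = load_datasets(recursive_glob('input-data', '*.json'))
comps = ['CU', 'MG', 'VA']
phases = list(dbf.phases.keys())
# `parameters` overrides database symbols (ESPEI names its fitted degrees of
# freedom VV0000, VV0001, ...); pass {} to score the database as-is.
error = calculate_activity_error(dbf, comps, phases, datasets,
                                 parameters={'VV0000': -30000.0})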
Example #4
def calculate_activity_error(dbf,
                             comps,
                             phases,
                             datasets,
                             parameters=None,
                             phase_models=None,
                             callables=None,
                             data_weight=1.0):
    """
    Return the sum of squared errors from activity data

    Parameters
    ----------
    dbf : pycalphad.Database
        Database to consider
    comps : list
        List of active component names
    phases : list
        List of phases to consider
    datasets : espei.utils.PickleableTinyDB
        Datasets that contain activity data
    parameters : dict
        Dictionary of symbols that will be overridden in pycalphad.equilibrium
    phase_models : dict
        Phase models to pass to pycalphad calculations
    callables : dict
        Callables to pass to pycalphad
    data_weight : float
        Dimensionless scaling factor for the standard deviation of activity
        measurements. The base standard deviation corresponds to the typical
        scatter in chemical potential differences derived from activity
        measurements, in J/mol, and is divided by this weight.

    Returns
    -------
    float
        A single float of the sum of square errors

    Notes
    -----
    General procedure:
    1. Get the datasets
    2. For each dataset

        a. Calculate reference state equilibrium
        b. Calculate current chemical potentials
        c. Find the target chemical potentials
        d. Calculate error due to chemical potentials

    """
    std_dev = 500  # J/mol

    if parameters is None:
        parameters = {}

    activity_datasets = datasets.search(
        (tinydb.where('output').test(lambda x: 'ACR' in x))
        & (tinydb.where('components').test(lambda x: set(x).issubset(comps))))

    error = 0
    if len(activity_datasets) == 0:
        return error

    for ds in activity_datasets:
        acr_component = ds['output'].split('_')[1]  # the component of interest
        # calculate the reference state equilibrium
        ref = ds['reference_state']
        # data_comps and data_phases ensures that we only do calculations on
        # the subsystem of the system defining the data.
        data_comps = ds['components']
        data_phases = filter_phases(dbf,
                                    unpack_components(dbf, data_comps),
                                    candidate_phases=phases)
        ref_conditions = {
            _map_coord_to_variable(coord): val
            for coord, val in ref['conditions'].items()
        }
        ref_result = equilibrium(dbf,
                                 data_comps,
                                 ref['phases'],
                                 ref_conditions,
                                 model=phase_models,
                                 parameters=parameters,
                                 callables=callables)

        # calculate current chemical potentials
        # get the conditions
        conditions = {}
        # first make sure the conditions are paired
        # only get the compositions, P and T are special cased
        conds_list = [(cond, value)
                      for cond, value in ds['conditions'].items()
                      if cond not in ('P', 'T')]
        # ravel the conditions
        # we will ravel each composition individually, since they all must have the same shape
        for comp_name, comp_x in conds_list:
            P, T, X = ravel_conditions(ds['values'], ds['conditions']['P'],
                                       ds['conditions']['T'], comp_x)
            conditions[v.P] = P
            conditions[v.T] = T
            conditions[_map_coord_to_variable(comp_name)] = X
        # do the calculations
        # we cannot currently turn broadcasting off, so we have to do equilibrium one by one
        # invert the conditions dicts to make a list of condition dicts rather than a condition dict of lists
        # assume now that the ravelled conditions all have the same size
        conditions_list = [{c: conditions[c][i]
                            for c in conditions.keys()}
                           for i in range(len(conditions[v.T]))]
        current_chempots = []
        for conds in conditions_list:
            sample_eq_res = equilibrium(dbf,
                                        data_comps,
                                        data_phases,
                                        conds,
                                        model=phase_models,
                                        parameters=parameters,
                                        callables=callables)
            current_chempots.append(
                sample_eq_res.MU.sel(
                    component=acr_component).values.flatten()[0])
        current_chempots = np.array(current_chempots)

        # calculate target chempots
        samples = np.array(ds['values']).flatten()
        target_chempots = target_chempots_from_activity(
            acr_component, samples, conditions[v.T], ref_result)
        # calculate the error
        weight = ds.get('weight', 1.0)
        pe = chempot_error(current_chempots,
                           target_chempots,
                           std_dev=std_dev / data_weight / weight)
        error += np.sum(pe)
        _log.debug(
            'Data: %s, chemical potential difference: %s, probability: %s, reference: %s',
            samples, current_chempots - target_chempots, pe, ds["reference"])

    # TODO: write a test for this
    # must coerce sympy.core.numbers.Float to float64
    if np.any(np.isnan(np.array([error], dtype=np.float64))):
        return -np.inf
    return error
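
In this variant the uncertainty handed to chempot_error is the base standard deviation scaled down by both the global data_weight argument and the dataset's own 'weight' tag, so larger weights imply tighter assumed measurement scatter and a stronger penalty for disagreement. A small numeric illustration (the weight values are arbitrary):

std_dev = 500.0        # J/mol, base scatter assumed for activity-derived chemical potentials
data_weight = 2.0      # global weight passed to calculate_activity_error
dataset_weight = 4.0   # 'weight' key on an individual dataset

effective_std_dev = std_dev / data_weight / dataset_weight
print(effective_std_dev)  # 62.5 J/mol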