Example #1
def _compare_data_to_parameters(dbf,
                                comps,
                                phase_name,
                                desired_data,
                                mod,
                                configuration,
                                x,
                                y,
                                ax=None):
    """
    Return one set of plotted Axes with data compared to calculated parameters

    Parameters
    ----------
    dbf : Database
        pycalphad thermodynamic database containing the relevant parameters.
    comps : list
        Names of components to consider in the calculation.
    phase_name : str
        Name of the phase under consideration.
    desired_data : list
        List of matching dataset dictionaries containing the data to be
        plotted against the calculated values.
    mod : Model
        A pycalphad Model. The Model may or may not have the reference state zeroed out for formation properties.
    configuration : tuple
        Sublattice configuration to plot, given as an endmember or an
        interacting configuration of the phase.
    x : str
        Model property to plot on the x-axis e.g. 'T', 'HM_MIX', 'SM_FORM'
    y : str
        Model property to plot on the y-axis e.g. 'T', 'HM_MIX', 'SM_FORM'
    ax : matplotlib.Axes
        Axes to plot on. If not given, a new figure and axes are created.

    Returns
    -------
    matplotlib.Axes

    """
    all_samples = np.array(get_samples(desired_data), dtype=object)
    endpoints = endmembers_from_interaction(configuration)
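    # Sublattices given as tuples contain more than one species, i.e. they are
    # the interacting sublattices of this configuration.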
    interacting_subls = [
        c for c in list_to_tuple(configuration) if isinstance(c, tuple)
    ]
    disordered_config = False
    if (len(set(interacting_subls)) == 1) and (len(interacting_subls[0]) == 2):
        # This configuration describes all sublattices with the same two elements interacting
        # In general this is a high-dimensional space; just plot the diagonal to see the disordered mixing
        endpoints = [endpoints[0], endpoints[-1]]
        disordered_config = True
    if not ax:
        fig = plt.figure(figsize=plt.figaspect(1))
        ax = fig.gca()
    bar_chart = False
    bar_labels = []
    bar_data = []
    if y.endswith('_FORM'):
        # We were passed a Model object with zeroed out reference states
        yattr = y[:-5]
    else:
        yattr = y
    if len(endpoints) == 1:
        # This is an endmember so we can just compute T-dependent stuff
        temperatures = np.array([i[0] for i in all_samples], dtype=float)
        if temperatures.min() != temperatures.max():
            temperatures = np.linspace(temperatures.min(),
                                       temperatures.max(),
                                       num=100)
        else:
            # We only have one temperature: let's do a bar chart instead
            bar_chart = True
            temperatures = temperatures.min()
        endmember = _translate_endmember_to_array(
            endpoints[0], mod.ast.atoms(v.SiteFraction))[None, None]
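        # The leading singleton axes added above give the single endmember
        # point the extra broadcast dimensions expected by calculate()'s
        # points argument (mirroring point_matrix below).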
        predicted_quantities = calculate(dbf,
                                         comps, [phase_name],
                                         output=yattr,
                                         T=temperatures,
                                         P=101325,
                                         points=endmember,
                                         model=mod,
                                         mode='numpy')
        if y == 'HM' and x == 'T':
            # Shift enthalpy data so that value at minimum T is zero
            predicted_quantities[yattr] -= predicted_quantities[yattr].sel(
                T=temperatures[0]).values.flatten()
        response_data = predicted_quantities[yattr].values.flatten()
        if not bar_chart:
            extra_kwargs = {}
            if len(response_data) < 10:
                extra_kwargs['markersize'] = 20
                extra_kwargs['marker'] = '.'
                extra_kwargs['linestyle'] = 'none'
                extra_kwargs['clip_on'] = False
            ax.plot(temperatures,
                    response_data,
                    label='This work',
                    color='k',
                    **extra_kwargs)
            ax.set_xlabel(plot_mapping.get(x, x))
            ax.set_ylabel(plot_mapping.get(y, y))
        else:
            bar_labels.append('This work')
            bar_data.append(response_data[0])
    elif len(endpoints) == 2:
        # Binary interaction parameter
        first_endpoint = _translate_endmember_to_array(
            endpoints[0], mod.ast.atoms(v.SiteFraction))
        second_endpoint = _translate_endmember_to_array(
            endpoints[1], mod.ast.atoms(v.SiteFraction))
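        # Build a straight-line path of site-fraction points from the first
        # endmember to the second; each row mixes the two endpoints linearly.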
        point_matrix = np.linspace(0, 1, num=100)[None].T * second_endpoint + \
            (1 - np.linspace(0, 1, num=100))[None].T * first_endpoint
        # TODO: Real temperature support
        point_matrix = point_matrix[None, None]
        predicted_quantities = calculate(dbf,
                                         comps, [phase_name],
                                         output=yattr,
                                         T=300,
                                         P=101325,
                                         points=point_matrix,
                                         model=mod,
                                         mode='numpy')
        response_data = predicted_quantities[yattr].values.flatten()
        if not bar_chart:
            extra_kwargs = {}
            if len(response_data) < 10:
                extra_kwargs['markersize'] = 20
                extra_kwargs['marker'] = '.'
                extra_kwargs['linestyle'] = 'none'
                extra_kwargs['clip_on'] = False
            ax.plot(np.linspace(0, 1, num=100),
                    response_data,
                    label='This work',
                    color='k',
                    **extra_kwargs)
            ax.set_xlim((0, 1))
            ax.set_xlabel(
                str(':'.join(endpoints[0])) + ' to ' +
                str(':'.join(endpoints[1])))
            ax.set_ylabel(plot_mapping.get(y, y))
        else:
            bar_labels.append('This work')
            bar_data.append(response_data[0])
    else:
        raise NotImplementedError(
            'No support for plotting configuration {}'.format(configuration))

    bib_reference_keys = sorted(
        list({entry['reference']
              for entry in desired_data}))
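    # Give every bibliographic reference its own marker and fill style so each
    # data source is distinguishable in the legend.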
    symbol_map = bib_marker_map(bib_reference_keys)

    for data in desired_data:
        indep_var_data = None
        response_data = np.zeros_like(data['values'], dtype=float)
        if x == 'T' or x == 'P':
            indep_var_data = np.array(data['conditions'][x],
                                      dtype=float).flatten()
        elif x == 'Z':
            if disordered_config:
                # Take the second element of the first interacting sublattice as the coordinate
                # Because it's disordered all sublattices should be equivalent
                # TODO: Fix this to filter because we need to guarantee the plot points are disordered
                occ = data['solver']['sublattice_occupancies']
                subl_idx = np.nonzero(
                    [isinstance(c, (list, tuple)) for c in occ[0]])[0]
                if len(subl_idx) > 1:
                    subl_idx = int(subl_idx[0])
                else:
                    subl_idx = int(subl_idx)
                indep_var_data = [c[subl_idx][1] for c in occ]
            else:
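                # The sampled interaction coordinate lies in [-1, 1]; rescale
                # it to a [0, 1] composition-like coordinate for the x-axis.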
                interactions = np.array([i[1][1] for i in get_samples([data])],
                                        dtype=float)
                indep_var_data = 1 - (interactions + 1) / 2
            if y.endswith('_MIX') and data['output'].endswith('_FORM'):
                # All the _FORM data we have still has the lattice stability contribution
                # Need to zero it out to shift formation data to mixing
                mod_latticeonly = Model(
                    dbf,
                    comps,
                    phase_name,
                    parameters={'GHSER' + c.upper(): 0
                                for c in comps})
                mod_latticeonly.models = {
                    key: value
                    for key, value in mod_latticeonly.models.items()
                    if key == 'ref'
                }
                temps = data['conditions'].get('T', 300)
                pressures = data['conditions'].get('P', 101325)
                points = build_sitefractions(
                    phase_name, data['solver']['sublattice_configurations'],
                    data['solver']['sublattice_occupancies'])
                for point_idx in range(len(points)):
                    missing_variables = mod_latticeonly.ast.atoms(
                        v.SiteFraction) - set(points[point_idx].keys())
                    # Set unoccupied values to zero
                    points[point_idx].update(
                        {key: 0
                         for key in missing_variables})
                    # Change entry to a sorted array of site fractions
                    points[point_idx] = list(
                        OrderedDict(sorted(points[point_idx].items(),
                                           key=str)).values())
                points = np.array(points, dtype=float)
                # TODO: Real temperature support
                points = points[None, None]
                stability = calculate(dbf,
                                      comps, [phase_name],
                                      output=data['output'][:-5],
                                      T=temps,
                                      P=pressures,
                                      points=points,
                                      model=mod_latticeonly,
                                      mode='numpy')
                response_data -= stability[data['output'][:-5]].values

        response_data += np.array(data['values'], dtype=float)
        response_data = response_data.flatten()
        if not bar_chart:
            extra_kwargs = {}
            if len(response_data) < 10:
                extra_kwargs['markersize'] = 8
                extra_kwargs['linestyle'] = 'none'
                extra_kwargs['clip_on'] = False
            ref = data.get('reference', '')
            mark = symbol_map[ref]['markers']
            ax.plot(indep_var_data,
                    response_data,
                    label=symbol_map[ref]['formatted'],
                    marker=mark['marker'],
                    fillstyle=mark['fillstyle'],
                    **extra_kwargs)
        else:
            bar_labels.append(data.get('reference', None))
            bar_data.append(response_data[0])
    if bar_chart:
        ax.barh(0.02 * np.arange(len(bar_data)),
                bar_data,
                color='k',
                height=0.01)
        endmember_title = ' to '.join([':'.join(i) for i in endpoints])
        ax.get_figure().suptitle('{} (T = {} K)'.format(
            endmember_title, temperatures),
                                 fontsize=20)
        ax.set_yticks(0.02 * np.arange(len(bar_data)))
        ax.set_yticklabels(bar_labels, fontsize=20)
        # This bar chart is rotated 90 degrees, so "y" is now x
        ax.set_xlabel(plot_mapping.get(y, y))
    else:
        ax.set_frame_on(False)
        leg = ax.legend(loc='best')
        leg.get_frame().set_edgecolor('black')
    return ax
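
# A minimal usage sketch (not part of the original module), assuming an
# ESPEI-style workflow. ``my_database.tdb`` and ``desired_data`` (a list of
# dataset dictionaries already filtered to the configuration of interest) are
# hypothetical placeholders, as are the component, phase, and configuration
# values.
#
#   from pycalphad import Database, Model
#   import matplotlib.pyplot as plt
#
#   dbf = Database('my_database.tdb')
#   comps = ['CU', 'MG', 'VA']
#   mod = Model(dbf, comps, 'LIQUID')
#   ax = _compare_data_to_parameters(dbf, comps, 'LIQUID', desired_data, mod,
#                                    (('CU', 'MG'),), x='Z', y='HM_MIX')
#   plt.show()
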
Example #2
def fit_formation_energy(dbf,
                         comps,
                         phase_name,
                         configuration,
                         symmetry,
                         datasets,
                         features=None):
    """
    Find suitable linear model parameters for the given phase.
    We do this by successively fitting heat capacities, entropies and
    enthalpies of formation, and selecting against criteria to prevent
    overfitting. The "best" set of parameters minimizes the error
    without overfitting.

    Parameters
    ----------
    dbf : Database
        pycalphad Database. Partially complete, so we know what degrees of freedom to fix.
    comps : [str]
        Names of the relevant components.
    phase_name : str
        Name of the desired phase for which the parameters will be found.
    configuration : ndarray
        Configuration of the sublattices for the fitting procedure.
    symmetry : [[int]]
        Symmetry of the sublattice configuration.
    datasets : PickleableTinyDB
        All the datasets desired to fit to.
    features : dict
        Maps "property" to a list of features for the linear model.
        These will be transformed from "GM" coefficients
        e.g., {"CPM_FORM": (v.T*sympy.log(v.T), v.T**2, v.T**-1, v.T**3)} (Default value = None)

    Returns
    -------
    dict
        {feature: estimated_value}

    """
    if features is None:
        features = [("CPM_FORM", (v.T * sympy.log(v.T), v.T**2, v.T**-1,
                                  v.T**3)), ("SM_FORM", (v.T, )),
                    ("HM_FORM", (sympy.S.One, ))]
        features = OrderedDict(features)
    if any([isinstance(conf, (list, tuple)) for conf in configuration]):
        # TODO: assumes binary interaction here
        fitting_steps = (["CPM_FORM",
                          "CPM_MIX"], ["SM_FORM",
                                       "SM_MIX"], ["HM_FORM", "HM_MIX"])
        # Product of all nonzero site fractions in all sublattices
        YS = sympy.Symbol('YS')
        # Product of all binary interaction terms
        Z = sympy.Symbol('Z')
        redlich_kister_features = (YS, YS * Z, YS * (Z**2), YS * (Z**3))
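        # Crossing the Redlich-Kister terms with the base features below gives
        # the full candidate set, e.g. the CPM features expand to
        # YS*T*log(T), YS*T**2, ..., YS*Z*T*log(T), ..., YS*Z**3*T**3.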
        for feature in features.keys():
            all_features = list(
                itertools.product(redlich_kister_features, features[feature]))
            features[feature] = [i[0] * i[1] for i in all_features]
        logging.debug('ENDMEMBERS FROM INTERACTION: {}'.format(
            endmembers_from_interaction(configuration)))
    else:
        # We are only fitting an endmember; no mixing data needed
        fitting_steps = (["CPM_FORM"], ["SM_FORM"], ["HM_FORM"])

    parameters = {}
    for feature in features.values():
        for coef in feature:
            parameters[coef] = 0

    # This is our previously fit partial model
    # Subtract out all of these contributions (zero out reference state because these are formation properties)
    fixed_model = Model(
        dbf,
        comps,
        phase_name,
        parameters={'GHSER' + (c.upper() * 2)[:2]: 0
                    for c in comps})
    fixed_model.models['idmix'] = 0
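    # Running total of the parameter contributions fixed in earlier fitting
    # steps; these are subtracted from the data before each subsequent fit.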
    fixed_portions = [0]

    moles_per_formula_unit = sympy.S(0)
    subl_idx = 0
    for num_sites, const in zip(dbf.phases[phase_name].sublattices,
                                dbf.phases[phase_name].constituents):
        if Species('VA') in const:
            moles_per_formula_unit += num_sites * (
                1 - v.SiteFraction(phase_name, subl_idx, Species('VA')))
        else:
            moles_per_formula_unit += num_sites
        subl_idx += 1
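    # For example, a two-sublattice phase with site ratios [1, 3] and
    # constituents [{A, B}, {VA}] gives
    # moles_per_formula_unit = 1 + 3*(1 - y(VA)).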

    for desired_props in fitting_steps:
        desired_data = get_data(comps, phase_name, configuration, symmetry,
                                datasets, desired_props)
        logging.debug('{}: datasets found: {}'.format(desired_props,
                                                      len(desired_data)))
        if len(desired_data) > 0:
            # We assume all properties in the same fitting step have the same features (but different ref states)
            feature_matrix = _build_feature_matrix(desired_props[0],
                                                   features[desired_props[0]],
                                                   desired_data)
            all_samples = get_samples(desired_data)
            data_quantities = np.concatenate(_shift_reference_state(
                desired_data, feature_transforms[desired_props[0]],
                fixed_model),
                                             axis=-1)
            site_fractions = [
                build_sitefractions(
                    phase_name, ds['solver']['sublattice_configurations'],
                    ds['solver'].get(
                        'sublattice_occupancies',
                        np.ones((
                            len(ds['solver']['sublattice_configurations']),
                            len(ds['solver']['sublattice_configurations'][0])),
                                dtype=float))) for ds in desired_data
                for _ in ds['conditions']['T']
            ]
            # Flatten list
            site_fractions = list(itertools.chain(*site_fractions))
            # Remove existing partial model contributions from the data
            data_quantities = data_quantities - feature_transforms[
                desired_props[0]](fixed_model.ast)
            # Subtract out high-order (in T) parameters we've already fit
            data_quantities = data_quantities - \
                feature_transforms[desired_props[0]](sum(fixed_portions)) / moles_per_formula_unit
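            # Site fractions for species absent from a given configuration are
            # unoccupied; set them to zero so the substitution below is
            # complete.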
            for sf, i in zip(site_fractions, data_quantities):
                missing_variables = sympy.S(i * moles_per_formula_unit).atoms(
                    v.SiteFraction) - set(sf.keys())
                sf.update({x: 0. for x in missing_variables})
            # moles_per_formula_unit factor is here because our data is stored per-atom
            # but all of our fits are per-formula-unit
            data_quantities = [
                sympy.S(i * moles_per_formula_unit).xreplace(sf).xreplace({
                    v.T:
                    ixx[0]
                }).evalf() for i, sf, ixx in zip(data_quantities,
                                                 site_fractions, all_samples)
            ]
            data_quantities = np.asarray(data_quantities, dtype=float)
            parameters.update(
                _fit_parameters(feature_matrix, data_quantities,
                                features[desired_props[0]]))
            # Add these parameters to be fixed for the next fitting step
            fixed_portion = np.array(features[desired_props[0]],
                                     dtype=object)
            fixed_portion = np.dot(fixed_portion, [
                parameters[feature] for feature in features[desired_props[0]]
            ])
            fixed_portions.append(fixed_portion)
    return parameters
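
# A minimal usage sketch (not part of the original module), assuming an
# ESPEI-style setup. ``dbf`` is a partially built pycalphad Database,
# ``datasets`` is a PickleableTinyDB of single-phase data, and the component,
# phase, configuration, and symmetry values below are hypothetical
# placeholders; ``None`` for symmetry means no sublattice symmetry is assumed.
#
#   params = fit_formation_energy(dbf, ['CU', 'MG', 'VA'], 'LAVES_C15',
#                                 ('CU', 'MG'), None, datasets)
#   for feature, value in params.items():
#       print(feature, value)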