def filter_sublattice_configurations(desired_data: List[Dataset], subl_model) -> List[Dataset]:  # TODO: symmetry support
    """
    Remove configurations that cannot be represented by the sublattice model.

    The datasets are modified in place: ``data['values']``,
    ``data['solver']['sublattice_configurations']`` and, when present,
    ``data['solver']['sublattice_occupancies']`` are filtered with the same mask.

    Parameters
    ----------
    desired_data : List[Dataset]
        Datasets with a ``'solver'`` entry holding ``'sublattice_configurations'``.
    subl_model : sequence of iterables
        Allowed constituents for each sublattice.

    Returns
    -------
    List[Dataset]
        The same list object that was passed in, after filtering.
    """
    subl_model_sets = [set(subl) for subl in subl_model]

    def _is_representable(config) -> bool:
        """Return True if every sublattice of ``config`` only uses allowed constituents."""
        canonical = recursive_tuplify(canonicalize(config, None))
        if len(canonical) != len(subl_model_sets):
            return False
        return all(
            subl.issuperset(tuplify(config_subl))
            for subl, config_subl in zip(subl_model_sets, canonical)
        )

    for data in desired_data:
        solver = data['solver']
        # boolean mask of whether a configuration is represented by the sublattice model
        matching_configs = np.asarray(
            [_is_representable(config) for config in solver['sublattice_configurations']],
            dtype=np.bool_,
        )
        # Rewrite output values with filtered data.
        # np.float64 is used because the np.float_ alias no longer exists in NumPy 2.0.
        data['values'] = np.array(data['values'], dtype=np.float64)[..., matching_configs]
        solver['sublattice_configurations'] = np.array(
            solver['sublattice_configurations'], dtype=np.object_)[matching_configs].tolist()
        if 'sublattice_occupancies' in solver:
            solver['sublattice_occupancies'] = np.array(
                solver['sublattice_occupancies'], dtype=np.object_)[matching_configs].tolist()
    return desired_data
def get_data(comps, phase_name, configuration, symmetry, datasets, prop):
    """
    Return list of cleaned single phase datasets matching the passed arguments.

    Parameters
    ----------
    comps : list
        List of string component names
    phase_name : str
        Name of phase
    configuration : tuple
        Sublattice configuration as a tuple, e.g. ("CU", ("CU", "MG"))
    symmetry : list of lists
        List of sublattice indices with symmetry
    datasets : espei.utils.PickleableTinyDB
        Database of datasets to search for data
    prop : list
        Collection of property names; a dataset matches when its ``'output'``
        satisfies ``output in prop``.

    Returns
    -------
    list
        Deep copies of the matching datasets, restricted to the requested
        configuration and to temperatures of at least 298.15 K.
    """
    desired_data = datasets.search(
        (tinydb.where('output').test(lambda x: x in prop)) &
        (tinydb.where('components').test(lambda x: set(x).issubset(comps))) &
        (tinydb.where('solver').test(symmetry_filter, configuration,
                                     recursive_tuplify(symmetry) if symmetry else symmetry)) &
        (tinydb.where('phases') == [phase_name])
    )
    # This seems to be necessary because the 'values' member does not modify 'datasets'
    # But everything else does!
    desired_data = copy.deepcopy(desired_data)

    # The target configuration does not change between datasets, so canonicalize it once.
    target_config = canonicalize(configuration, symmetry)

    for data in desired_data:
        solver = data['solver']
        # Filter output values to only contain data for matching sublattice configurations
        config_mask = np.array(
            [canonicalize(sblconf, symmetry) == target_config
             for sblconf in solver['sublattice_configurations']],
            dtype=np.bool_,
        )
        matching_configs = np.flatnonzero(config_mask)
        # Rewrite output values with filtered data.
        # The np.float / np.object aliases were removed from NumPy; use explicit dtypes.
        data['values'] = np.array(data['values'], dtype=np.float64)[..., matching_configs]
        solver['sublattice_configurations'] = recursive_tuplify(
            np.array(solver['sublattice_configurations'], dtype=np.object_)[matching_configs].tolist())
        if 'sublattice_occupancies' in solver:
            solver['sublattice_occupancies'] = np.array(
                solver['sublattice_occupancies'], dtype=np.object_)[matching_configs].tolist()
        # Filter out temperatures below 298.15 K (for now, until better refstates exist)
        temperatures = np.atleast_1d(data['conditions']['T'])
        temp_filter = temperatures >= 298.15
        data['conditions']['T'] = temperatures[temp_filter]
        # 'values' was already rewritten above; temperature is the second-to-last axis
        data['values'] = data['values'][..., temp_filter, :]
    return desired_data
def test_get_data_for_a_minimal_example():
    """Querying a dataset with one of its own configurations returns only that configuration's value."""
    components = ['CU', 'MG', 'VA']
    phase = 'LAVES_C15'
    target_configuration = ('MG', 'CU')
    symmetry = None
    props = ['HM_FORM']

    sample_dataset = {
        "components": ["CU", "MG", "VA"],
        "phases": ["LAVES_C15"],
        "solver": {
            "mode": "manual",
            "sublattice_site_ratios": [2, 1],
            "sublattice_configurations": [["CU", "MG"],
                                          ["MG", "CU"],
                                          ["MG", "MG"],
                                          ["CU", "CU"]]
        },
        "conditions": {
            "P": 101325,
            "T": 298.15
        },
        "output": "HM_FORM",
        "values": [[[-15720, 34720, 7000, 15500]]]
    }
    db = PickleableTinyDB(storage=MemoryStorage)
    db.insert(sample_dataset)

    # The query/filter pipeline below is the functional replacement for "get_data"
    tuplified_symmetry = recursive_tuplify(symmetry) if symmetry else symmetry
    solver_query = tinydb.where('solver').test(symmetry_filter, target_configuration, tuplified_symmetry)
    found = filter_temperatures(
        filter_configurations(
            get_prop_data(components, phase, props, db, additional_query=solver_query),
            target_configuration,
            symmetry,
        )
    )

    assert len(found) == 1
    result = found[0]
    solver = result['solver']
    conditions = result['conditions']
    assert result['components'] == components
    assert result['phases'][0] == phase
    assert solver['sublattice_site_ratios'] == [2, 1]
    assert solver['sublattice_configurations'] == (('MG', 'CU'), )
    assert conditions['P'] == 101325
    assert conditions['T'] == 298.15
    assert result['output'] == 'HM_FORM'
    assert result['values'] == np.array([[[34720.0]]])
def filter_configurations(desired_data: List[Dataset], configuration, symmetry) -> List[Dataset]:
    """
    Return non-equilibrium thermochemical datasets with invalid configurations removed.

    The datasets are modified in place: ``data['values']``,
    ``data['solver']['sublattice_configurations']`` and, when present,
    ``data['solver']['sublattice_occupancies']`` are restricted to the entries
    whose canonicalized configuration equals the canonicalized ``configuration``.

    Parameters
    ----------
    desired_data : List[Dataset]
        List of non-equilibrium thermochemical datasets
    configuration : tuple
        Sublattice configuration as a tuple, e.g. ("CU", ("CU", "MG"))
    symmetry : list of lists
        List of sublattice indices with symmetry

    Returns
    -------
    List[Dataset]
        The same list object that was passed in, after filtering.
    """
    # The target does not depend on the dataset, so canonicalize it only once.
    target_config = canonicalize(configuration, symmetry)
    for data in desired_data:
        solver = data['solver']
        configurations = solver['sublattice_configurations']
        # Filter output values to only contain data for matching sublattice configurations.
        # An explicit boolean dtype keeps an empty configuration list usable as a mask.
        config_mask = np.array(
            [canonicalize(sblconf, symmetry) == target_config for sblconf in configurations],
            dtype=np.bool_,
        )
        matching_configs = np.flatnonzero(config_mask)
        # Rewrite output values with filtered data.
        # np.float64 is used because the np.float_ alias no longer exists in NumPy 2.0.
        data['values'] = np.array(data['values'], dtype=np.float64)[..., matching_configs]
        solver['sublattice_configurations'] = recursive_tuplify(
            np.array(configurations, dtype=np.object_)[matching_configs].tolist())
        if 'sublattice_occupancies' in solver:
            solver['sublattice_occupancies'] = np.array(
                solver['sublattice_occupancies'], dtype=np.object_)[matching_configs].tolist()
    return desired_data
def fit_formation_energy(dbf, comps, phase_name, configuration, symmetry, datasets, ridge_alpha=None, aicc_phase_penalty=None, features=None):
    """
    Find suitable linear model parameters for the given phase.

    We do this by successively fitting heat capacities, entropies and
    enthalpies of formation, and selecting against criteria to prevent
    overfitting. The "best" set of parameters minimizes the error
    without overfitting.

    Parameters
    ----------
    dbf : Database
        pycalphad Database. Partially complete, so we know what degrees of freedom to fix.
    comps : [str]
        Names of the relevant components.
    phase_name : str
        Name of the desired phase for which the parameters will be found.
    configuration : ndarray
        Configuration of the sublattices for the fitting procedure.
    symmetry : [[int]]
        Symmetry of the sublattice configuration.
    datasets : PickleableTinyDB
        All the datasets desired to fit to.
    ridge_alpha : float
        Value of the :math:`\\alpha` hyperparameter used in ridge regression.
        Passed unchanged to ``select_model``; the default here is ``None``.
        For now, the parameter is applied to all features.
    aicc_phase_penalty : dict
        Map of feature type (the part of the property name before the first
        underscore, e.g. ``"HM"`` for ``"HM_FORM"``) to a multiplication
        factor for the AICc's parameter penalty. Feature types that are
        missing use a factor of 1.0. ``None`` is treated as an empty map.
    features : dict
        Maps "property" to a list of features for the linear model.
        These will be transformed from "GM" coefficients
        e.g., {"CPM_FORM": (v.T*sympy.log(v.T), v.T**2, v.T**-1, v.T**3)} (Default value = None)

    Returns
    -------
    dict
        {feature: estimated_value}

    """
    aicc_feature_factors = aicc_phase_penalty if aicc_phase_penalty is not None else {}
    if interaction_test(configuration):
        _log.debug('ENDMEMBERS FROM INTERACTION: %s', endmembers_from_interaction(configuration))
        fitting_steps = (["CPM_FORM", "CPM_MIX"], ["SM_FORM", "SM_MIX"], ["HM_FORM", "HM_MIX"])
    else:
        # We are only fitting an endmember; no mixing data needed
        fitting_steps = (["CPM_FORM"], ["SM_FORM"], ["HM_FORM"])

    # create the candidate models and fitting steps
    if features is None:
        features = OrderedDict([("CPM_FORM", (v.T * sympy.log(v.T), v.T**2, v.T**-1, v.T**3)),
                                ("SM_FORM", (v.T,)),
                                ("HM_FORM", (sympy.S.One,)),
                                ])
    # dict of {feature, [candidate_models]}
    candidate_models_features = build_candidate_models(configuration, features)

    # All possible parameter values that could be taken on. This is some legacy
    # code from before there were many candidate models built. For very large
    # sets of candidate models, this could be quite slow.
    # TODO: we might be able to remove this initialization for clarity, depends on fixed portions
    parameters = {}
    for candidate_models in candidate_models_features.values():
        for model in candidate_models:
            for coef in model:
                parameters[coef] = 0

    # This is our previously fit partial model from previous steps
    # Subtract out all of these contributions (zero out reference state because these are formation properties)
    fixed_model = Model(dbf, comps, phase_name, parameters={'GHSER'+(c.upper()*2)[:2]: 0 for c in comps})
    fixed_portions = [0]

    for desired_props in fitting_steps:
        feature_type = desired_props[0].split('_')[0]  # HM_FORM -> HM
        aicc_factor = aicc_feature_factors.get(feature_type, 1.0)
        solver_qry = (where('solver').test(symmetry_filter, configuration, recursive_tuplify(symmetry) if symmetry else symmetry))
        desired_data = get_prop_data(comps, phase_name, desired_props, datasets, additional_query=solver_qry)
        desired_data = filter_configurations(desired_data, configuration, symmetry)
        desired_data = filter_temperatures(desired_data)
        _log.trace('%s: datasets found: %s', desired_props, len(desired_data))
        if len(desired_data) > 0:
            config_tup = tuple(map(tuplify, configuration))
            calculate_dict = get_prop_samples(desired_data, config_tup)
            sample_condition_dicts = _get_sample_condition_dicts(calculate_dict, list(map(len, config_tup)))
            weights = calculate_dict['weights']
            assert len(sample_condition_dicts) == len(weights)

            # We assume all properties in the same fitting step have the same
            # features (all CPM, all HM, etc., but different ref states).
            # data quantities are the same for each candidate model and can be computed up front
            data_qtys = get_data_quantities(feature_type, fixed_model, fixed_portions, desired_data, sample_condition_dicts)

            # build the candidate model transformation matrix and response vector (A, b in Ax=b)
            candidate_models = candidate_models_features[desired_props[0]]
            feature_matricies = []
            data_quantities = []
            for candidate_coefficients in candidate_models:
                # Map coefficients in G to coefficients in the feature_type (H, S, CP)
                transformed_coefficients = list(map(feature_transforms[feature_type], candidate_coefficients))
                # The feature matrix is built the same way for every interaction order.
                feature_matricies.append(_build_feature_matrix(sample_condition_dicts, transformed_coefficients))
                data_quantities.append(data_qtys)

            # provide candidate models and get back a selected model.
            selected_model = select_model(zip(candidate_models, feature_matricies, data_quantities), ridge_alpha, weights=weights, aicc_factor=aicc_factor)
            selected_features, selected_values = selected_model
            parameters.update(zip(selected_features, selected_values))
            # Add these parameters to be fixed for the next fitting step
            fixed_portion = np.array(selected_features, dtype=np.object_)
            fixed_portion = np.dot(fixed_portion, selected_values)
            fixed_portions.append(fixed_portion)
    return parameters
def _compare_data_to_parameters(dbf, comps, phase_name, desired_data, mod, configuration, x, y, ax=None):
    """
    Return one set of plotted Axes with data compared to calculated parameters

    Parameters
    ----------
    dbf : Database
        pycalphad thermodynamic database containing the relevant parameters.
    comps : list
        Names of components to consider in the calculation.
    phase_name : str
        Name of the considered phase phase
    desired_data : list
        Datasets to plot. Each one is read for ``'values'``, ``'conditions'``,
        ``'solver'``, ``'output'`` and the optional ``'reference'``.
    mod : Model
        A pycalphad Model. The Model may or may not have the reference state zeroed out for formation properties.
    configuration : tuple
        Sublattice configuration; its endmembers are obtained with
        ``endmembers_from_interaction``. Only one or two endpoints are supported.
    x : str
        Model property to plot on the x-axis e.g. 'T', 'HM_MIX', 'SM_FORM'
    y : str
        Model property to plot on the y-axis e.g. 'T', 'HM_MIX', 'SM_FORM'
    ax : matplotlib.Axes
        Default axes used if not specified.

    Returns
    -------
    matplotlib.Axes

    Raises
    ------
    NotImplementedError
        If the configuration resolves to neither one nor two endpoints.

    """
    species = unpack_components(dbf, comps)
    # phase constituents are Species objects, so we need to be doing intersections with those
    phase_constituents = dbf.phases[phase_name].constituents
    # phase constituents must be filtered to only active:
    constituents = [[sp.name for sp in sorted(subl_constituents.intersection(species))] for subl_constituents in phase_constituents]
    subl_dof = list(map(len, constituents))
    calculate_dict = get_prop_samples(desired_data, constituents)
    sample_condition_dicts = _get_sample_condition_dicts(calculate_dict, subl_dof)
    endpoints = endmembers_from_interaction(configuration)
    interacting_subls = [c for c in recursive_tuplify(configuration) if isinstance(c, tuple)]
    disordered_config = False
    if (len(set(interacting_subls)) == 1) and (len(interacting_subls[0]) == 2):
        # This configuration describes all sublattices with the same two elements interacting
        # In general this is a high-dimensional space; just plot the diagonal to see the disordered mixing
        endpoints = [endpoints[0], endpoints[-1]]
        disordered_config = True
    if not ax:
        ax = plt.subplot()
    bar_chart = False
    bar_labels = []
    bar_data = []
    if y.endswith('_FORM'):
        # We were passed a Model object with zeroed out reference states
        yattr = y[:-5]
    else:
        yattr = y
    if len(endpoints) == 1:
        # This is an endmember so we can just compute T-dependent stuff
        Ts = calculate_dict['T']
        temperatures = np.asarray(Ts if len(Ts) > 0 else 298.15)
        if temperatures.min() != temperatures.max():
            temperatures = np.linspace(temperatures.min(), temperatures.max(), num=100)
        else:
            # We only have one temperature: let's do a bar chart instead
            bar_chart = True
            temperatures = temperatures.min()
        endmember = _translate_endmember_to_array(endpoints[0], mod.ast.atoms(v.SiteFraction))[None, None]
        predicted_quantities = calculate(dbf, comps, [phase_name], output=yattr, T=temperatures, P=101325, points=endmember, model=mod, mode='numpy')
        if y == 'HM' and x == 'T':
            # Shift enthalpy data so that value at minimum T is zero.
            # `temperatures` is a scalar in the bar chart case, so promote it before indexing.
            lowest_temperature = np.atleast_1d(temperatures)[0]
            predicted_quantities[yattr] -= predicted_quantities[yattr].sel(T=lowest_temperature).values.flatten()
        response_data = predicted_quantities[yattr].values.flatten()
        if not bar_chart:
            extra_kwargs = {}
            if len(response_data) < 10:
                extra_kwargs['markersize'] = 20
                extra_kwargs['marker'] = '.'
                extra_kwargs['linestyle'] = 'none'
                extra_kwargs['clip_on'] = False
            ax.plot(temperatures, response_data, label='This work', color='k', **extra_kwargs)
            ax.set_xlabel(plot_mapping.get(x, x))
            ax.set_ylabel(plot_mapping.get(y, y))
        else:
            bar_labels.append('This work')
            bar_data.append(response_data[0])
    elif len(endpoints) == 2:
        # Binary interaction parameter
        first_endpoint = _translate_endmember_to_array(endpoints[0], mod.ast.atoms(v.SiteFraction))
        second_endpoint = _translate_endmember_to_array(endpoints[1], mod.ast.atoms(v.SiteFraction))
        point_matrix = np.linspace(0, 1, num=100)[None].T * second_endpoint + \
            (1 - np.linspace(0, 1, num=100))[None].T * first_endpoint
        # TODO: Real temperature support
        point_matrix = point_matrix[None, None]
        predicted_quantities = calculate(dbf, comps, [phase_name], output=yattr, T=300, P=101325, points=point_matrix, model=mod, mode='numpy')
        response_data = predicted_quantities[yattr].values.flatten()
        if not bar_chart:
            extra_kwargs = {}
            if len(response_data) < 10:
                extra_kwargs['markersize'] = 20
                extra_kwargs['marker'] = '.'
                extra_kwargs['linestyle'] = 'none'
                extra_kwargs['clip_on'] = False
            ax.plot(np.linspace(0, 1, num=100), response_data, label='This work', color='k', **extra_kwargs)
            ax.set_xlim((0, 1))
            ax.set_xlabel(str(':'.join(endpoints[0])) + ' to ' + str(':'.join(endpoints[1])))
            ax.set_ylabel(plot_mapping.get(y, y))
        else:
            bar_labels.append('This work')
            bar_data.append(response_data[0])
    else:
        raise NotImplementedError('No support for plotting configuration {}'.format(configuration))

    bib_reference_keys = sorted({entry.get('reference', '') for entry in desired_data})
    symbol_map = bib_marker_map(bib_reference_keys)

    for data in desired_data:
        indep_var_data = None
        # np.float64 is used because the np.float_ alias no longer exists in NumPy 2.0.
        response_data = np.zeros_like(data['values'], dtype=np.float64)
        if x == 'T' or x == 'P':
            indep_var_data = np.array(data['conditions'][x], dtype=np.float64).flatten()
        elif x == 'Z':
            if disordered_config:
                # Take the second element of the first interacting sublattice as the coordinate
                # Because it's disordered all sublattices should be equivalent
                # TODO: Fix this to filter because we need to guarantee the plot points are disordered
                occ = data['solver']['sublattice_occupancies']
                subl_idx = np.nonzero([isinstance(c, (list, tuple)) for c in occ[0]])[0]
                # Index explicitly: int() on a 1-d array is deprecated in recent NumPy.
                subl_idx = int(subl_idx[0])
                indep_var_data = [c[subl_idx][1] for c in occ]
            else:
                # NOTE(review): sample_condition_dicts was built from all of desired_data,
                # not just this dataset - confirm the lengths agree when several datasets are plotted.
                interactions = np.array([cond_dict[Symbol('YS')] for cond_dict in sample_condition_dicts])
                indep_var_data = 1 - (interactions+1)/2
            if y.endswith('_MIX') and data['output'].endswith('_FORM'):
                # All the _FORM data we have still has the lattice stability contribution
                # Need to zero it out to shift formation data to mixing
                mod_latticeonly = Model(dbf, comps, phase_name, parameters={'GHSER'+c.upper(): 0 for c in comps})
                mod_latticeonly.models = {key: value for key, value in mod_latticeonly.models.items() if key == 'ref'}
                temps = data['conditions'].get('T', 300)
                pressures = data['conditions'].get('P', 101325)
                points = build_sitefractions(phase_name, data['solver']['sublattice_configurations'],
                                             data['solver']['sublattice_occupancies'])
                for point_idx in range(len(points)):
                    missing_variables = mod_latticeonly.ast.atoms(v.SiteFraction) - set(points[point_idx].keys())
                    # Set unoccupied values to zero
                    points[point_idx].update({key: 0 for key in missing_variables})
                    # Change entry to a sorted array of site fractions
                    points[point_idx] = list(OrderedDict(sorted(points[point_idx].items(), key=str)).values())
                points = np.array(points, dtype=np.float64)
                # TODO: Real temperature support
                points = points[None, None]
                stability = calculate(dbf, comps, [phase_name], output=data['output'][:-5],
                                      T=temps, P=pressures, points=points,
                                      model=mod_latticeonly, mode='numpy')
                response_data -= stability[data['output'][:-5]].values.squeeze()

        response_data += np.array(data['values'], dtype=np.float64)
        response_data = response_data.flatten()
        if not bar_chart:
            extra_kwargs = {}
            extra_kwargs['markersize'] = 8
            extra_kwargs['linestyle'] = 'none'
            extra_kwargs['clip_on'] = False
            ref = data.get('reference', '')
            mark = symbol_map[ref]['markers']
            ax.plot(indep_var_data, response_data,
                    label=symbol_map[ref]['formatted'],
                    marker=mark['marker'],
                    fillstyle=mark['fillstyle'], **extra_kwargs)
        else:
            bar_labels.append(data.get('reference', None))
            bar_data.append(response_data[0])
    if bar_chart:
        ax.barh(0.02 * np.arange(len(bar_data)), bar_data, color='k', height=0.01)
        endmember_title = ' to '.join([':'.join(i) for i in endpoints])
        ax.get_figure().suptitle('{} (T = {} K)'.format(endmember_title, temperatures), fontsize=20)
        ax.set_yticks(0.02 * np.arange(len(bar_data)))
        ax.set_yticklabels(bar_labels, fontsize=20)
        # This bar chart is rotated 90 degrees, so "y" is now x
        ax.set_xlabel(plot_mapping.get(y, y))
    else:
        ax.set_frame_on(False)
        leg = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))  # legend outside
        leg.get_frame().set_edgecolor('black')
    return ax
def plot_endmember(dbf, comps, phase_name, configuration, output, datasets=None, symmetry=None, x='T', ax=None, plot_kwargs=None, dataplot_kwargs=None) -> plt.Axes:
    """
    Return one set of plotted Axes with data compared to calculated parameters

    Parameters
    ----------
    dbf : Database
        pycalphad thermodynamic database containing the relevant parameters.
    comps : Sequence[str]
        Names of components to consider in the calculation.
    phase_name : str
        Name of the considered phase phase
    configuration : Tuple[Tuple[str]]
        ESPEI-style configuration. Must resolve to exactly one endmember.
    output : str
        Model property to plot on the y-axis e.g. ``'HM'``, ``'HM_FORM'``,
        ``'SM'`` or ``'CPM_FORM'``. Must not be a ``'_MIX'`` property.
    datasets : tinydb.TinyDB
        Datasets to plot as observed values. If None, only the prediction is plotted.
    symmetry : list
        List of lists containing indices of symmetric sublattices e.g. [[0, 1], [2, 3]]
    x : str
        Quantity on the x-axis. Only ``'T'`` is accepted.
    ax : plt.Axes
        Default axes used if not specified.
    plot_kwargs : Optional[Dict[str, Any]]
        Keyword arguments to ``ax.plot`` for the predicted data. Not modified.
    dataplot_kwargs : Optional[Dict[str, Any]]
        Keyword arguments to ``ax.plot`` the observed data. Not modified.
        ``label``, ``marker`` and ``fillstyle`` are always set per reference.

    Returns
    -------
    plt.Axes

    Raises
    ------
    ValueError
        If ``output`` is a ``'_MIX'`` property, ``x`` is not ``'T'`` or the
        configuration is not an endmember configuration.

    """
    if output.endswith('_MIX'):
        raise ValueError("`plot_endmember` only supports HM, HM_FORM, SM, SM_FORM or CPM, CPM_FORM outputs.")
    if x not in ('T',):
        raise ValueError(f'`x` passed to `plot_endmember` must be "T" got {x}')
    # Work on copies so the dictionaries supplied by the caller are never mutated.
    plot_kwargs = dict(plot_kwargs) if plot_kwargs else {}
    dataplot_kwargs = dict(dataplot_kwargs) if dataplot_kwargs else {}
    if not ax:
        ax = plt.subplot()

    if datasets is not None:
        solver_qry = (tinydb.where('solver').test(symmetry_filter, configuration, recursive_tuplify(symmetry) if symmetry else symmetry))
        # Pass the property as a one-element list, like the other call sites in this file,
        # so that a membership test on it cannot degrade into a substring match.
        desired_data = get_prop_data(comps, phase_name, [output], datasets, additional_query=solver_qry)
        desired_data = filter_configurations(desired_data, configuration, symmetry)
        desired_data = filter_temperatures(desired_data)
    else:
        desired_data = []

    # Plot predicted values from the database
    endpoints = endmembers_from_interaction(configuration)
    if len(endpoints) != 1:
        raise ValueError(f"The configuration passed to `plot_endmember` must be an endmebmer configuration. Got {configuration}")
    if output.endswith('_FORM'):
        # TODO: better reference state handling
        mod = Model(dbf, comps, phase_name, parameters={'GHSER'+(c.upper()*2)[:2]: 0 for c in comps})
        prop = output[:-5]
    else:
        mod = Model(dbf, comps, phase_name)
        prop = output
    endmember = _translate_endmember_to_array(endpoints[0], mod.ast.atoms(v.SiteFraction))[None, None]
    # Set up the domain of the calculation
    species = unpack_components(dbf, comps)
    # phase constituents are Species objects, so we need to be doing intersections with those
    phase_constituents = dbf.phases[phase_name].constituents
    # phase constituents must be filtered to only active
    constituents = [[sp.name for sp in sorted(subl_constituents.intersection(species))] for subl_constituents in phase_constituents]
    calculate_dict = get_prop_samples(desired_data, constituents)
    # Fall back to a single point at 298.15 when there are no samples
    potential_values = np.asarray(calculate_dict[x] if len(calculate_dict[x]) > 0 else 298.15)
    potential_grid = np.linspace(max(potential_values.min()-1, 0), potential_values.max()+1, num=100)
    predicted_values = calculate(dbf, comps, [phase_name], output=prop, T=potential_grid, P=101325, points=endmember, model=mod)[prop].values.flatten()
    ax.plot(potential_grid, predicted_values, **plot_kwargs)

    # Plot observed values
    # TODO: model exclusions handling
    bib_reference_keys = sorted({entry.get('reference', '') for entry in desired_data})
    symbol_map = bib_marker_map(bib_reference_keys)
    dataplot_kwargs.setdefault('markersize', 8)
    dataplot_kwargs.setdefault('linestyle', 'none')
    dataplot_kwargs.setdefault('clip_on', False)
    for data in desired_data:
        # np.float64 is used because the np.float_ alias no longer exists in NumPy 2.0.
        indep_var_data = np.array(data['conditions'][x], dtype=np.float64).flatten()
        response_data = np.array(data['values'], dtype=np.float64).flatten()
        ref = data.get('reference', '')
        # The reference-specific entries always override whatever the caller supplied
        reference_kwargs = dict(
            dataplot_kwargs,
            label=symbol_map[ref]['formatted'],
            marker=symbol_map[ref]['markers']['marker'],
            fillstyle=symbol_map[ref]['markers']['fillstyle'],
        )
        ax.plot(indep_var_data, response_data, **reference_kwargs)

    ax.set_xlabel(plot_mapping.get(x, x))
    ax.set_ylabel(plot_mapping.get(output, output))
    leg = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))  # legend outside
    leg.get_frame().set_edgecolor('black')
    return ax
def plot_interaction(dbf, comps, phase_name, configuration, output, datasets=None, symmetry=None, ax=None, plot_kwargs=None, dataplot_kwargs=None) -> plt.Axes:
    """
    Return one set of plotted Axes with data compared to calculated parameters

    Parameters
    ----------
    dbf : Database
        pycalphad thermodynamic database containing the relevant parameters.
    comps : Sequence[str]
        Names of components to consider in the calculation.
    phase_name : str
        Name of the considered phase phase
    configuration : Tuple[Tuple[str]]
        ESPEI-style configuration
    output : str
        Model property to plot on the y-axis e.g. ``'HM_MIX'``, or ``'SM_MIX'``.
        Must be a ``'_MIX'`` property.
    datasets : tinydb.TinyDB
        Datasets to plot as observed values. If None, only the prediction is plotted.
    symmetry : list
        List of lists containing indices of symmetric sublattices e.g. [[0, 1], [2, 3]]
    ax : plt.Axes
        Default axes used if not specified.
    plot_kwargs : Optional[Dict[str, Any]]
        Keyword arguments to ``ax.plot`` for the predicted data. Not modified.
    dataplot_kwargs : Optional[Dict[str, Any]]
        Keyword arguments to ``ax.plot`` the observed data. Not modified.
        ``label``, ``marker`` and ``fillstyle`` are always set per reference.

    Returns
    -------
    plt.Axes

    Raises
    ------
    ValueError
        If ``output`` is not a ``'_MIX'`` property.

    """
    if not output.endswith('_MIX'):
        raise ValueError("`plot_interaction` only supports HM_MIX, SM_MIX, or CPM_MIX outputs.")
    # Work on copies so the dictionaries supplied by the caller are never mutated.
    plot_kwargs = dict(plot_kwargs) if plot_kwargs else {}
    dataplot_kwargs = dict(dataplot_kwargs) if dataplot_kwargs else {}
    if not ax:
        ax = plt.subplot()

    # Plot predicted values from the database
    grid, predicted_values = _get_interaction_predicted_values(dbf, comps, phase_name, configuration, output)
    plot_kwargs.setdefault('label', 'This work')
    plot_kwargs.setdefault('color', 'k')
    ax.plot(grid, predicted_values, **plot_kwargs)

    # Plot the observed values from the datasets
    # TODO: model exclusions handling
    # TODO: better reference state handling
    # NOTE(review): other call sites in this file zero 'GHSER' + (c.upper()*2)[:2];
    # confirm whether single-letter components need the padded name here as well.
    mod_srf = Model(dbf, comps, phase_name, parameters={'GHSER'+c.upper(): 0 for c in comps})
    mod_srf.models = {'ref': mod_srf.models['ref']}

    # _MIX assumption
    prop = output.split('_MIX')[0]
    desired_props = (f"{prop}_MIX", f"{prop}_FORM")
    if datasets is not None:
        solver_qry = (tinydb.where('solver').test(symmetry_filter, configuration, recursive_tuplify(symmetry) if symmetry else symmetry))
        desired_data = get_prop_data(comps, phase_name, desired_props, datasets, additional_query=solver_qry)
        desired_data = filter_configurations(desired_data, configuration, symmetry)
        desired_data = filter_temperatures(desired_data)
    else:
        desired_data = []

    species = unpack_components(dbf, comps)
    # phase constituents are Species objects, so we need to be doing intersections with those
    phase_constituents = dbf.phases[phase_name].constituents
    # phase constituents must be filtered to only active
    constituents = [[sp.name for sp in sorted(subl_constituents.intersection(species))] for subl_constituents in phase_constituents]
    subl_dof = list(map(len, constituents))
    calculate_dict = get_prop_samples(desired_data, constituents)
    sample_condition_dicts = _get_sample_condition_dicts(calculate_dict, subl_dof)
    interacting_subls = [c for c in recursive_tuplify(configuration) if isinstance(c, tuple)]
    # Resolve the endpoints unconditionally: they are needed for the x-axis label
    # below even when the configuration is not treated as disordered.
    endpoints = endmembers_from_interaction(configuration)
    # This configuration describes all sublattices with the same two elements interacting
    # In general this is a high-dimensional space; just plot the diagonal to see the disordered mixing
    disordered_config = (len(set(interacting_subls)) == 1) and (len(interacting_subls[0]) == 2)
    # The axis runs from the first to the last endmember
    first_endpoint, last_endpoint = endpoints[0], endpoints[-1]

    bib_reference_keys = sorted({entry.get('reference', '') for entry in desired_data})
    symbol_map = bib_marker_map(bib_reference_keys)
    dataplot_kwargs.setdefault('markersize', 8)
    dataplot_kwargs.setdefault('linestyle', 'none')
    dataplot_kwargs.setdefault('clip_on', False)

    for data in desired_data:
        # np.float64 is used because the np.float_ alias no longer exists in NumPy 2.0.
        response_data = np.zeros_like(data['values'], dtype=np.float64)
        if disordered_config:
            # Take the second element of the first interacting sublattice as the coordinate
            # Because it's disordered all sublattices should be equivalent
            # TODO: Fix this to filter because we need to guarantee the plot points are disordered
            occ = data['solver']['sublattice_occupancies']
            subl_idx = np.nonzero([isinstance(c, (list, tuple)) for c in occ[0]])[0]
            # Index explicitly: int() on a 1-d array is deprecated in recent NumPy.
            subl_idx = int(subl_idx[0])
            indep_var_data = [c[subl_idx][1] for c in occ]
        else:
            # NOTE(review): sample_condition_dicts was built from all of desired_data,
            # not just this dataset - confirm the lengths agree when several datasets are plotted.
            interactions = np.array([cond_dict[Symbol('YS')] for cond_dict in sample_condition_dicts])
            indep_var_data = 1 - (interactions+1)/2
        if data['output'].endswith('_FORM'):
            # All the _FORM data we have still has the lattice stability contribution
            # Need to zero it out to shift formation data to mixing
            temps = data['conditions'].get('T', 298.15)
            pressures = data['conditions'].get('P', 101325)
            points = build_sitefractions(phase_name, data['solver']['sublattice_configurations'],
                                         data['solver']['sublattice_occupancies'])
            for point_idx in range(len(points)):
                missing_variables = mod_srf.ast.atoms(v.SiteFraction) - set(points[point_idx].keys())
                # Set unoccupied values to zero
                points[point_idx].update({key: 0 for key in missing_variables})
                # Change entry to a sorted array of site fractions
                points[point_idx] = list(OrderedDict(sorted(points[point_idx].items(), key=str)).values())
            points = np.array(points, dtype=np.float64)
            # TODO: Real temperature support
            points = points[None, None]
            stability = calculate(dbf, comps, [phase_name], output=data['output'][:-5],
                                  T=temps, P=pressures, points=points,
                                  model=mod_srf)
            response_data -= stability[data['output'][:-5]].values.squeeze()

        response_data += np.array(data['values'], dtype=np.float64)
        response_data = response_data.flatten()
        ref = data.get('reference', '')
        # The reference-specific entries always override whatever the caller supplied
        reference_kwargs = dict(
            dataplot_kwargs,
            label=symbol_map[ref]['formatted'],
            marker=symbol_map[ref]['markers']['marker'],
            fillstyle=symbol_map[ref]['markers']['fillstyle'],
        )
        ax.plot(indep_var_data, response_data, **reference_kwargs)
    ax.set_xlim((0, 1))
    ax.set_xlabel(str(':'.join(first_endpoint)) + ' to ' + str(':'.join(last_endpoint)))
    ax.set_ylabel(plot_mapping.get(output, output))
    leg = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))  # legend outside
    leg.get_frame().set_edgecolor('black')
    return ax
def plot_parameters(dbf, comps, phase_name, configuration, symmetry, datasets=None, fig=None, require_data=True):
    """
    Plot parameters of interest compared with data in subplots of a single figure

    Deprecated: emits a ``FutureWarning`` on every call.

    Parameters
    ----------
    dbf : Database
        pycalphad thermodynamic database containing the relevant parameters.
    comps : list
        Names of components to consider in the calculation.
    phase_name : str
        Name of the considered phase phase
    configuration : tuple
        Sublattice configuration to plot, such as ('CU', 'CU') or (('CU', 'MG'), 'CU')
    symmetry : list
        List of lists containing indices of symmetric sublattices e.g. [[0, 1], [2, 3]]
    datasets : PickleableTinyDB
        ESPEI datasets to compare against. If None, nothing is plotted.
    fig : matplotlib.Figure
        Figure to create with axes as subplots.
    require_data : bool
        If True, only plot parameters that have corresponding data.
        Defaults to True. Passing False raises an error for non-interaction
        configurations.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If data is required but no datasets were passed, or if
        ``require_data`` is false for a non-mixing configuration.

    Examples
    --------
    >>> # plot the LAVES_C15 (Cu)(Mg) endmember
    >>> plot_parameters(dbf, ['CU', 'MG'], 'LAVES_C15', ('CU', 'MG'), symmetry=None, datasets=datasets)  # doctest: +SKIP
    >>> # plot the mixing interaction in the first sublattice
    >>> plot_parameters(dbf, ['CU', 'MG'], 'LAVES_C15', (('CU', 'MG'), 'MG'), symmetry=None, datasets=datasets)  # doctest: +SKIP

    """
    deprecation_msg = (
        "`espei.plot.plot_parameters` is deprecated and will be removed in ESPEI 0.9. "
        "Please use `plot_endmember` or `plot_interaction` instead."
    )
    warnings.warn(deprecation_msg, category=FutureWarning)

    em_plots = [('T', 'CPM'), ('T', 'CPM_FORM'), ('T', 'SM'), ('T', 'SM_FORM'),
                ('T', 'HM'), ('T', 'HM_FORM')]
    mix_plots = [('Z', 'HM_MIX'), ('Z', 'SM_MIX')]
    comps = sorted(comps)
    mod = Model(dbf, comps, phase_name)
    mod.models['idmix'] = 0
    # This is for computing properties of formation
    mod_norefstate = Model(dbf, comps, phase_name, parameters={'GHSER'+(c.upper()*2)[:2]: 0 for c in comps})

    # A nested sublattice entry marks an interaction parameter; otherwise it is an endmember
    is_interaction = any(isinstance(conf, (list, tuple)) for conf in configuration)
    plots = mix_plots if is_interaction else em_plots

    def _matching_data(y_val):
        """Query and filter the datasets relevant to one y-axis property."""
        if y_val.endswith('_MIX'):
            desired_props = [y_val.split('_')[0]+'_FORM', y_val]
        else:
            desired_props = [y_val]
        solver_qry = (tinydb.where('solver').test(symmetry_filter, configuration, recursive_tuplify(symmetry) if symmetry else symmetry))
        found = get_prop_data(comps, phase_name, desired_props, datasets, additional_query=solver_qry)
        found = filter_configurations(found, configuration, symmetry)
        return filter_temperatures(found)

    have_datasets = datasets is not None
    if require_data:
        if not have_datasets:
            raise ValueError('Plots require datasets, but no datasets were passed.')
        # filter which parameters to plot by the data that exists
        filtered_plots = []
        for x_val, y_val in plots:
            data = _matching_data(y_val)
            if len(data) > 0:
                filtered_plots.append((x_val, y_val, data))
    elif not is_interaction:
        # How we treat temperature dependence is ambiguous when there is no data, so we raise an error
        raise ValueError('The "require_data=False" option is not supported for non-mixing configurations.')
    elif have_datasets:
        filtered_plots = [(x_val, y_val, _matching_data(y_val)) for x_val, y_val in plots]
    else:
        filtered_plots = [(x_val, y_val, []) for x_val, y_val in plots]

    num_plots = len(filtered_plots)
    if not filtered_plots:
        return
    if not fig:
        fig = plt.figure(figsize=plt.figaspect(num_plots))

    # plot them
    for position, (x_val, y_val, data) in enumerate(filtered_plots, start=1):
        ax = fig.add_subplot(num_plots, 1, position)
        # Formation properties are compared against the model with zeroed reference states
        model = mod_norefstate if y_val.endswith('_FORM') else mod
        _compare_data_to_parameters(dbf, comps, phase_name, data, model, configuration, x_val, y_val, ax=ax)