def test_get_data_quantities_magnetic_energy():
    """Check SM_FORM data quantities when ``mag`` is an excluded contribution.

    The dataset excludes both ``idmix`` and ``mag``. The same expected value is
    asserted for a database without magnetic parameters and for a database
    that defines TC/BMAGN parameters for the phase.
    """
    expected = [16.78896]
    datasets = [{
        "components": ["AL", "CR"],
        "phases": ["ALCR2"],
        "solver": {
            "mode": "manual",
            "sublattice_site_ratios": [1.0, 2.0],
            "sublattice_configurations": [["AL", "CR"]],
        },
        "conditions": {"P": [101325], "T": [300]},
        "excluded_model_contributions": ["idmix", "mag"],
        "output": "SM_FORM",
        "values": [[[5.59631999999999]]],
    }]
    configuration = (('AL',), ('CR',))
    samples = get_prop_samples(datasets, configuration)
    sublattice_sizes = [len(subl) for subl in configuration]
    condition_dicts = _get_sample_condition_dicts(samples, sublattice_sizes)

    # Case 1: the database carries no magnetic parameters at all
    tdb_without_magnetism = """
    ELEMENT AL FCC_A1 26.982 4577.3 28.322 !
    ELEMENT CR BCC_A2 51.996 4050.0 23.56 !
    PHASE ALCR2 % 2 1.0 2.0 !
    CONSTITUENT ALCR2 :AL,CR:AL,CR: !
    """
    model_nomag = Model(Database(tdb_without_magnetism), ['AL', 'CR'], 'ALCR2')
    result_nomag = get_data_quantities('SM_FORM', model_nomag, [0], datasets, condition_dicts)
    print(result_nomag)
    assert np.allclose(expected, result_nomag)

    # Case 2: magnetic parameters are present, but "mag" is listed in the
    # dataset's excluded model contributions
    tdb_with_magnetism = """
    ELEMENT AL FCC_A1 2.6982E+01 4.5773E+03 2.8322E+01!
    ELEMENT CR BCC_A2 5.1996E+01 4.0500E+03 2.3560E+01!
    TYPE_DEFINITION & GES A_P_D ALCR2 MAGNETIC -1.0 4.00000E-01 !
    PHASE ALCR2 %& 2 1 2 !
    CONSTITUENT ALCR2 :AL,CR : AL,CR : !
    PARAMETER TC(ALCR2,AL:AL;0) 298.15 -619; 6000 N REF0 !
    PARAMETER BMAGN(ALCR2,AL:AL;0) 298.15 -.92; 6000 N REF0 !
    PARAMETER TC(ALCR2,CR:AL;0) 298.15 -619; 6000 N REF0 !
    PARAMETER BMAGN(ALCR2,CR:AL;0) 298.15 -.92; 6000 N REF0 !
    PARAMETER TC(ALCR2,AL:CR;0) 298.15 -619; 6000 N REF0 !
    PARAMETER BMAGN(ALCR2,AL:CR;0) 298.15 -.92; 6000 N REF0 !
    PARAMETER TC(ALCR2,CR:CR;0) 298.15 -619; 6000 N REF0 !
    PARAMETER BMAGN(ALCR2,CR:CR;0) 298.15 -.92; 6000 N REF0 !
    PARAMETER TC(ALCR2,AL,CR:AL;0) 298.15 -485; 6000 N REF0 !
    PARAMETER BMAGN(ALCR2,AL,CR:AL;0) 298.15 -.92; 6000 N REF0 !
    PARAMETER TC(ALCR2,AL,CR:CR;0) 298.15 -485; 6000 N REF0 !
    PARAMETER BMAGN(ALCR2,AL,CR:CR;0) 298.15 -.92; 6000 N REF0 !
    """
    model_mag = Model(Database(tdb_with_magnetism), ['AL', 'CR'], 'ALCR2')
    result_mag = get_data_quantities('SM_FORM', model_mag, [0], datasets, condition_dicts)
    print(result_mag)
    assert np.allclose(expected, result_mag)
def test_get_data_quantities_AL_NI_VA_interaction():
    """Test that an interaction with a VA produces the correct data quantities

    We just have a template database that has the phase defined. We then hot
    patch the Model object to have the GM from the fixed model we printed out
    and the data we printed out. The hot patch is needed because this is
    formation enthalpy data and the model needs to have the lower order terms
    in composition.

    One possible issue is that the new GM in the fixed model does not have any
    individual contributions, so it cannot be used to test excluded model
    contributions. The only excluded model contributions in this data are
    idmix, but the property we are testing is HM_FORM, so the feature transform
    of the idmix property should be zero.

    """
    # Hack the namespace to make the copy-pasted Gibbs energy function work:
    # the NEW_GM expression below refers to the bare names `log`, `Piecewise` and `T`.
    from sympy import log, Piecewise
    T = v.T

    # Two HM_FORM datasets (constrained and relaxed SQS), each with two
    # sublattice occupancies for the same AL:(NI,VA):VA configuration.
    data = [{'components': ['AL', 'NI', 'VA'], 'phases': ['BCC_B2'], 'solver': {'mode': 'manual', 'sublattice_occupancies': [[1.0, [0.5, 0.5], 1.0], [1.0, [0.75, 0.25], 1.0]], 'sublattice_site_ratios': [0.5, 0.5, 1.0], 'sublattice_configurations': (('AL', ('NI', 'VA'), 'VA'), ('AL', ('NI', 'VA'), 'VA')), 'comment': 'BCC_B2 sublattice configuration (2SL)'}, 'conditions': {'P': 101325.0, 'T': np.array([300.])}, 'reference_state': 'SGTE91', 'output': 'HM_FORM', 'values': np.array([[[-40316.61077, -56361.58554]]]), 'reference': 'C. Jiang 2009 (constrained SQS)', 'excluded_model_contributions': ['idmix']},
            {'components': ['AL', 'NI', 'VA'], 'phases': ['BCC_B2'], 'solver': {'mode': 'manual', 'sublattice_occupancies': [[1.0, [0.5, 0.5], 1.0], [1.0, [0.75, 0.25], 1.0]], 'sublattice_site_ratios': [0.5, 0.5, 1.0], 'sublattice_configurations': (('AL', ('NI', 'VA'), 'VA'), ('AL', ('NI', 'VA'), 'VA')), 'comment': 'BCC_B2 sublattice configuration (2SL)'}, 'conditions': {'P': 101325.0, 'T': np.array([300.])}, 'reference_state': 'SGTE91', 'output': 'HM_FORM', 'values': np.array([[[-41921.43363, -57769.49473]]]), 'reference': 'C. Jiang 2009 (relaxed SQS)', 'excluded_model_contributions': ['idmix']}]

    # Copy-pasted Gibbs energy expression of the previously fixed model; it is
    # installed as the only non-zero entry of `mod.models` further down.
    NEW_GM = 8.3145*T*(0.5*Piecewise((v.SiteFraction("BCC_B2", 0, "AL")*log(v.SiteFraction("BCC_B2", 0, "AL")), v.SiteFraction("BCC_B2", 0, "AL") > 1.0e-16), (0, True))/(0.5*v.SiteFraction("BCC_B2", 0, "AL") + 0.5*v.SiteFraction("BCC_B2", 0, "NI") + 0.5*v.SiteFraction("BCC_B2", 1, "AL") + 0.5*v.SiteFraction("BCC_B2", 1, "NI")) + 0.5*Piecewise((v.SiteFraction("BCC_B2", 0, "NI")*log(v.SiteFraction("BCC_B2", 0, "NI")), v.SiteFraction("BCC_B2", 0, "NI") > 1.0e-16), (0, True))/(0.5*v.SiteFraction("BCC_B2", 0, "AL") + 0.5*v.SiteFraction("BCC_B2", 0, "NI") + 0.5*v.SiteFraction("BCC_B2", 1, "AL") + 0.5*v.SiteFraction("BCC_B2", 1, "NI")) + 0.5*Piecewise((v.SiteFraction("BCC_B2", 0, "VA")*log(v.SiteFraction("BCC_B2", 0, "VA")), v.SiteFraction("BCC_B2", 0, "VA") > 1.0e-16), (0, True))/(0.5*v.SiteFraction("BCC_B2", 0, "AL") + 0.5*v.SiteFraction("BCC_B2", 0, "NI") + 0.5*v.SiteFraction("BCC_B2", 1, "AL") + 0.5*v.SiteFraction("BCC_B2", 1, "NI")) + 0.5*Piecewise((v.SiteFraction("BCC_B2", 1, "AL")*log(v.SiteFraction("BCC_B2", 1, "AL")), v.SiteFraction("BCC_B2", 1, "AL") > 1.0e-16), (0, True))/(0.5*v.SiteFraction("BCC_B2", 0, "AL") + 0.5*v.SiteFraction("BCC_B2", 0, "NI") + 0.5*v.SiteFraction("BCC_B2", 1, "AL") + 0.5*v.SiteFraction("BCC_B2", 1, "NI")) + 0.5*Piecewise((v.SiteFraction("BCC_B2", 1, "NI")*log(v.SiteFraction("BCC_B2", 1, "NI")), v.SiteFraction("BCC_B2", 1, "NI") > 1.0e-16), (0, True))/(0.5*v.SiteFraction("BCC_B2", 0, "AL") + 0.5*v.SiteFraction("BCC_B2", 0, "NI") + 0.5*v.SiteFraction("BCC_B2", 1, "AL") + 0.5*v.SiteFraction("BCC_B2", 1, "NI")) + 0.5*Piecewise((v.SiteFraction("BCC_B2", 1, "VA")*log(v.SiteFraction("BCC_B2", 1, "VA")), v.SiteFraction("BCC_B2", 1, "VA") > 1.0e-16), (0, True))/(0.5*v.SiteFraction("BCC_B2", 0, "AL") + 0.5*v.SiteFraction("BCC_B2", 0, "NI") + 0.5*v.SiteFraction("BCC_B2", 1, "AL") + 0.5*v.SiteFraction("BCC_B2", 1, "NI")) + Piecewise((v.SiteFraction("BCC_B2", 2, "VA")*log(v.SiteFraction("BCC_B2", 2, "VA")), v.SiteFraction("BCC_B2", 2, "VA") > 1.0e-16), (0, True))/(0.5*v.SiteFraction("BCC_B2", 0, "AL") + 0.5*v.SiteFraction("BCC_B2", 0, "NI") + 0.5*v.SiteFraction("BCC_B2", 1, "AL") + 0.5*v.SiteFraction("BCC_B2", 1, "NI"))) + (45262.9*v.SiteFraction("BCC_B2", 0, "AL")*v.SiteFraction("BCC_B2", 0, "NI")*v.SiteFraction("BCC_B2", 1, "AL")*v.SiteFraction("BCC_B2", 2, "VA") + 45262.9*v.SiteFraction("BCC_B2", 0, "AL")*v.SiteFraction("BCC_B2", 1, "AL")*v.SiteFraction("BCC_B2", 1, "NI")*v.SiteFraction("BCC_B2", 2, "VA"))/(0.5*v.SiteFraction("BCC_B2", 0, "AL") + 0.5*v.SiteFraction("BCC_B2", 0, "NI") + 0.5*v.SiteFraction("BCC_B2", 1, "AL") + 0.5*v.SiteFraction("BCC_B2", 1, "NI")) + (1.0*v.SiteFraction("BCC_B2", 0, "AL")*v.SiteFraction("BCC_B2", 1, "AL")*v.SiteFraction("BCC_B2", 2, "VA")*Piecewise((10083 - 4.813*T, (T >= 298.15) & (T < 2900.0)), (0, True)) + v.SiteFraction("BCC_B2", 0, "AL")*v.SiteFraction("BCC_B2", 1, "NI")*v.SiteFraction("BCC_B2", 2, "VA")*(9.52839e-8*T**3 + 0.00123463*T**2 + 0.000871898*T*log(T) + 1.31471*T - 64435.3 + 23095.2/T) + v.SiteFraction("BCC_B2", 0, "AL")*v.SiteFraction("BCC_B2", 1, "VA")*v.SiteFraction("BCC_B2", 2, "VA")*(10.0*T + 16432.5) + v.SiteFraction("BCC_B2", 0, "NI")*v.SiteFraction("BCC_B2", 1, "AL")*v.SiteFraction("BCC_B2", 2, "VA")*(9.52839e-8*T**3 + 0.00123463*T**2 + 0.000871898*T*log(T) + 1.31471*T - 64435.3 + 23095.2/T) + 1.0*v.SiteFraction("BCC_B2", 0, "NI")*v.SiteFraction("BCC_B2", 1, "NI")*v.SiteFraction("BCC_B2", 2, "VA")*Piecewise((8715.084 - 3.556*T, (T >= 298.15) & (T < 3000.0)), (0, True)) + 32790.6*v.SiteFraction("BCC_B2", 0, "NI")*v.SiteFraction("BCC_B2", 1, "VA")*v.SiteFraction("BCC_B2", 2, "VA") + v.SiteFraction("BCC_B2", 0, "VA")*v.SiteFraction("BCC_B2", 1, "AL")*v.SiteFraction("BCC_B2", 2, "VA")*(10.0*T + 16432.5) + 32790.6*v.SiteFraction("BCC_B2", 0, "VA")*v.SiteFraction("BCC_B2", 1, "NI")*v.SiteFraction("BCC_B2", 2, "VA"))/(0.5*v.SiteFraction("BCC_B2", 0, "AL") + 0.5*v.SiteFraction("BCC_B2", 0, "NI") + 0.5*v.SiteFraction("BCC_B2", 1, "AL") + 0.5*v.SiteFraction("BCC_B2", 1, "NI"))

    # Template database: defines only the elements and the phase, no parameters.
    dbf = Database("""$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
    $ Date: 2019-12-08 18:05
    $ Components: AL, NI, VA
    $ Phases: BCC_B2
    $ Generated by brandon (pycalphad 0.8.1.post1)
    $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

    ELEMENT AL FCC_A1 26.982 4577.3 28.322 !
    ELEMENT NI FCC_A1 58.69 4787.0 29.796 !
    ELEMENT VA VACUUM 0.0 0.0 0.0 !

    TYPE_DEFINITION % SEQ * !
    DEFINE_SYSTEM_DEFAULT ELEMENT 2 !
    DEFAULT_COMMAND DEFINE_SYSTEM_ELEMENT VA !

    PHASE BCC_B2 % 3 0.5 0.5 1 !
    CONSTITUENT BCC_B2 :AL,NI,VA:AL,NI,VA:VA: !

    """)
    mod = Model(dbf, ['AL', 'NI', 'VA'], 'BCC_B2')
    # Hot patch: zero every model contribution, then set GM to the pasted expression.
    dd = {ky: 0.0 for ky in mod.models.keys()}
    dd['GM'] = NEW_GM
    mod.models = dd
    print(mod.HM)
    config_tup = (('AL',), ('NI', 'VA'), ('VA',))
    calculate_dict = get_prop_samples(data, config_tup)
    sample_condition_dicts = _get_sample_condition_dicts(calculate_dict, list(map(len, config_tup)))
    qty = get_data_quantities('HM_FORM', mod, [0], data, sample_condition_dicts)
    print(qty)
    # Four expected quantities: two occupancies for each of the two datasets.
    assert np.all(np.isclose([-6254.7802775, -5126.1206475, -7458.3974225, -6358.04118875], qty))
def test_get_data_quantities_mixing_entropy():
    """Verify the SM_MIX data quantity when ``idmix`` is an excluded contribution.

    A single SM_MIX dataset for AL11CR2 with an AL:(AL,CR) configuration is
    converted to data quantities against a parameter-free database and
    compared with the expected value.
    """
    datasets = [{
        'components': ['AL', 'CR'],
        'phases': ['AL11CR2'],
        'solver': {
            'mode': 'manual',
            'sublattice_site_ratios': [10.0, 2.0],
            'sublattice_configurations': (('AL', ('AL', 'CR')),),
            'sublattice_occupancies': [[1.0, [0.5, 0.5]]],
        },
        'conditions': {'P': 101325.0, 'T': np.array([300.])},
        'output': 'SM_MIX',
        'values': np.array([[[0.60605556]]]),
        'reference': 'text 1 to write down reference for this work',
        'comment': 'test 2 to write down comment for this work',
        'excluded_model_contributions': ['idmix'],
    }]

    tdb_text = """
    ELEMENT AL FCC_A1 26.982 4577.3 28.322 !
    ELEMENT CR BCC_A2 51.996 4050.0 23.56 !
    PHASE AL11CR2 % 2 10.0 2.0 !
    CONSTITUENT AL11CR2 :AL:AL,CR: !
    """
    database = Database(tdb_text)
    model = Model(database, ['AL', 'CR'], 'AL11CR2')

    configuration = (('AL',), ('AL', 'CR'))
    samples = get_prop_samples(datasets, configuration)
    sublattice_sizes = [len(subl) for subl in configuration]
    condition_dicts = _get_sample_condition_dicts(samples, sublattice_sizes)

    # Argument order: desired_property, fixed_model, fixed_portions, data, samples
    result = get_data_quantities('SM_MIX', model, [0], datasets, condition_dicts)
    print(result)
    assert np.allclose([7.27266667], result)
def fit_formation_energy(dbf, comps, phase_name, configuration, symmetry, datasets, ridge_alpha=None, aicc_phase_penalty=None, features=None):
    """
    Find suitable linear model parameters for the given phase.

    We do this by successively fitting heat capacities, entropies and
    enthalpies of formation, and selecting against criteria to prevent
    overfitting. The "best" set of parameters minimizes the error
    without overfitting.

    Parameters
    ----------
    dbf : Database
        pycalphad Database. Partially complete, so we know what degrees of freedom to fix.
    comps : [str]
        Names of the relevant components.
    phase_name : str
        Name of the desired phase for which the parameters will be found.
    configuration : ndarray
        Configuration of the sublattices for the fitting procedure.
    symmetry : [[int]]
        Symmetry of the sublattice configuration.
    datasets : PickleableTinyDB
        All the datasets desired to fit to.
    ridge_alpha : float
        Value of the :math:`\\alpha` hyperparameter used in ridge regression.
        Forwarded unchanged to ``select_model``; the default is None.
        For now, the parameter is applied to all features.
    aicc_phase_penalty : dict
        Map of feature type (the part of the property name before the
        underscore, e.g. "HM" for "HM_FORM") to a multiplication factor for
        the AICc's parameter penalty. Feature types that are missing use a
        factor of 1.0. (Default value = None, treated as an empty dict)
    features : dict
        Maps "property" to a list of features for the linear model.
        These will be transformed from "GM" coefficients
        e.g., {"CPM_FORM": (v.T*sympy.log(v.T), v.T**2, v.T**-1, v.T**3)} (Default value = None)

    Returns
    -------
    dict
        {feature: estimated_value}

    """
    aicc_feature_factors = aicc_phase_penalty if aicc_phase_penalty is not None else {}
    if interaction_test(configuration):
        _log.debug('ENDMEMBERS FROM INTERACTION: %s', endmembers_from_interaction(configuration))
        fitting_steps = (["CPM_FORM", "CPM_MIX"], ["SM_FORM", "SM_MIX"], ["HM_FORM", "HM_MIX"])
    else:
        # We are only fitting an endmember; no mixing data needed
        fitting_steps = (["CPM_FORM"], ["SM_FORM"], ["HM_FORM"])

    # create the candidate models and fitting steps
    if features is None:
        features = OrderedDict([("CPM_FORM", (v.T * sympy.log(v.T), v.T**2, v.T**-1, v.T**3)),
                                ("SM_FORM", (v.T,)),
                                ("HM_FORM", (sympy.S.One,)),
                                ])
    # dict of {feature, [candidate_models]}
    candidate_models_features = build_candidate_models(configuration, features)

    # All possible parameter values that could be taken on. This is some legacy
    # code from before there were many candidate models built. For very large
    # sets of candidate models, this could be quite slow.
    # TODO: we might be able to remove this initialization for clarity, depends on fixed portions
    parameters = {}
    for candidate_models in candidate_models_features.values():
        for model in candidate_models:
            for coef in model:
                parameters[coef] = 0

    # This is our previously fit partial model from previous steps
    # Subtract out all of these contributions (zero out reference state because these are formation properties)
    fixed_model = Model(dbf, comps, phase_name, parameters={'GHSER'+(c.upper()*2)[:2]: 0 for c in comps})
    fixed_portions = [0]

    for desired_props in fitting_steps:
        feature_type = desired_props[0].split('_')[0]  # HM_FORM -> HM
        aicc_factor = aicc_feature_factors.get(feature_type, 1.0)
        solver_qry = (where('solver').test(symmetry_filter, configuration, recursive_tuplify(symmetry) if symmetry else symmetry))
        desired_data = get_prop_data(comps, phase_name, desired_props, datasets, additional_query=solver_qry)
        desired_data = filter_configurations(desired_data, configuration, symmetry)
        desired_data = filter_temperatures(desired_data)
        _log.trace('%s: datasets found: %s', desired_props, len(desired_data))
        if len(desired_data) == 0:
            # Nothing to fit in this step; keep the parameters found so far.
            continue

        config_tup = tuple(map(tuplify, configuration))
        calculate_dict = get_prop_samples(desired_data, config_tup)
        sample_condition_dicts = _get_sample_condition_dicts(calculate_dict, list(map(len, config_tup)))
        weights = calculate_dict['weights']
        assert len(sample_condition_dicts) == len(weights)

        # We assume all properties in the same fitting step have the same
        # features (all CPM, all HM, etc., but different ref states).
        # data quantities are the same for each candidate model and can be computed up front
        data_qtys = get_data_quantities(feature_type, fixed_model, fixed_portions, desired_data, sample_condition_dicts)

        # build the candidate model transformation matrix and response vector (A, b in Ax=b)
        candidate_models = candidate_models_features[desired_props[0]]
        transform = feature_transforms[feature_type]
        feature_matrices = []
        data_quantities = []
        for candidate_coefficients in candidate_models:
            # Map coefficients in G to coefficients in the feature_type (H, S, CP).
            # The feature matrix is built the same way for every interaction order.
            transformed_coefficients = list(map(transform, candidate_coefficients))
            feature_matrices.append(_build_feature_matrix(sample_condition_dicts, transformed_coefficients))
            data_quantities.append(data_qtys)

        # provide candidate models and get back a selected model.
        selected_model = select_model(zip(candidate_models, feature_matrices, data_quantities), ridge_alpha, weights=weights, aicc_factor=aicc_factor)
        selected_features, selected_values = selected_model
        parameters.update(zip(selected_features, selected_values))

        # Add these parameters to be fixed for the next fitting step
        fixed_portion = np.array(selected_features, dtype=np.object_)
        fixed_portion = np.dot(fixed_portion, selected_values)
        fixed_portions.append(fixed_portion)
    return parameters