def test_extract_aliases(reason, phase_models, expected_aliases):
    """Check ``extract_aliases`` against a single expectation.

    When ``expected_aliases`` is ``None`` the call must raise ``ValueError``;
    otherwise the returned value must equal ``expected_aliases`` and
    ``reason`` is used as the assertion failure message.
    """
    if expected_aliases is not None:
        assert extract_aliases(phase_models) == expected_aliases, reason
        return

    with pytest.raises(ValueError):
        aliases = extract_aliases(phase_models)
        # Only reached when no exception was raised; shows what came back
        # before ``pytest.raises`` reports the missing error.
        print(aliases)
def generate_parameters(phase_models, datasets, ref_state, excess_model, ridge_alpha=None, aicc_penalty_factor=None, dbf=None):
    """Generate parameters from given phase models and datasets

    Parameters
    ----------
    phase_models : dict
        Dictionary of components and phases to fit.
    datasets : PickleableTinyDB
        database of single- and multi-phase to fit.
    ref_state : str
        String of the reference data to use, e.g. 'SGTE91' or 'SR2016'
    excess_model : str
        String of the type of excess model to fit to, e.g. 'linear'.
        Not referenced in the body of this function.
    ridge_alpha : float
        Value of the :math:`\\alpha` hyperparameter used in ridge regression. Defaults to None,
        which falls back to ordinary least squares regression.
        For now, the parameter is applied to all features.
    aicc_penalty_factor : dict
        Map of phase name to feature to a multiplication factor for the AICc's parameter penalty.
    dbf : Database
        Initial pycalphad Database that can have parameters that would not be fit by ESPEI

    Returns
    -------
    pycalphad.Database

    Notes
    -----
    NumPy's print ``linewidth`` option is widened while fitting so that logged
    arrays print on one line. The value that was active on entry is restored
    on exit, including when fitting raises.
    """
    # Remember the caller's setting so it can be restored rather than
    # overwritten with a hard-coded value.
    previous_linewidth = np.get_printoptions()['linewidth']
    # Set NumPy print options so logged arrays print on one line.
    np.set_printoptions(linewidth=sys.maxsize)
    try:
        _log.info('Generating parameters.')
        _log.trace('Found the following user reference states: %s', espei.refdata.INSERTED_USER_REFERENCE_STATES)
        refdata = getattr(espei.refdata, ref_state)
        aliases = extract_aliases(phase_models)
        dbf = initialize_database(phase_models, ref_state, dbf)
        # Fit phases in alphabetic order so the VV#### counter is consistent between runs
        for phase_name, phase_data in sorted(phase_models['phases'].items(), key=operator.itemgetter(0)):
            # Phases that were not added to the Database are skipped.
            if phase_name in dbf.phases:
                symmetry = phase_data.get('equivalent_sublattices', None)
                phase_fit(dbf, phase_name, symmetry, datasets, refdata, ridge_alpha, aicc_penalty=aicc_penalty_factor, aliases=aliases)
        _log.info('Finished generating parameters.')
    finally:
        # Always undo the global print option change, even on failure.
        np.set_printoptions(linewidth=previous_linewidth)
    return dbf
def initialize_database(phase_models, ref_state, dbf=None, fallback_ref_state="SGTE91"):
    """Return a Database bootstrapped with elements, species, phases and unary lattice stabilities.

    Reference states, phases and symbols that are already present in a
    supplied ``dbf`` are left as they are; only missing entries are added.

    Parameters
    ----------
    phase_models : Dict[str, Any]
        Dictionary of components and phases to fit.
    ref_state : str
        String of the reference data to use, e.g. 'SGTE91' or 'SR2016'
    dbf : Optional[Database]
        Initial pycalphad Database that can have parameters that would not be fit by ESPEI
    fallback_ref_state : str
        String of the reference data to use for SER data, defaults to 'SGTE91'

    Returns
    -------
    Database
        A new pycalphad Database object, or a modified one if it was given.

    """
    dbf = Database() if dbf is None else dbf

    # The first lookup is not used below, but it still raises AttributeError
    # when ``ref_state`` is not an attribute of ``espei.refdata``.
    lattice_stabilities = getattr(espei.refdata, ref_state)
    ser_stability = getattr(espei.refdata, ref_state + "Stable")
    aliases = extract_aliases(phase_models)
    phases = sorted({name.upper() for name in phase_models["phases"]})

    component_names = {comp.upper() for comp in phase_models["components"]}
    dbf.elements.update(component_names)
    dbf.species.update({v.Species(comp, {comp: 1}, 0) for comp in component_names})

    # Add SER reference data for each element
    for element in dbf.elements:
        if element in dbf.refstates:
            # Do not clobber user reference states
            continue
        ser_data = _get_ser_data(element, ref_state, fallback_ref_state=fallback_ref_state)
        # Try to look up the alias that we are using in this fitting
        listed_phase = ser_data["phase"]
        reference_phase = aliases.get(listed_phase, listed_phase)
        ser_data["phase"] = reference_phase
        # Don't warn if the element is a species with no atoms because per-atom
        # formation energies are not possible (e.g. VA (VACUUM) or /- (ELECTRON_GAS))
        if reference_phase not in phases and v.Species(element).number_of_atoms != 0:
            # The reference data holds the Gibbs energy expression we need,
            # but this phase is not a candidate in the phase models. The
            # phase won't be added to the database, so looking up the phase's
            # energy won't work.
            _log.warning(
                "The reference phase for %s, %s, is not in the supplied phase models "
                "and won't be added to the Database phases. Fitting formation "
                "energies will not be possible.", element, reference_phase)
        dbf.refstates[element] = ser_data

    # Add the phases
    for phase_name, phase_data in phase_models['phases'].items():
        if phase_name in dbf.phases:
            # Do not clobber user phases
            continue
        # TODO: Need to support model hints for: magnetic, order-disorder, etc.
        site_ratios = phase_data['sublattice_site_ratios']
        full_model = phase_data['sublattice_model']
        # Only generate the sublattice model for active components
        active_model = [
            sorted(set(sublattice).intersection(dbf.elements))
            for sublattice in full_model
        ]
        if any(len(sublattice) == 0 for sublattice in active_model):
            # A sublattice with no active constituents: skip the phase
            continue
        dbf.add_phase(phase_name, {}, site_ratios)
        dbf.add_phase_constituents(phase_name, active_model)

    # Add the GHSER functions to the Database
    for element in dbf.elements:
        # Use setdefault here to not clobber user-provided functions
        if element == "VA":
            dbf.symbols.setdefault("GHSERVA", 0)
            continue
        # `(element.upper() * 2)[:2]` gives "AL" for "Al" and "BB" for "B".
        # Using this ensures that GHSER functions will be unique, e.g.
        # GHSERC would be an abbreviation for GHSERCA.
        two_letter_code = (element.upper() * 2)[:2]
        dbf.symbols.setdefault("GHSER" + two_letter_code, ser_stability[element])
    return dbf