Exemple #1
0
def phase_fit(dbf,
              phase_name,
              symmetry,
              subl_model,
              site_ratios,
              datasets,
              refdata,
              aliases=None):
    """Generate an initial CALPHAD model for a given phase and sublattice model.

    Parameters
    ----------
    dbf : Database
        pycalphad Database to add parameters to.
    phase_name : str
        Name of the phase.
    symmetry : [[int]]
        Sublattice model symmetry.
    subl_model : [[str]]
        Sublattice model for the phase of interest.
    site_ratios : [float]
        Number of sites in each sublattice, normalized to one atom.
    datasets : PickleableTinyDB
        All datasets to consider for the calculation.
    refdata : dict
        Maps tuple(element, phase_name) -> SymPy object defining
        energy relative to SER
    aliases : [str]
        Alternative phase names. Useful for matching against
        reference data or other datasets. (Default value = None)

    Returns
    -------
    None
        Modifies the dbf.

    """
    if not hasattr(dbf, 'varcounter'):
        dbf.varcounter = 0
    # First fit endmembers
    all_em_count = len(list(itertools.product(*subl_model)))
    endmembers = sorted(
        set(canonicalize(i, symmetry) for i in itertools.product(*subl_model)))
    # Number of significant figures in parameters
    numdigits = 6
    em_dict = {}
    aliases = [] if aliases is None else aliases
    aliases = sorted(set(aliases + [phase_name]))
    logging.info('FITTING: {}'.format(phase_name))
    logging.debug('{0} endmembers ({1} distinct by symmetry)'.format(
        all_em_count, len(endmembers)))

    def _to_tuple(x):
        if isinstance(x, list) or isinstance(x, tuple):
            return tuple(x)
        else:
            return tuple([x])

    all_endmembers = []
    for endmember in endmembers:
        logging.debug('ENDMEMBER: {}'.format(endmember))
        # Some endmembers are fixed by our choice of standard lattice stabilities, e.g., SGTE91
        # If a (phase, pure component endmember) tuple is fixed, we should use that value instead of fitting
        endmember_comps = list(set(endmember))
        fit_eq = None
        # only one non-VA component, or two components but the other is VA and its only the last sublattice
        if ((len(endmember_comps) == 1) and (endmember_comps[0] != 'VA')) or\
                ((len(endmember_comps) == 2) and (endmember[-1] == 'VA') and (len(set(endmember[:-1])) == 1)):
            # this is a "pure component endmember"
            # try all phase name aliases until we get run out or get a hit
            em_comp = list(set(endmember_comps) - {'VA'})[0]
            sym_name = None
            for name in aliases:
                sym_name = 'G' + name[:3].upper() + em_comp.upper()
                stability = refdata.get((em_comp.upper(), name.upper()), None)
                if stability is not None:
                    if isinstance(stability, sympy.Piecewise):
                        # Default zero required for the compiled backend
                        if (0, True) not in stability.args:
                            new_args = stability.args + ((0, True), )
                            stability = sympy.Piecewise(*new_args)
                    dbf.symbols[sym_name] = stability
                    break
            if dbf.symbols.get(sym_name, None) is not None:
                num_moles = sum([
                    sites for elem, sites in zip(endmember, site_ratios)
                    if elem != 'VA'
                ])
                fit_eq = num_moles * sympy.Symbol(sym_name)
        if fit_eq is None:
            # No reference lattice stability data -- we have to fit it
            parameters = fit_formation_energy(dbf, sorted(dbf.elements),
                                              phase_name, endmember, symmetry,
                                              datasets)
            for key, value in sorted(parameters.items(), key=str):
                if value == 0:
                    continue
                symbol_name = 'VV' + str(dbf.varcounter).zfill(4)
                while dbf.symbols.get(symbol_name, None) is not None:
                    dbf.varcounter += 1
                    symbol_name = 'VV' + str(dbf.varcounter).zfill(4)
                dbf.symbols[symbol_name] = sigfigs(value, numdigits)
                parameters[key] = sympy.Symbol(symbol_name)
            fit_eq = sympy.Add(
                *[value * key for key, value in parameters.items()])
            ref = 0
            for subl, ratio in zip(endmember, site_ratios):
                if subl == 'VA':
                    continue
                subl = (subl.upper() * 2)[:2]
                ref = ref + ratio * sympy.Symbol('GHSER' + subl)
            fit_eq += ref
        symmetric_endmembers = _generate_symmetric_group(endmember, symmetry)
        logging.debug('SYMMETRIC_ENDMEMBERS: {}'.format(symmetric_endmembers))
        all_endmembers.extend(symmetric_endmembers)
        for em in symmetric_endmembers:
            em_dict[em] = fit_eq
            dbf.add_parameter('G', phase_name, tuple(map(_to_tuple, em)), 0,
                              fit_eq)
    # Now fit all binary interactions
    # Need to use 'all_endmembers' instead of 'endmembers' because you need to generate combinations
    # of ALL endmembers, not just symmetry equivalent ones
    bin_interactions = list(itertools.combinations(all_endmembers, 2))
    transformed_bin_interactions = []
    for first_endmember, second_endmember in bin_interactions:
        interaction = []
        for first_occupant, second_occupant in zip(first_endmember,
                                                   second_endmember):
            if first_occupant == second_occupant:
                interaction.append(first_occupant)
            else:
                interaction.append(
                    tuple(sorted([first_occupant, second_occupant])))
        transformed_bin_interactions.append(interaction)

    def bin_int_sort_key(x):
        interacting_sublattices = sum(
            (isinstance(n, (list, tuple)) and len(n) == 2) for n in x)
        return canonical_sort_key((interacting_sublattices, ) + x)

    bin_interactions = sorted(set(
        canonicalize(i, symmetry) for i in transformed_bin_interactions),
                              key=bin_int_sort_key)
    logging.debug('{0} distinct binary interactions'.format(
        len(bin_interactions)))
    for interaction in bin_interactions:
        ixx = []
        for i in interaction:
            if isinstance(i, (tuple, list)):
                ixx.append(tuple(i))
            else:
                ixx.append(i)
        ixx = tuple(ixx)
        logging.debug('INTERACTION: {}'.format(ixx))
        parameters = fit_formation_energy(dbf, sorted(dbf.elements),
                                          phase_name, ixx, symmetry, datasets)
        # Organize parameters by polynomial degree
        degree_polys = np.zeros(10, dtype=np.object)
        for degree in reversed(range(10)):
            check_symbol = sympy.Symbol('YS') * sympy.Symbol('Z')**degree
            keys_to_remove = []
            for key, value in sorted(parameters.items(), key=str):
                if key.has(check_symbol):
                    if value != 0:
                        symbol_name = 'VV' + str(dbf.varcounter).zfill(4)
                        while dbf.symbols.get(symbol_name, None) is not None:
                            dbf.varcounter += 1
                            symbol_name = 'VV' + str(dbf.varcounter).zfill(4)
                        dbf.symbols[symbol_name] = sigfigs(
                            parameters[key], numdigits)
                        parameters[key] = sympy.Symbol(symbol_name)
                    coef = parameters[key] * (key / check_symbol)
                    try:
                        coef = float(coef)
                    except TypeError:
                        pass
                    degree_polys[degree] += coef
                    keys_to_remove.append(key)
            for key in keys_to_remove:
                parameters.pop(key)
        logging.debug('Polynomial coefs: {}'.format(degree_polys))
        # Insert into database
        symmetric_interactions = _generate_symmetric_group(
            interaction, symmetry)
        for degree in np.arange(degree_polys.shape[0]):
            if degree_polys[degree] != 0:
                for syminter in symmetric_interactions:
                    dbf.add_parameter('L', phase_name,
                                      tuple(map(_to_tuple, syminter)), degree,
                                      degree_polys[degree])
    # TODO: fit ternary interactions
    if hasattr(dbf, 'varcounter'):
        del dbf.varcounter
Exemple #2
0
def phase_fit(dbf, phase_name, symmetry, datasets, refdata, ridge_alpha, aicc_penalty=None, aliases=None):
    """Generate an initial CALPHAD model for a given phase and sublattice model.

    Parameters
    ----------
    dbf : Database
        pycalphad Database to add parameters to.
    phase_name : str
        Name of the phase.
    symmetry : [[int]]
        Sublattice model symmetry.
    datasets : PickleableTinyDB
        All datasets to consider for the calculation.
    refdata : dict
        Maps tuple(element, phase_name) -> SymPy object defining
        energy relative to SER
    ridge_alpha : float
        Value of the :math:`\\alpha` hyperparameter used in ridge regression. Defaults to 1.0e-100, which should be degenerate
        with ordinary least squares regression. For now, the parameter is applied to all features.
    aicc_penalty : dict
        Map of phase name to feature to a multiplication factor for the AICc's parameter penalty.
    aliases : Dict[str, str]
        Mapping of possible aliases to the Database phase names.

    Returns
    -------
    None
        Modifies the dbf.

    """
    aicc_penalty = aicc_penalty if aicc_penalty is not None else {}
    aicc_phase_penalty = aicc_penalty.get(phase_name, {})
    if not hasattr(dbf, 'varcounter'):
        dbf.varcounter = 0
    phase_obj = dbf.phases[phase_name]
    # TODO: assumed pure elements - add proper support for Species objects
    subl_model = [sorted([sp.name for sp in subl]) for subl in phase_obj.constituents]
    site_ratios = phase_obj.sublattices
    # First fit endmembers
    all_em_count = len(generate_endmembers(subl_model))  # number of total endmembers
    endmembers = generate_endmembers(subl_model, symmetry)
    # Number of significant figures in parameters, might cause rounding errors
    numdigits = 6
    em_dict = {}
    # TODO: use the global aliases dictionary passed in as-is instead of converting it to a phase-local dict
    # TODO: use the aliases dictionary in dataset queries to find relevant data
    if aliases is None:
        aliases = [phase_name]
    else:
        aliases = sorted([alias for alias, database_phase in aliases.items() if database_phase == phase_name])
    _log.info('FITTING: %s', phase_name)
    _log.trace('%s endmembers (%s distinct by symmetry)', all_em_count, len(endmembers))

    all_endmembers = []
    for endmember in endmembers:
        symmetric_endmembers = generate_symmetric_group(endmember, symmetry)
        all_endmembers.extend(symmetric_endmembers)
        if _param_present_in_database(dbf, phase_name, endmember, 'G'):
            _log.trace('ENDMEMBER: %s already in Database. Skipping.', endmember)
            continue
        else:
            _log.trace('ENDMEMBER: %s', endmember)
        # Some endmembers are fixed by our choice of standard lattice stabilities, e.g., SGTE91
        # If a (phase, pure component endmember) tuple is fixed, we should use that value instead of fitting
        endmember_comps = list(set(endmember))
        fit_eq = None
        # only one non-VA component, or two components but the other is VA and its only the last sublattice
        if ((len(endmember_comps) == 1) and (endmember_comps[0] != 'VA')) or\
                ((len(endmember_comps) == 2) and (endmember[-1] == 'VA') and (len(set(endmember[:-1])) == 1)):
            # this is a "pure component endmember"
            # try all phase name aliases until we get run out or get a hit
            em_comp = list(set(endmember_comps) - {'VA'})[0]
            sym_name = None
            for name in aliases:
                sym_name = 'G'+name[:3].upper()+em_comp.upper()
                stability = refdata.get((em_comp.upper(), name.upper()), None)
                if stability is not None:
                    if isinstance(stability, sympy.Piecewise):
                        # Default zero required for the compiled backend
                        if (0, True) not in stability.args:
                            new_args = stability.args + ((0, True),)
                            stability = sympy.Piecewise(*new_args)
                    dbf.symbols[sym_name] = stability
                    break
            if dbf.symbols.get(sym_name, None) is not None:
                num_moles = sum([sites for elem, sites in zip(endmember, site_ratios) if elem != 'VA'])
                fit_eq = num_moles * Symbol(sym_name)
                _log.trace("Found lattice stability: %s", sym_name)
                _log.debug("%s = %s", sym_name, dbf.symbols[sym_name])
        if fit_eq is None:
            # No reference lattice stability data -- we have to fit it
            parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, endmember, symmetry, datasets, ridge_alpha, aicc_phase_penalty=aicc_phase_penalty)
            for key, value in sorted(parameters.items(), key=str):
                if value == 0:
                    continue
                symbol_name = get_next_symbol(dbf)
                dbf.symbols[symbol_name] = sigfigs(value, numdigits)
                parameters[key] = Symbol(symbol_name)
            fit_eq = sympy.Add(*[value * key for key, value in parameters.items()])
            ref = 0
            for subl, ratio in zip(endmember, site_ratios):
                if subl == 'VA':
                    continue
                subl = (subl.upper()*2)[:2]
                ref = ref + ratio * Symbol('GHSER'+subl)
            fit_eq += ref
        _log.trace('SYMMETRIC_ENDMEMBERS: %s', symmetric_endmembers)
        for em in symmetric_endmembers:
            em_dict[em] = fit_eq
            dbf.add_parameter('G', phase_name, tuple(map(tuplify, em)), 0, fit_eq)

    _log.trace('FITTING BINARY INTERACTIONS')
    bin_interactions = generate_interactions(all_endmembers, order=2, symmetry=symmetry)
    _log.trace('%s distinct binary interactions', len(bin_interactions))
    for interaction in bin_interactions:
        ixx = []
        for i in interaction:
            if isinstance(i, (tuple, list)):
                ixx.append(tuple(i))
            else:
                ixx.append(i)
        ixx = tuple(ixx)
        config = tuple(map(tuplify, ixx))
        if _param_present_in_database(dbf, phase_name, config, 'L'):
            _log.trace('INTERACTION: %s already in Database', ixx)
            continue
        else:
            _log.trace('INTERACTION: %s', ixx)
        parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, ixx, symmetry, datasets, ridge_alpha, aicc_phase_penalty=aicc_phase_penalty)
        # Organize parameters by polynomial degree
        degree_polys = np.zeros(10, dtype=np.object_)
        for degree in reversed(range(10)):
            check_symbol = Symbol('YS') * Symbol('Z')**degree
            keys_to_remove = []
            for key, value in sorted(parameters.items(), key=str):
                if key.has(check_symbol):
                    if value != 0:
                        symbol_name = get_next_symbol(dbf)
                        dbf.symbols[symbol_name] = sigfigs(parameters[key], numdigits)
                        parameters[key] = Symbol(symbol_name)
                    coef = parameters[key] * (key / check_symbol)
                    try:
                        coef = float(coef)
                    except TypeError:
                        pass
                    degree_polys[degree] += coef
                    keys_to_remove.append(key)
            for key in keys_to_remove:
                parameters.pop(key)
        _log.trace('Polynomial coefs: %s', degree_polys.tolist())
        # Insert into database
        symmetric_interactions = generate_symmetric_group(interaction, symmetry)
        for degree in np.arange(degree_polys.shape[0]):
            if degree_polys[degree] != 0:
                for syminter in symmetric_interactions:
                    dbf.add_parameter('L', phase_name, tuple(map(tuplify, syminter)), degree, degree_polys[degree])

    _log.trace('FITTING TERNARY INTERACTIONS')
    fit_ternary_interactions(dbf, phase_name, symmetry, all_endmembers, datasets, aicc_phase_penalty=aicc_phase_penalty)
    if hasattr(dbf, 'varcounter'):
        del dbf.varcounter
Exemple #3
0
def phase_fit(dbf, phase_name, symmetry, subl_model, site_ratios, datasets, refdata, ridge_alpha, aicc_penalty=None, aliases=None):
    """Generate an initial CALPHAD model for a given phase and sublattice model.

    Parameters
    ----------
    dbf : Database
        pycalphad Database to add parameters to.
    phase_name : str
        Name of the phase.
    symmetry : [[int]]
        Sublattice model symmetry.
    subl_model : [[str]]
        Sublattice model for the phase of interest.
    site_ratios : [float]
        Number of sites in each sublattice, normalized to one atom.
    datasets : PickleableTinyDB
        All datasets to consider for the calculation.
    refdata : dict
        Maps tuple(element, phase_name) -> SymPy object defining
        energy relative to SER
    ridge_alpha : float
        Value of the $alpha$ hyperparameter used in ridge regression. Defaults to 1.0e-100, which should be degenerate
        with ordinary least squares regression. For now, the parameter is applied to all features.
    aicc_penalty : dict
        Map of phase name to feature to a multiplication factor for the AICc's parameter penalty.
    aliases : [str]
        Alternative phase names. Useful for matching against
        reference data or other datasets. (Default value = None)

    Returns
    -------
    None
        Modifies the dbf.

    """
    aicc_penalty = aicc_penalty if aicc_penalty is not None else {}
    aicc_phase_penalty = aicc_penalty.get(phase_name, {})
    if not hasattr(dbf, 'varcounter'):
        dbf.varcounter = 0
    # First fit endmembers
    all_em_count = len(generate_endmembers(subl_model))  # number of total endmembers
    endmembers = generate_endmembers(subl_model, symmetry)
    # Number of significant figures in parameters, might cause rounding errors
    numdigits = 6
    em_dict = {}
    aliases = [] if aliases is None else aliases
    aliases = sorted(set(aliases + [phase_name]))
    logging.info('FITTING: {}'.format(phase_name))
    logging.log(TRACE, '{0} endmembers ({1} distinct by symmetry)'.format(all_em_count, len(endmembers)))

    all_endmembers = []
    for endmember in endmembers:
        logging.log(TRACE, 'ENDMEMBER: {}'.format(endmember))
        # Some endmembers are fixed by our choice of standard lattice stabilities, e.g., SGTE91
        # If a (phase, pure component endmember) tuple is fixed, we should use that value instead of fitting
        endmember_comps = list(set(endmember))
        fit_eq = None
        # only one non-VA component, or two components but the other is VA and its only the last sublattice
        if ((len(endmember_comps) == 1) and (endmember_comps[0] != 'VA')) or\
                ((len(endmember_comps) == 2) and (endmember[-1] == 'VA') and (len(set(endmember[:-1])) == 1)):
            # this is a "pure component endmember"
            # try all phase name aliases until we get run out or get a hit
            em_comp = list(set(endmember_comps) - {'VA'})[0]
            sym_name = None
            for name in aliases:
                sym_name = 'G'+name[:3].upper()+em_comp.upper()
                stability = refdata.get((em_comp.upper(), name.upper()), None)
                if stability is not None:
                    if isinstance(stability, sympy.Piecewise):
                        # Default zero required for the compiled backend
                        if (0, True) not in stability.args:
                            new_args = stability.args + ((0, True),)
                            stability = sympy.Piecewise(*new_args)
                    dbf.symbols[sym_name] = stability
                    break
            if dbf.symbols.get(sym_name, None) is not None:
                num_moles = sum([sites for elem, sites in zip(endmember, site_ratios) if elem != 'VA'])
                fit_eq = num_moles * sympy.Symbol(sym_name)
        if fit_eq is None:
            # No reference lattice stability data -- we have to fit it
            parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, endmember, symmetry, datasets, ridge_alpha, aicc_phase_penalty=aicc_phase_penalty)
            for key, value in sorted(parameters.items(), key=str):
                if value == 0:
                    continue
                symbol_name = get_next_symbol(dbf)
                dbf.symbols[symbol_name] = sigfigs(value, numdigits)
                parameters[key] = sympy.Symbol(symbol_name)
            fit_eq = sympy.Add(*[value * key for key, value in parameters.items()])
            ref = 0
            for subl, ratio in zip(endmember, site_ratios):
                if subl == 'VA':
                    continue
                subl = (subl.upper()*2)[:2]
                ref = ref + ratio * sympy.Symbol('GHSER'+subl)
            fit_eq += ref
        symmetric_endmembers = generate_symmetric_group(endmember, symmetry)
        logging.log(TRACE, 'SYMMETRIC_ENDMEMBERS: {}'.format(symmetric_endmembers))
        all_endmembers.extend(symmetric_endmembers)
        for em in symmetric_endmembers:
            em_dict[em] = fit_eq
            dbf.add_parameter('G', phase_name, tuple(map(tuplify, em)), 0, fit_eq)

    logging.log(TRACE, 'FITTING BINARY INTERACTIONS')
    bin_interactions = generate_interactions(all_endmembers, order=2, symmetry=symmetry)
    logging.log(TRACE, '{0} distinct binary interactions'.format(len(bin_interactions)))
    for interaction in bin_interactions:
        ixx = []
        for i in interaction:
            if isinstance(i, (tuple, list)):
                ixx.append(tuple(i))
            else:
                ixx.append(i)
        ixx = tuple(ixx)
        logging.log(TRACE, 'INTERACTION: {}'.format(ixx))
        parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, ixx, symmetry, datasets, ridge_alpha, aicc_phase_penalty=aicc_phase_penalty)
        # Organize parameters by polynomial degree
        degree_polys = np.zeros(10, dtype=np.object)
        for degree in reversed(range(10)):
            check_symbol = sympy.Symbol('YS') * sympy.Symbol('Z')**degree
            keys_to_remove = []
            for key, value in sorted(parameters.items(), key=str):
                if key.has(check_symbol):
                    if value != 0:
                        symbol_name = get_next_symbol(dbf)
                        dbf.symbols[symbol_name] = sigfigs(parameters[key], numdigits)
                        parameters[key] = sympy.Symbol(symbol_name)
                    coef = parameters[key] * (key / check_symbol)
                    try:
                        coef = float(coef)
                    except TypeError:
                        pass
                    degree_polys[degree] += coef
                    keys_to_remove.append(key)
            for key in keys_to_remove:
                parameters.pop(key)
        logging.log(TRACE, 'Polynomial coefs: {}'.format(degree_polys))
        # Insert into database
        symmetric_interactions = generate_symmetric_group(interaction, symmetry)
        for degree in np.arange(degree_polys.shape[0]):
            if degree_polys[degree] != 0:
                for syminter in symmetric_interactions:
                    dbf.add_parameter('L', phase_name, tuple(map(tuplify, syminter)), degree, degree_polys[degree])

    logging.log(TRACE, 'FITTING TERNARY INTERACTIONS')
    fit_ternary_interactions(dbf, phase_name, symmetry, all_endmembers, datasets, aicc_phase_penalty=aicc_phase_penalty)
    if hasattr(dbf, 'varcounter'):
        del dbf.varcounter