Example 1
def calculate_(species: Sequence[v.Species], phases: Sequence[str],
               str_statevar_dict: Dict[str, np.ndarray], models: Dict[str, Model],
               phase_records: Dict[str, PhaseRecord], output: Optional[str] = 'GM',
               points: Optional[Dict[str, np.ndarray]] = None,
               pdens: Optional[int] = 50, broadcast: Optional[bool] = True,
               fake_points: Optional[bool] = False,
               ) -> LightDataset:
    """
    Quickly sample phase internal degrees of freedom with virtually no overhead.
    """
    points_dict = unpack_kwarg(points, default_arg=None)
    pdens_dict = unpack_kwarg(pdens, default_arg=50)
    nonvacant_components = [x for x in sorted(species) if x.number_of_atoms > 0]
    maximum_internal_dof = max(prx.phase_dof for prx in phase_records.values())
    all_phase_data = []
    for phase_name in sorted(phases):
        mod = models[phase_name]
        phase_record = phase_records[phase_name]
        points = points_dict[phase_name]
        if points is None:
            points = _sample_phase_constitution(mod, point_sample, True, pdens_dict[phase_name])
        points = np.atleast_2d(points)

        fp = fake_points and (phase_name == sorted(phases)[0])
        phase_ds = _compute_phase_values(nonvacant_components, str_statevar_dict,
                                         points, phase_record, output,
                                         maximum_internal_dof, broadcast=broadcast,
                                         largest_energy=float(1e10), fake_points=fp,
                                         parameters={})
        all_phase_data.append(phase_ds)

    # assumes phase_records all have the same nonvacant pure elements,
    # even if those elements are not present in this phase record
    fp_offset = len(tuple(phase_records.values())[0].nonvacant_elements) if fake_points else 0
    running_total = [fp_offset] + list(np.cumsum([phase_ds['X'].shape[-2] for phase_ds in all_phase_data]))
    islice_by_phase = {phase_name: slice(running_total[phase_idx], running_total[phase_idx+1], None)
                       for phase_idx, phase_name in enumerate(sorted(phases))}

    if len(all_phase_data) > 1:
        concatenated_coords = all_phase_data[0].coords

        data_vars = all_phase_data[0].data_vars
        concatenated_data_vars = {}
        for var in data_vars.keys():
            data_coords = data_vars[var][0]
            points_idx = data_coords.index('points')  # concatenation axis
            arrs = []
            for phase_data in all_phase_data:
                arrs.append(getattr(phase_data, var))
            concat_data = np.concatenate(arrs, axis=points_idx)
            concatenated_data_vars[var] = (data_coords, concat_data)
        final_ds = LightDataset(data_vars=concatenated_data_vars, coords=concatenated_coords)
    else:
        final_ds = all_phase_data[0]
    final_ds.attrs['phase_indices'] = islice_by_phase
    return final_ds
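
A minimal usage sketch for this lightweight calculate_ follows, assuming the same pycalphad helpers that Example 2 uses (unpack_components, filter_phases, instantiate_models, build_phase_records). The 'Cu-Mg.tdb' file and the CU-MG components are placeholders, and the import paths may differ between pycalphad versions.

# Usage sketch (assumptions): 'Cu-Mg.tdb' is a placeholder database file and the
# import paths below follow older pycalphad layouts; adjust them to your version.
import numpy as np
from pycalphad import Database, variables as v
from pycalphad.core.utils import unpack_components, filter_phases, instantiate_models
from pycalphad.codegen.callables import build_phase_records

dbf = Database('Cu-Mg.tdb')
comps = ['CU', 'MG', 'VA']
species = sorted(unpack_components(dbf, comps), key=str)
phases = filter_phases(dbf, species, candidate_phases=list(dbf.phases.keys()))
models = instantiate_models(dbf, species, phases)
phase_records = build_phase_records(dbf, species, phases, {v.N, v.P, v.T}, models,
                                    build_gradients=True, build_hessians=True)
# State variables are keyed by their string names, as in str_statevar_dict above.
str_statevar_dict = {'N': np.array([1.0]), 'P': np.array([101325.0]), 'T': np.array([1000.0])}
ds = calculate_(species, phases, str_statevar_dict, models, phase_records,
                output='GM', pdens=50, fake_points=True)
print(ds.GM.shape, ds.attrs['phase_indices'])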
Example 2
def get_zpf_data(dbf: Database,
                 comps: Sequence[str],
                 phases: Sequence[str],
                 datasets: PickleableTinyDB,
                 parameters: Dict[str, float],
                 model: Optional[Dict[str, Type[Model]]] = None):
    """
    Return the ZPF data used in the calculation of ZPF error

    Parameters
    ----------
    dbf : Database
        The pycalphad Database to consider
    comps : list
        List of active component names
    phases : list
        List of phases to consider
    datasets : espei.utils.PickleableTinyDB
        Datasets that contain ZPF (phase boundary) data
    parameters : dict
        Dictionary mapping symbols to optimize to their initial values
    model : Optional[Dict[str, Type[Model]]]
        Dictionary mapping phase names to pycalphad Model classes.

    Returns
    -------
    list
        List of data dictionaries with keys ``weight``, ``phase_regions`` and ``dataset_reference``.
    """
    desired_data = datasets.search(
        (tinydb.where('output') == 'ZPF')
        & (tinydb.where('components').test(lambda x: set(x).issubset(comps)))
        & (tinydb.where('phases').test(
            lambda x: len(set(phases).intersection(x)) > 0)))

    zpf_data = []  # 1:1 correspondence with each dataset
    for data in desired_data:
        data_comps = list(set(data['components']).union({'VA'}))
        species = sorted(unpack_components(dbf, data_comps), key=str)
        data_phases = filter_phases(dbf, species, candidate_phases=phases)
        models = instantiate_models(dbf,
                                    species,
                                    data_phases,
                                    model=model,
                                    parameters=parameters)
        # assumed N, P, T state variables
        phase_recs = build_phase_records(dbf,
                                         species,
                                         data_phases, {v.N, v.P, v.T},
                                         models,
                                         parameters=parameters,
                                         build_gradients=True,
                                         build_hessians=True)
        all_phase_points = {
            phase_name: _sample_phase_constitution(models[phase_name],
                                                   point_sample, True, 50)
            for phase_name in data_phases
        }
        all_regions = data['values']
        conditions = data['conditions']
        phase_regions = []
        # Each phase_region is one set of phases in equilibrium (on a tie-line),
        # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]]
        for idx, phase_region in enumerate(all_regions):
            # Extract the conditions for entire phase region
            pot_conds = _extract_pot_conds(conditions, idx)
            pot_conds.setdefault(v.N, 1.0)  # Add v.N condition, if missing
            # Extract all the phases and compositions from the tie-line points
            vertices = []
            for vertex in phase_region:
                phase_name, comp_conds, disordered_flag = _extract_phases_comps(
                    vertex)
                # Construct single-phase points satisfying the conditions for each phase in the region
                mod = models[phase_name]
                composition = _compute_vertex_composition(
                    data_comps, comp_conds)
                if np.any(np.isnan(composition)):
                    # We can't construct points because we don't have a known composition
                    has_missing_comp_cond = True
                    phase_points = None
                elif _phase_is_stoichiometric(mod):
                    has_missing_comp_cond = False
                    phase_points = None
                else:
                    has_missing_comp_cond = False
                    # Only sample points that have an average mass residual within tol
                    tol = 0.02
                    phase_points = _subsample_phase_points(
                        phase_recs[phase_name], all_phase_points[phase_name],
                        composition, tol)
                    assert phase_points.shape[0] > 0, \
                        f"phase {phase_name} must have at least one set of points within the target tolerance {pot_conds} {comp_conds}"
                vtx = RegionVertex(phase_name, composition, comp_conds,
                                   phase_points, phase_recs, disordered_flag,
                                   has_missing_comp_cond)
                vertices.append(vtx)
            region = PhaseRegion(vertices, pot_conds, species, data_phases,
                                 models)
            phase_regions.append(region)

        data_dict = {
            'weight': data.get('weight', 1.0),
            'phase_regions': phase_regions,
            'dataset_reference': data['reference']
        }
        zpf_data.append(data_dict)
    return zpf_data
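
A sketch of how get_zpf_data might be driven, assuming ESPEI's dataset loaders; the database file, the data directory, and the CU-MG components are placeholders.

# Usage sketch (assumptions): 'Cu-Mg.tdb' and 'input-data/' are placeholder paths.
from pycalphad import Database
from espei.datasets import load_datasets, recursive_glob

dbf = Database('Cu-Mg.tdb')
comps = ['CU', 'MG', 'VA']
phases = list(dbf.phases.keys())
datasets = load_datasets(sorted(recursive_glob('input-data/', '*.json')))

zpf_data = get_zpf_data(dbf, comps, phases, datasets, parameters={})
for entry in zpf_data:
    print(entry['dataset_reference'], entry['weight'], len(entry['phase_regions']))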
Example 3
def calculate_driving_force(dbf,
                            data_comps,
                            phases,
                            current_statevars,
                            ph_cond_dict,
                            phase_models,
                            phase_dict,
                            parameters,
                            callables,
                            tol=0.001,
                            max_it=50):
    """
    Calculates driving force for a single data point.

    Parameters
    ----------
    dbf : pycalphad.Database
        Database to consider
    data_comps : list
        List of active component names
    phases : list
        List of phases to consider
    current_statevars : dict
        Dictionary of state variables, e.g. v.P and v.T, no compositions.
    ph_cond_dict : iterable
        Iterable of (phase name, conditions) pairs giving the conditions at which each phase occurred in experiment.
    phase_models : dict
        Phase models to pass to pycalphad calculations
    phase_dict : dict
        Per-phase cache of sample points, composition values, phase records and
        energies; updated in place by this function.
    parameters : dict
        Dictionary of symbols that will be overridden in pycalphad.equilibrium
    callables : dict
        Callables to pass to pycalphad
    tol : float
        The tolerance allowed for optimization over hyperplanes.
    max_it : int
        The maximum number of iterations allowed for optimization over hyperplanes.

    Returns
    -------
    float
        The driving force for this data point, or 0 if the point solver fails to converge.

    Notes
    -----
    Calculates the driving force by optimizing it over chemical potential hyperplanes.
    The driving force can be calculated even when both tie-line points are missing.
    """
    # TODO Refactor absurd unpacking which represents a significant overhead.
    species = list(map(v.Species, data_comps))
    conditions = current_statevars
    if conditions.get(v.N) is None:
        conditions[v.N] = 1.0
    if np.any(np.array(conditions[v.N]) != 1):
        raise ConditionError('N!=1 is not yet supported, got N={}'.format(
            conditions[v.N]))
    conds = conditions
    str_conds = OrderedDict([(str(key), conds[key])
                             for key in sorted(conds.keys(), key=str)])
    models = instantiate_models(dbf,
                                data_comps,
                                phases,
                                model=phase_models,
                                parameters=parameters)
    prxs = build_phase_records(dbf,
                               species,
                               phases,
                               conds,
                               models,
                               build_gradients=True,
                               build_hessians=True,
                               callables=callables,
                               parameters=parameters)
    # Collect data information in phase_dict.
    for phase in phases:
        phase_dict[phase]['data'] = False
    for ph, cond in ph_cond_dict:
        has_nones = False
        ph_conds = cond[0]
        phase_dict[ph]['data'] = True
        for key in ph_conds:
            if ph_conds[key] is None:
                has_nones = True
                phase_dict[ph]['phase_record'] = None
                phase_dict[ph]['str_conds'] = None
        if not has_nones:
            ph_conds.update(conditions)
            phase_records = build_phase_records(dbf,
                                                species, [ph],
                                                ph_conds,
                                                models,
                                                build_gradients=True,
                                                build_hessians=True,
                                                callables=callables,
                                                parameters=parameters)
            phase_dict[ph]['phase_record'] = phase_records[ph]
            phase_dict[ph]['str_conds'] = OrderedDict([
                (str(key), ph_conds[key])
                for key in sorted(ph_conds.keys(), key=str)
            ])
            phase_dict[ph]['min_energy'] = None
    # Collect sampling and equilibrium information in phase_dict.
    for phase in phases:
        # If sample points have not yet been calculated for this phase, calculate them.
        if 'sample_points' not in phase_dict[phase]:
            phase_obj = dbf.phases[phase]
            components = models[phase].components
            variables, sublattice_dof = generate_dof(phase_obj, components)
            sample_points = _sample_phase_constitution(
                phase, phase_obj.constituents, sublattice_dof, data_comps,
                tuple(variables), point_sample, True, 2000)
            phase_dict[phase]['sample_points'] = sample_points
        # Use the cached sample points; the local variable set in the branch above
        # may be stale or unset when the points were computed on an earlier call.
        sample_points = phase_dict[phase]['sample_points']
        # If composition values have not yet been calculated for this phase, calculate them.
        if 'composition_values' not in phase_dict[phase]:
            composition_values = np.zeros(
                (sample_points.shape[0],
                 len([sp for sp in species if str(sp) != 'VA'])))
            temp_comp_set = CompositionSet(prxs[phase])
            current_state_variables = np.array(
                [str_conds[key] for key in sorted(str_conds.keys(), key=str)])
            for i in range(sample_points.shape[0]):
                temp_comp_set.py_update(sample_points[i, :], np.array([1.0]),
                                        current_state_variables, False)
                composition_values[i, :] = temp_comp_set.X
            phase_dict[phase]['composition_values'] = composition_values
        energies = calculate(dbf,
                             data_comps, [phase],
                             points=phase_dict[phase]['sample_points'],
                             to_xarray=False,
                             **str_conds)
        phase_dict[phase]['energy_values'] = np.array(energies['GM'][0][0][0])
    hyperplane = generate_random_hyperplane(species)
    result = calculate_driving_force_at_chem_potential(dbf,
                                                       hyperplane,
                                                       species,
                                                       phase_dict,
                                                       prxs,
                                                       str_conds,
                                                       approx=True)
    # Ignore entire data point if pointsolver fails to converge.
    if result is None:
        return 0
    # Optimize over the hyperplane.
    it = 0
    current_driving_force = result['driving_force']
    new_plane = result['new_plane']
    last_plane = new_plane
    while np.linalg.norm(hyperplane - last_plane) > tol and it < max_it:
        it += 1
        last_plane = hyperplane
        result = calculate_driving_force_at_chem_potential(dbf,
                                                           new_plane,
                                                           species,
                                                           phase_dict,
                                                           prxs,
                                                           str_conds,
                                                           approx=True)
        # If step results in objective decrease, accept the step.
        if result['driving_force'] < current_driving_force:
            current_driving_force = result['driving_force']
            hyperplane = new_plane
            new_plane = result['new_plane']
        else:
            step = 0.5
            temp_hyperplane = new_plane
            while (result['driving_force'] > current_driving_force
                   and np.linalg.norm(hyperplane - temp_hyperplane) > tol):
                temp_hyperplane = (1.0 - step) * hyperplane + step * new_plane
                result = calculate_driving_force_at_chem_potential(
                    dbf,
                    temp_hyperplane,
                    species,
                    phase_dict,
                    prxs,
                    str_conds,
                    approx=True)
                step /= 2
            hyperplane = temp_hyperplane
            result = calculate_driving_force_at_chem_potential(dbf,
                                                               hyperplane,
                                                               species,
                                                               phase_dict,
                                                               prxs,
                                                               str_conds,
                                                               approx=True)
            new_plane = result['new_plane']
            current_driving_force = result['driving_force']
    final_result = calculate_driving_force_at_chem_potential(dbf,
                                                             hyperplane,
                                                             species,
                                                             phase_dict,
                                                             prxs,
                                                             str_conds,
                                                             approx=True)
    final_driving_force = final_result['driving_force']
    print(it, final_driving_force, hyperplane)
    return final_driving_force
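
The else branch above is effectively a step-halving (backtracking) line search between the last accepted hyperplane and the rejected proposal. A standalone sketch of that idea, with a hypothetical scalar objective f standing in for calculate_driving_force_at_chem_potential:

import numpy as np

def halving_line_search(f, current, proposed, tol=0.001, step=0.5):
    # Backtrack from `proposed` toward `current`, halving the step each time,
    # until the objective decreases or the trial point is within `tol` of `current`.
    # `f` is a hypothetical stand-in for the driving-force evaluation above.
    f_current = f(current)
    trial = proposed
    while f(trial) > f_current and np.linalg.norm(current - trial) > tol:
        trial = (1.0 - step) * current + step * proposed
        step /= 2
    return trial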
Example 4
def calculate_(
    dbf: Database,
    species: Sequence[v.Species],
    phases: Sequence[str],
    str_statevar_dict: Dict[str, np.ndarray],
    models: Dict[str, Model],
    phase_records: Dict[str, PhaseRecord],
    output: Optional[str] = 'GM',
    points: Optional[Dict[str, np.ndarray]] = None,
    pdens: Optional[int] = 2000,
    broadcast: Optional[bool] = True,
    fake_points: Optional[bool] = False,
) -> LightDataset:
    """
    Quickly sample phase internal degrees of freedom with virtually no overhead.
    """
    points_dict = unpack_kwarg(points, default_arg=None)
    pdens_dict = unpack_kwarg(pdens, default_arg=2000)
    nonvacant_components = [
        x for x in sorted(species) if x.number_of_atoms > 0
    ]
    maximum_internal_dof = max(prx.phase_dof for prx in phase_records.values())
    all_phase_data = []
    for phase_name in sorted(phases):
        phase_obj = dbf.phases[phase_name]
        mod = models[phase_name]
        phase_record = phase_records[phase_name]
        points = points_dict[phase_name]
        variables, sublattice_dof = generate_dof(phase_obj, mod.components)
        if points is None:
            points = _sample_phase_constitution(phase_name,
                                                phase_obj.constituents,
                                                sublattice_dof, species,
                                                tuple(variables), point_sample,
                                                True, pdens_dict[phase_name])
        points = np.atleast_2d(points)

        fp = fake_points and (phase_name == sorted(phases)[0])
        phase_ds = _compute_phase_values(nonvacant_components,
                                         str_statevar_dict,
                                         points,
                                         phase_record,
                                         output,
                                         maximum_internal_dof,
                                         broadcast=broadcast,
                                         largest_energy=float(1e10),
                                         fake_points=fp)
        all_phase_data.append(phase_ds)

    if len(all_phase_data) > 1:
        concatenated_coords = all_phase_data[0].coords

        data_vars = all_phase_data[0].data_vars
        concatenated_data_vars = {}
        for var in data_vars.keys():
            data_coords = data_vars[var][0]
            points_idx = data_coords.index('points')  # concatenation axis
            arrs = []
            for phase_data in all_phase_data:
                arrs.append(getattr(phase_data, var))
            concat_data = np.concatenate(arrs, axis=points_idx)
            concatenated_data_vars[var] = (data_coords, concat_data)
        final_ds = LightDataset(data_vars=concatenated_data_vars,
                                coords=concatenated_coords)
    else:
        final_ds = all_phase_data[0]
    return final_ds
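
As in Example 1, the per-phase datasets are stitched together along the 'points' axis. A tiny numpy illustration of that bookkeeping, with made-up array shapes:

import numpy as np

# Two hypothetical per-phase GM arrays with shape (N, P, T, points).
gm_alpha = np.zeros((1, 1, 1, 40))
gm_beta = np.zeros((1, 1, 1, 25))
data_coords = ('N', 'P', 'T', 'points')
points_idx = data_coords.index('points')  # concatenation axis, as in the loop above
gm_all = np.concatenate([gm_alpha, gm_beta], axis=points_idx)
print(gm_all.shape)  # (1, 1, 1, 65)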