예제 #1
0
    def get_pd(self, chemsys=None):
        """
        Rebuild the phase data from the seed data and store it on the agent.

        Args:
            chemsys (str): optional chemical system used to restrict the
                seed data to a partial phase diagram.  Any '-' characters
                are stripped and the result is turned into a Composition
                that is handed to filter_dataframe_by_composition.  When
                falsy, the complete seed data is used.

        Returns:
            (PhaseData): the freshly built object, also stored as self.pd
        """
        # Start from a fresh PhaseData on every call
        self.pd = PhaseData()

        # Narrow the seed data down to the requested chemsys, if any
        if not chemsys:
            seed_subset = self.seed_data
        else:
            bounding_comp = Composition(chemsys.replace('-', ''))
            seed_subset = filter_dataframe_by_composition(
                self.seed_data, bounding_comp)

        # One Phase per seed row, described by the row's index label
        phases = []
        for label, record in seed_subset.iterrows():
            phases.append(
                Phase(
                    record["Composition"],
                    energy=record["delta_e"],
                    per_atom=True,
                    description=label,
                ))

        # Every entry of ELEMENTS is added with a value of 0.0
        # (presumably the elemental reference energy)
        for element in ELEMENTS:
            phases.append(Phase(element, 0.0, per_atom=True))

        self.pd.add_phases(phases)
        return self.pd
예제 #2
0
    def get_phase_space(dataframe):
        """
        Build the PhaseSpaceAL object describing the rows of a dataframe.

        Args:
            dataframe (DataFrame): dataframe with a "Composition" column
                holding the formula and a "delta_e" column holding the
                formation energy per atom

        Returns:
            (PhaseSpaceAL): phase space constructed with ELEMENTS as its
                bounds and, as its data, a PhaseData holding one phase
                per dataframe row plus one phase per entry of ELEMENTS
        """
        # One Phase per row, described by the row's index label
        phases = [
            Phase(
                row["Composition"],
                energy=row["delta_e"],
                per_atom=True,
                description=row_index,
            )
            for row_index, row in dataframe.iterrows()
        ]
        # Every entry of ELEMENTS is added with a value of 0.0
        phases.extend(Phase(el, 0.0, per_atom=True) for el in ELEMENTS)

        phase_data = PhaseData()
        phase_data.add_phases(phases)
        return PhaseSpaceAL(bounds=ELEMENTS, data=phase_data)
예제 #3
0
    def get_pd(self):
        """
        Rebuild the phase data from the seed data and store it on the agent.

        Returns:
            (PhaseData): the freshly built object, also stored as self.pd
        """
        # Start from a fresh PhaseData on every call
        self.pd = PhaseData()

        # One Phase per seed row, described by the row's index label
        phases = []
        for label, record in self.seed_data.iterrows():
            phases.append(
                Phase(record['Composition'],
                      energy=record['delta_e'],
                      per_atom=True,
                      description=label))

        # Every entry of ELEMENTS is added with a value of 0.0
        # (presumably the elemental reference energy)
        for element in ELEMENTS:
            phases.append(Phase(element, 0.0, per_atom=True))

        self.pd.add_phases(phases)
        return self.pd
예제 #4
0
    def get_phase_space(self, df=None):
        """
        Build the PhaseSpaceAL object describing the rows of a dataframe.

        Args:
            df (DataFrame): dataframe providing "Composition" and
                "delta_e" columns; when None, self.df is used instead

        Returns:
            (PhaseSpaceAL): phase space constructed with ELEMENTS as its
                bounds and, as its data, a PhaseData holding one phase
                per dataframe row plus one phase per entry of ELEMENTS
        """
        # Fall back to the instance dataframe when none is supplied
        frame = self.df if df is None else df

        # One Phase per row, described by the row's index label
        phases = [
            Phase(row['Composition'],
                  energy=row['delta_e'],
                  per_atom=True,
                  description=row_index)
            for row_index, row in frame.iterrows()
        ]
        # Every entry of ELEMENTS is added with a value of 0.0
        phases.extend(Phase(el, 0.0, per_atom=True) for el in ELEMENTS)

        phase_data = PhaseData()
        phase_data.add_phases(phases)
        return PhaseSpaceAL(bounds=ELEMENTS, data=phase_data)
예제 #5
0
class StabilityAgent(HypothesisAgent, metaclass=abc.ABCMeta):
    """
    Abstract mixin gathering the initialization parameters and helper
    methods shared by every agent that is responsible for making
    decisions about stability.
    """

    def __init__(
        self,
        candidate_data=None,
        seed_data=None,
        n_query=1,
        hull_distance=0.0,
        parallel=cpu_count(),
    ):
        """
        Args:
            candidate_data (DataFrame): data about the candidates
            seed_data (DataFrame): data which to fit the Agent to
            n_query (int): number of hypotheses to generate
            hull_distance (float): hull distance used as the criterion
                for deeming a given material "stable"
            parallel (bool, int): multiprocessing setting for the phase
                stability analysis; stored as-is and later handed to
                PhaseSpaceAL.compute_stabilities.  An int sets the
                n_jobs parameter, True uses cpu_count() jobs and False
                uses a single job.  The default is the value of
                cpu_count() at the time the class is defined.
        """
        super().__init__()

        # Data the agent operates on
        self.candidate_data = candidate_data
        self.seed_data = seed_data

        # Decision parameters
        self.n_query = n_query
        self.hull_distance = hull_distance
        self.parallel = parallel

        # Filled in by get_pd
        self.pd = None

        # These might be able to go into the base class
        self.cv_score = np.nan

    def get_pd(self):
        """
        Rebuild the phase data from the seed data and store it on the agent.

        Returns:
            (PhaseData): the freshly built object, also stored as self.pd
        """
        # Start from a fresh PhaseData on every call
        self.pd = PhaseData()

        # One Phase per seed row, described by the row's index label
        phases = []
        for label, record in self.seed_data.iterrows():
            phases.append(
                Phase(
                    record["Composition"],
                    energy=record["delta_e"],
                    per_atom=True,
                    description=label,
                )
            )

        # Every entry of ELEMENTS is added with a value of 0.0
        # (presumably the elemental reference energy)
        for element in ELEMENTS:
            phases.append(Phase(element, 0.0, per_atom=True))

        self.pd.add_phases(phases)
        return self.pd

    def update_data(self, candidate_data=None, seed_data=None):
        """
        Store new candidate and/or seed data on the agent, following the
        schema of the default OQMD data, and split the supplied frames
        into features and targets.

        Only the frames that are actually passed in are stored and
        processed; for an omitted frame the existing attribute is left
        alone and None is returned in the corresponding slot(s).

        Args:
            candidate_data (DataFrame): new candidate dataframe
            seed_data (DataFrame): new seed dataframe

        Returns:
            (DataFrame): candidate features, or None
            (DataFrame): seed features, or None
            (Series): seed targets (the "delta_e" column), or None

        """
        # Columns that are not features.  Missing ones are silently
        # skipped (errors="ignore") for brevity, so keep an eye on
        # whether everything intended really gets dropped.
        non_feature_columns = [
            "Composition",
            "N_species",
            "delta_e",
            "pred_delta_e",
            "pred_stability",
            "stability",
            "is_stable",
            "structure",
        ]

        X_cand = X_seed = y_seed = None

        if candidate_data is not None:
            self.candidate_data = candidate_data
            X_cand = candidate_data.drop(
                columns=non_feature_columns, errors="ignore"
            )

        if seed_data is not None:
            self.seed_data = seed_data
            X_seed = seed_data.drop(
                columns=non_feature_columns, errors="ignore"
            )
            y_seed = seed_data["delta_e"]

        return X_cand, X_seed, y_seed

    def update_candidate_stabilities(self, formation_energies, sort=True, floor=-6.0):
        """
        Write predicted formation energies and the stabilities derived
        from them into the candidate dataframe.

        The predictions are stored in the "pred_delta_e" column, the
        candidates are added to a copy of the refreshed seed phase data,
        and the stability computed for every candidate is stored in the
        "pred_stability" column.

        Args:
            formation_energies ([float]): list of predictions for formation
                energies corresponding to candidate_data ordering
            sort (bool): whether or not to sort the resulting dataframe
                by "pred_stability"
            floor (float): lower bound applied to the predicted formation
                energies; pass None to leave the predictions untouched

        Returns:
            (DataFrame): dataframe corresponding to self.candidate_data
        """
        # Raise every prediction below the floor up to the floor; this
        # works on a new array, so the caller's input is not modified
        energies = formation_energies
        if floor is not None:
            energies = np.array(energies)
            below_floor = energies < floor
            energies[below_floor] = floor

        # Update formation energy predictions
        self.candidate_data["pred_delta_e"] = energies

        # One Phase per candidate, described by the row's index label
        candidate_phases = []
        for m_id, record in self.candidate_data.iterrows():
            candidate_phases.append(
                Phase(
                    record["Composition"],
                    energy=record["pred_delta_e"],
                    per_atom=True,
                    description=m_id,
                )
            )

        # Refresh the seed phase data, then work on a deep copy so that
        # the candidates are not added to self.pd
        pd_ml = deepcopy(self.get_pd())
        pd_ml.add_phases(candidate_phases)
        space_ml = PhaseSpaceAL(bounds=ELEMENTS, data=pd_ml)

        # Compute stabilities and read them back from the phase objects
        space_ml.compute_stabilities(candidate_phases, self.parallel)
        stabilities = []
        for phase in candidate_phases:
            stabilities.append(phase.stability)
        self.candidate_data["pred_stability"] = stabilities

        if sort:
            self.candidate_data = self.candidate_data.sort_values("pred_stability")

        return self.candidate_data