def get_pd(self, chemsys=None):
    """
    Rebuild the phase data from the seed data, optionally restricted
    to a single chemical system.

    The result is stored on self.pd and is also returned.

    Args:
        chemsys (str): chemical system used to filter the seed data
            before the phase data is built. Any "-" characters are
            stripped before the string is handed to Composition.
            If falsy, all of the seed data is used.

    Returns:
        (PhaseData): the freshly populated phase data, i.e. the
            object that was just assigned to self.pd
    """
    phase_data = PhaseData()
    self.pd = phase_data

    # Narrow the seed data down to the requested chemsys, if any
    if not chemsys:
        seed_rows = self.seed_data
    else:
        system_comp = Composition(chemsys.replace("-", ""))
        seed_rows = filter_dataframe_by_composition(self.seed_data, system_comp)

    # One phase per seed row, labelled with that row's index
    phases = []
    for label, record in seed_rows.iterrows():
        phases.append(
            Phase(
                record["Composition"],
                energy=record["delta_e"],
                per_atom=True,
                description=label,
            )
        )

    # Every entry of ELEMENTS is added as a zero-energy phase
    for element in ELEMENTS:
        phases.append(Phase(element, 0.0, per_atom=True))

    phase_data.add_phases(phases)
    return phase_data
def get_phase_space(dataframe):
    """
    Build the PhaseSpaceAL object corresponding to a dataframe.

    Args:
        dataframe (DataFrame): dataframe with columns "Composition"
            containing formula and "delta_e" containing formation
            energy per atom; the row index is used as the
            description of each phase

    Returns:
        (PhaseSpaceAL): phase space bounded by ELEMENTS whose data
            holds one phase per dataframe row plus one zero-energy
            phase per entry of ELEMENTS
    """
    # Phases described by the rows of the dataframe
    compound_phases = [
        Phase(
            row["Composition"],
            energy=row["delta_e"],
            per_atom=True,
            description=row_label,
        )
        for row_label, row in dataframe.iterrows()
    ]

    # Zero-energy phases for each entry of ELEMENTS
    elemental_phases = [Phase(el, 0.0, per_atom=True) for el in ELEMENTS]

    phase_data = PhaseData()
    phase_data.add_phases(compound_phases + elemental_phases)
    return PhaseSpaceAL(bounds=ELEMENTS, data=phase_data)
def get_pd(self):
    """
    Rebuild the phase data associated with the seed_data.

    The result is stored on self.pd and is also returned.

    Returns:
        (PhaseData): the freshly populated phase data, i.e. the
            object that was just assigned to self.pd
    """
    phase_data = PhaseData()
    self.pd = phase_data

    # One phase per seed row, labelled with that row's index
    phases = []
    for label, record in self.seed_data.iterrows():
        phases.append(
            Phase(
                record['Composition'],
                energy=record['delta_e'],
                per_atom=True,
                description=label,
            )
        )

    # Every entry of ELEMENTS is added as a zero-energy phase
    for element in ELEMENTS:
        phases.append(Phase(element, 0.0, per_atom=True))

    phase_data.add_phases(phases)
    return phase_data
def get_phase_space(self, df=None):
    """
    Build the PhaseSpaceAL object corresponding to a dataframe.

    Args:
        df (DataFrame): dataframe to build the phase space from; its
            "Composition" and "delta_e" columns and its row index
            are read. If None, self.df is used instead.

    Returns:
        (PhaseSpaceAL): phase space bounded by ELEMENTS whose data
            holds one phase per dataframe row plus one zero-energy
            phase per entry of ELEMENTS
    """
    # Fall back on the instance's own dataframe when none is supplied
    if df is None:
        source = self.df
    else:
        source = df

    # Phases described by the rows of the dataframe
    compound_phases = [
        Phase(
            row['Composition'],
            energy=row['delta_e'],
            per_atom=True,
            description=row_label,
        )
        for row_label, row in source.iterrows()
    ]

    # Zero-energy phases for each entry of ELEMENTS
    elemental_phases = [Phase(el, 0.0, per_atom=True) for el in ELEMENTS]

    phase_data = PhaseData()
    phase_data.add_phases(compound_phases + elemental_phases)
    return PhaseSpaceAL(bounds=ELEMENTS, data=phase_data)
class StabilityAgent(HypothesisAgent, metaclass=abc.ABCMeta):
    """
    Abstract mixin collecting the initialization parameters and the
    helper methods shared by agents that make decisions based on
    phase stability.
    """

    def __init__(
        self,
        candidate_data=None,
        seed_data=None,
        n_query=1,
        hull_distance=0.0,
        parallel=cpu_count(),
    ):
        """
        Args:
            candidate_data (DataFrame): data about the candidates
            seed_data (DataFrame): data which to fit the Agent to
            n_query (int): number of hypotheses to generate
            hull_distance (float): hull distance used as the
                criterion for deeming a given material "stable"
            parallel (bool, int): stored and later forwarded to
                PhaseSpaceAL.compute_stabilities. Intended to control
                multiprocessing for the phase stability analysis
                (an int giving the number of jobs, a bool toggling
                between cpu_count() jobs and a single job).
                Defaults to cpu_count(), evaluated once when the
                class is defined.
        """
        super().__init__()

        # Data the agent works on
        self.candidate_data = candidate_data
        self.seed_data = seed_data

        # Decision parameters
        self.n_query = n_query
        self.hull_distance = hull_distance

        # Phase data is built lazily by get_pd
        self.pd = None
        self.parallel = parallel

        # These might be able to go into the base class
        self.cv_score = np.nan

    def get_pd(self):
        """
        Rebuild the phase data associated with the seed_data.

        The result is stored on self.pd and is also returned.

        Returns:
            (PhaseData): the freshly populated phase data, i.e. the
                object that was just assigned to self.pd
        """
        self.pd = PhaseData()

        # One phase per seed row, labelled with that row's index
        phases = []
        for label, record in self.seed_data.iterrows():
            phases.append(
                Phase(
                    record["Composition"],
                    energy=record["delta_e"],
                    per_atom=True,
                    description=label,
                )
            )

        # Every entry of ELEMENTS is added as a zero-energy phase
        for element in ELEMENTS:
            phases.append(Phase(element, 0.0, per_atom=True))

        self.pd.add_phases(phases)
        return self.pd

    def update_data(self, candidate_data=None, seed_data=None):
        """
        Helper that updates the stored data following the schema of
        the default OQMD data.

        Whichever of candidate_data / seed_data is supplied replaces
        the corresponding attribute, and the feature (and, for the
        seed data, target) views derived from it are returned. An
        argument left as None leaves its attribute untouched and
        yields None in the matching return slot(s).

        Args:
            candidate_data (DataFrame): new candidate dataframe
            seed_data (DataFrame): new seed dataframe

        Returns:
            (DataFrame): candidate features
            (DataFrame): seed features
            (DataFrame): seed targets, taken from the "delta_e"
                column of the seed data
        """
        # Columns that are not features. Missing ones are silently
        # skipped (errors="ignore") for brevity; keep an eye on this,
        # since it means we may not drop everything we intend to.
        non_feature_columns = [
            "Composition",
            "N_species",
            "delta_e",
            "pred_delta_e",
            "pred_stability",
            "stability",
            "is_stable",
            "structure",
        ]

        # Start from "nothing supplied" and fill in what was given
        X_cand = None
        X_seed = None
        y_seed = None

        if candidate_data is not None:
            self.candidate_data = candidate_data
            X_cand = candidate_data.drop(
                non_feature_columns, axis=1, errors="ignore"
            )

        if seed_data is not None:
            self.seed_data = seed_data
            X_seed = self.seed_data.drop(
                non_feature_columns, axis=1, errors="ignore"
            )
            y_seed = self.seed_data["delta_e"]

        return X_cand, X_seed, y_seed

    def update_candidate_stabilities(self, formation_energies, sort=True, floor=-6.0):
        """
        Updates the candidate dataframe with predicted formation
        energies and with the stabilities obtained from the
        corresponding phase diagram analysis.

        The "pred_delta_e" and "pred_stability" columns of
        self.candidate_data are written in place.

        Args:
            formation_energies ([float]): list of predictions for
                formation energies corresponding to candidate_data
                ordering
            sort (bool): whether or not to sort the final dataframe
                by "pred_stability"
            floor (float): lower bound for the predicted formation
                energies; predictions below it are replaced by it.
                Pass None to skip this step.

        Returns:
            (DataFrame): dataframe corresponding to
                self.candidate_data
        """
        # Preprocess formation energies with floor, working on an
        # array copy of the predictions
        energies = formation_energies
        if floor is not None:
            energies = np.array(formation_energies)
            below_floor = energies < floor
            energies[below_floor] = floor

        # Update formation energy predictions
        self.candidate_data["pred_delta_e"] = energies

        # Construct candidate phases, one per candidate row
        candidate_phases = []
        for m_id, data in self.candidate_data.iterrows():
            candidate_phases.append(
                Phase(
                    data["Composition"],
                    energy=data["pred_delta_e"],
                    per_atom=True,
                    description=m_id,
                )
            )

        # Refresh the seed PD and extend a copy of it with the candidates
        seed_pd = self.get_pd()
        pd_ml = deepcopy(seed_pd)
        pd_ml.add_phases(candidate_phases)
        space_ml = PhaseSpaceAL(bounds=ELEMENTS, data=pd_ml)

        # Compute stabilities and read them back off the phases
        space_ml.compute_stabilities(candidate_phases, self.parallel)
        stabilities = []
        for phase in candidate_phases:
            stabilities.append(phase.stability)
        self.candidate_data["pred_stability"] = stabilities

        if sort:
            self.candidate_data = self.candidate_data.sort_values("pred_stability")

        return self.candidate_data