class QBCStabilityAgent(StabilityAgent):
    """
    Stability agent that scores candidates with a query-by-committee
    (QBC) ensemble and proposes the ones predicted to lie within the
    configured hull distance.
    """

    def __init__(
        self,
        candidate_data=None,
        seed_data=None,
        n_query=1,
        hull_distance=0.0,
        parallel=cpu_count(),
        alpha=0.5,
        training_fraction=0.5,
        model=None,
        n_members=10,
    ):
        """
        Args:
            candidate_data (DataFrame): data about the candidates
            seed_data (DataFrame): data to which the agent is fit
            n_query (int): number of hypotheses to generate
            hull_distance (float): hull-distance threshold below which
                a material is deemed "stable"
            parallel: forwarded unchanged to the StabilityAgent base
                class, which uses it for the phase stability analysis.
                The default is whatever cpu_count() returned when this
                method was defined.
            alpha (float): weight applied to the committee standard
                deviation when forming best-case stability predictions
            training_fraction (float): fraction of the data used to
                train each committee member
            model (sklearn-style regressor): regressor handed to the QBC
            n_members (int): number of committee members in the QBC
        """
        # Everything the base agent needs is passed straight through.
        base_kwargs = dict(
            candidate_data=candidate_data,
            seed_data=seed_data,
            n_query=n_query,
            hull_distance=hull_distance,
            parallel=parallel,
        )
        super(QBCStabilityAgent, self).__init__(**base_kwargs)

        self.n_members = n_members
        self.model = model
        self.alpha = alpha

        committee = QBC(
            n_members=n_members,
            training_fraction=training_fraction,
            model=model,
        )
        self.qbc = committee

    def get_hypotheses(self, candidate_data, seed_data=None, retrain_committee=True):
        """
        Propose candidates using the committee's predictions.

        Args:
            candidate_data (pandas.DataFrame): dataframe of candidates
            seed_data (pandas.DataFrame): dataframe of prior data on
                which the committee is fit
            retrain_committee (bool): whether to refit the committee on
                this call even if it is already trained

        Returns:
            (pandas.DataFrame): up to n_query rows of the agent's
                candidate data whose "pred_stability" is less than or
                equal to hull_distance
        """
        X_cand, X_seed, y_seed = self.update_data(candidate_data, seed_data)

        committee = self.qbc

        # Fit when the committee has never been trained, or on request.
        if not committee.trained or retrain_committee:
            committee.fit(X_seed, y_seed)
        self.cv_score = committee.cv_score

        # Committee mean and spread for every candidate; the spread,
        # scaled by alpha, is subtracted to give a best-case estimate.
        preds, stds = committee.predict(X_cand)
        penalty = stds * self.alpha
        best_case = preds - penalty

        # Write the estimates back onto the candidate dataframe.
        # NOTE(review): sort/floor semantics live in the base class --
        # presumably sorts by predicted stability; confirm there.
        self.update_candidate_stabilities(best_case, sort=True, floor=-6.0)

        # Keep only rows at or below the hull-distance threshold, then
        # take the first n_query of them.
        frame = self.candidate_data
        shortlist = frame[frame["pred_stability"] <= self.hull_distance]
        return shortlist.head(self.n_query)
class QBCStabilityAgent(StabilityAgent):
    """
    Stability agent that scores candidates with a query-by-committee
    (QBC) ensemble and records the indices of the ones predicted to lie
    strictly within the configured hull distance.
    """

    def __init__(self, candidate_data=None, seed_data=None, n_query=1,
                 hull_distance=0.0, multiprocessing=True, alpha=0.5,
                 training_fraction=0.5, ml_algorithm=None,
                 ml_algorithm_params=None, n_members=10):
        """
        Args:
            candidate_data (DataFrame): data about the candidates
            seed_data (DataFrame): data to which the agent is fit
            n_query (int): number of hypotheses to generate
            hull_distance (float): hull-distance threshold below which
                a material is deemed "stable"
            multiprocessing (bool): whether to use multiprocessing for
                the phase stability analysis; forwarded to the base
                class
            alpha (float): weight applied to the committee standard
                deviation when forming best-case stability predictions
            training_fraction (float): fraction of the data used to
                train each committee member
            ml_algorithm (sklearn-style regressor): regression method
                handed to the QBC
            ml_algorithm_params (dict): parameters to pass to the
                regression method
            n_members (int): number of committee members in the QBC
        """
        # Everything the base agent needs is passed straight through.
        base_kwargs = dict(
            candidate_data=candidate_data,
            seed_data=seed_data,
            n_query=n_query,
            hull_distance=hull_distance,
            multiprocessing=multiprocessing,
        )
        super(QBCStabilityAgent, self).__init__(**base_kwargs)

        self.alpha = alpha

        committee = QBC(
            n_members=n_members,
            training_fraction=training_fraction,
            ml_algorithm=ml_algorithm,
            ml_algorithm_params=ml_algorithm_params,
        )
        self.qbc = committee

    def get_hypotheses(self, candidate_data, seed_data=None,
                       retrain_committee=True):
        """
        Propose candidates using the committee's predictions.

        Args:
            candidate_data (pandas.DataFrame): dataframe of candidates
            seed_data (pandas.DataFrame): dataframe of prior data on
                which the committee is fit
            retrain_committee (bool): whether to refit the committee on
                this call even if it is already trained

        Returns:
            (list): index labels of up to n_query rows of the agent's
                candidate data whose 'pred_stability' is strictly less
                than hull_distance; the same list is stored on
                self.indices_to_compute
        """
        X_cand, X_seed, y_seed = self.update_data(candidate_data, seed_data)

        committee = self.qbc

        # Fit when the committee has never been trained, or on request.
        if not committee.trained or retrain_committee:
            committee.fit(X_seed, y_seed)
        self.cv_score = committee.cv_score

        # Committee mean and spread for every candidate; the spread,
        # scaled by alpha, is subtracted to give a best-case estimate.
        preds, stds = committee.predict(X_cand)
        penalty = stds * self.alpha
        best_case = preds - penalty

        # Write the estimates back onto the candidate dataframe.
        # NOTE(review): sort/floor semantics live in the base class --
        # presumably sorts by predicted stability; confirm there.
        self.update_candidate_stabilities(best_case, sort=True, floor=-6.0)

        # Keep only rows strictly below the hull-distance threshold and
        # remember the index labels of the first n_query of them.
        frame = self.candidate_data
        shortlist = frame[frame['pred_stability'] < self.hull_distance]
        self.indices_to_compute = shortlist.head(self.n_query).index.tolist()
        return self.indices_to_compute