예제 #1
0
    def __init__(
        self,
        candidate_data=None,
        seed_data=None,
        n_query=1,
        hull_distance=0.0,
        parallel=cpu_count(),
        alpha=0.5,
        training_fraction=0.5,
        model=None,
        n_members=10,
    ):
        """
        Initialize the agent and build its query-by-committee (QBC) helper.

        Args:
            candidate_data (DataFrame): data about the candidates
                (forwarded to the parent initializer)
            seed_data (DataFrame): data to fit the agent to
                (forwarded to the parent initializer)
            n_query (int): number of hypotheses to generate
                (forwarded to the parent initializer)
            hull_distance (float): hull distance used as the criterion
                for deeming a material "stable" (forwarded to the parent
                initializer)
            parallel: parallelism setting for phase stability analysis,
                forwarded to the parent initializer. The default is
                cpu_count(), evaluated once when the method is defined.
                NOTE(review): earlier documentation called this a bool,
                but the default is an int -- confirm the expected type
                against the parent class.
            alpha (float): weighting factor for the stdev in making
                best-case predictions of the stability; stored on
                self.alpha
            training_fraction (float): fraction of data to use for
                training committee members; passed to QBC
            model (sklearn-style regressor): regressor; stored on
                self.model and passed to QBC
            n_members (int): number of committee members for the QBC;
                stored on self.n_members and passed to QBC
        """
        # Everything the parent class needs, gathered in one place.
        base_settings = {
            "candidate_data": candidate_data,
            "seed_data": seed_data,
            "n_query": n_query,
            "hull_distance": hull_distance,
            "parallel": parallel,
        }
        super(QBCStabilityAgent, self).__init__(**base_settings)

        # Agent-level settings are recorded before the committee is built.
        self.alpha = alpha
        self.model = model
        self.n_members = n_members

        committee = QBC(
            n_members=n_members,
            training_fraction=training_fraction,
            model=model,
        )
        self.qbc = committee
예제 #2
0
    def __init__(self,
                 candidate_data=None,
                 seed_data=None,
                 n_query=1,
                 hull_distance=0.0,
                 multiprocessing=True,
                 alpha=0.5,
                 training_fraction=0.5,
                 ml_algorithm=None,
                 ml_algorithm_params=None,
                 n_members=10):
        """
        Initialize the agent and build its query-by-committee (QBC) helper.

        Args:
            candidate_data (DataFrame): data about the candidates
                (forwarded to the parent initializer)
            seed_data (DataFrame): data to fit the agent to
                (forwarded to the parent initializer)
            n_query (int): number of hypotheses to generate
                (forwarded to the parent initializer)
            hull_distance (float): hull distance used as the criterion
                for deeming a material "stable" (forwarded to the parent
                initializer)
            multiprocessing (bool): whether to use multiprocessing
                for phase stability analysis (forwarded to the parent
                initializer)
            alpha (float): weighting factor for the stdev in making
                best-case predictions of the stability; stored on
                self.alpha
            training_fraction (float): fraction of data to use for
                training committee members; passed to QBC
            ml_algorithm (sklearn-style regressor): regression method;
                passed to QBC
            ml_algorithm_params (dict): parameters to pass to the
                regression method; passed to QBC
            n_members (int): number of committee members for the QBC;
                passed to QBC
        """
        # Arguments understood by the parent class.
        parent_kwargs = dict(
            candidate_data=candidate_data,
            seed_data=seed_data,
            n_query=n_query,
            hull_distance=hull_distance,
            multiprocessing=multiprocessing,
        )
        super(QBCStabilityAgent, self).__init__(**parent_kwargs)

        self.alpha = alpha

        # Arguments that configure the committee itself.
        committee_kwargs = dict(
            n_members=n_members,
            training_fraction=training_fraction,
            ml_algorithm=ml_algorithm,
            ml_algorithm_params=ml_algorithm_params,
        )
        self.qbc = QBC(**committee_kwargs)
예제 #3
0
class QBCStabilityAgent(StabilityAgent):
    """
    Stability agent that ranks candidates using a query-by-committee
    (QBC) model and proposes those predicted to lie within the hull
    distance.
    """
    def __init__(
        self,
        candidate_data=None,
        seed_data=None,
        n_query=1,
        hull_distance=0.0,
        parallel=cpu_count(),
        alpha=0.5,
        training_fraction=0.5,
        model=None,
        n_members=10,
    ):
        """
        Initialize the agent and build its QBC helper.

        Args:
            candidate_data (DataFrame): data about the candidates
            seed_data (DataFrame): data to fit the agent to
            n_query (int): number of hypotheses to generate
            hull_distance (float): hull distance used as the criterion
                for deeming a material "stable"
            parallel: parallelism setting for phase stability analysis,
                forwarded to the parent initializer. The default is
                cpu_count(), evaluated once when the class is defined.
                NOTE(review): earlier documentation called this a bool,
                but the default is an int -- confirm the expected type
                against the parent class.
            alpha (float): weighting factor for the stdev in making
                best-case predictions of the stability
            training_fraction (float): fraction of data to use for
                training committee members
            model (sklearn-style regressor): regressor handed to QBC
            n_members (int): number of committee members for the QBC
        """
        # Everything the parent class needs, gathered in one place.
        base_settings = {
            "candidate_data": candidate_data,
            "seed_data": seed_data,
            "n_query": n_query,
            "hull_distance": hull_distance,
            "parallel": parallel,
        }
        super(QBCStabilityAgent, self).__init__(**base_settings)

        # Agent-level settings are recorded before the committee is built.
        self.alpha = alpha
        self.model = model
        self.n_members = n_members

        committee = QBC(
            n_members=n_members,
            training_fraction=training_fraction,
            model=model,
        )
        self.qbc = committee

    def get_hypotheses(self, candidate_data, seed_data=None, retrain_committee=True):
        """
        Propose candidates using the QBC predictions.

        The committee is (re)fit on the seed data when it is untrained or
        when retraining is requested. Its predictions, lowered by alpha
        times the committee stdev, are used to update the candidate
        stabilities, and candidates whose "pred_stability" is at most
        hull_distance are returned.

        Args:
            candidate_data (pandas.DataFrame): dataframe of candidates
            seed_data (pandas.DataFrame): dataframe of prior data on
                which to fit the committee
            retrain_committee (bool): whether to refit the committee on
                every call, even if it is already trained

        Returns:
            (pandas.DataFrame): at most n_query rows of
                self.candidate_data with pred_stability <= hull_distance
                (row order is whatever update_candidate_stabilities
                leaves behind -- presumably most stable first given
                sort=True; confirm against the parent class)

        """
        feats_cand, feats_seed, target_seed = self.update_data(
            candidate_data, seed_data
        )

        committee = self.qbc

        # Fitting is skipped only for an already-trained committee when the
        # caller explicitly opted out of retraining.
        if not (committee.trained and not retrain_committee):
            committee.fit(feats_seed, target_seed)
        self.cv_score = committee.cv_score

        # Committee mean and spread for every candidate; the spread is
        # weighted by alpha and subtracted to get a best-case estimate.
        preds, stds = committee.predict(feats_cand)
        penalty = stds * self.alpha
        best_case = preds - penalty

        # Push the estimates into the candidate dataframe.
        # NOTE(review): floor=-6.0 is passed through unchanged; its meaning
        # is defined by update_candidate_stabilities.
        self.update_candidate_stabilities(best_case, sort=True, floor=-6.0)

        # Keep candidates at or below the hull distance, capped at n_query.
        candidates = self.candidate_data
        is_within_hull = candidates["pred_stability"] <= self.hull_distance
        return candidates[is_within_hull].head(self.n_query)
예제 #4
0
class QBCStabilityAgent(StabilityAgent):
    """
    Stability agent that ranks candidates using a query-by-committee
    (QBC) model and proposes those predicted to lie below the hull
    distance.
    """
    def __init__(self,
                 candidate_data=None,
                 seed_data=None,
                 n_query=1,
                 hull_distance=0.0,
                 multiprocessing=True,
                 alpha=0.5,
                 training_fraction=0.5,
                 ml_algorithm=None,
                 ml_algorithm_params=None,
                 n_members=10):
        """
        Initialize the agent and build its QBC helper.

        Args:
            candidate_data (DataFrame): data about the candidates
            seed_data (DataFrame): data to fit the agent to
            n_query (int): number of hypotheses to generate
            hull_distance (float): hull distance used as the criterion
                for deeming a material "stable"
            multiprocessing (bool): whether to use multiprocessing
                for phase stability analysis
            alpha (float): weighting factor for the stdev in making
                best-case predictions of the stability
            training_fraction (float): fraction of data to use for
                training committee members
            ml_algorithm (sklearn-style regressor): regression method
            ml_algorithm_params (dict): parameters to pass to the
                regression method
            n_members (int): number of committee members for the QBC
        """
        # Arguments understood by the parent class.
        parent_kwargs = dict(
            candidate_data=candidate_data,
            seed_data=seed_data,
            n_query=n_query,
            hull_distance=hull_distance,
            multiprocessing=multiprocessing,
        )
        super(QBCStabilityAgent, self).__init__(**parent_kwargs)

        self.alpha = alpha

        # Arguments that configure the committee itself.
        committee_kwargs = dict(
            n_members=n_members,
            training_fraction=training_fraction,
            ml_algorithm=ml_algorithm,
            ml_algorithm_params=ml_algorithm_params,
        )
        self.qbc = QBC(**committee_kwargs)

    def get_hypotheses(self,
                       candidate_data,
                       seed_data=None,
                       retrain_committee=True):
        """
        Propose candidate indices using the QBC predictions.

        The committee is (re)fit on the seed data when it is untrained or
        when retraining is requested. Its predictions, lowered by alpha
        times the committee stdev, are used to update the candidate
        stabilities, and the indices of candidates whose 'pred_stability'
        is strictly below hull_distance are selected.

        Args:
            candidate_data (pandas.DataFrame): dataframe of candidates
            seed_data (pandas.DataFrame): dataframe of prior data on
                which to fit the committee
            retrain_committee (bool): whether to refit the committee on
                every call, even if it is already trained

        Returns:
            (list): index labels of at most n_query selected candidates;
                the same list is stored on self.indices_to_compute
        """
        feats_cand, feats_seed, target_seed = self.update_data(
            candidate_data, seed_data)

        committee = self.qbc

        # Fitting is skipped only for an already-trained committee when the
        # caller explicitly opted out of retraining.
        if not (committee.trained and not retrain_committee):
            committee.fit(feats_seed, target_seed)
        self.cv_score = committee.cv_score

        # Committee mean and spread for every candidate; the spread is
        # weighted by alpha and subtracted to get a best-case estimate.
        preds, stds = committee.predict(feats_cand)
        penalty = stds * self.alpha
        best_case = preds - penalty

        # Push the estimates into the candidate dataframe.
        # NOTE(review): floor=-6.0 is passed through unchanged; its meaning
        # is defined by update_candidate_stabilities.
        self.update_candidate_stabilities(best_case, sort=True, floor=-6.0)

        # NOTE(review): the comparison is strict, so a candidate sitting
        # exactly at hull_distance is not selected -- confirm this is
        # intended.
        candidates = self.candidate_data
        below_hull = candidates['pred_stability'] < self.hull_distance
        within_hull = candidates[below_hull]

        # Cap at n_query and remember the chosen index labels.
        selected = within_hull.head(self.n_query)
        self.indices_to_compute = selected.index.tolist()
        return self.indices_to_compute