Ejemplo n.º 1
0
    def compute_stabilities_mod(self, phases_to_evaluate=None):
        """
        Calculate the stability for every Phase.

        Phases stored in self.phase_dict are handled first, and only when
        their stability is still None. Each requested phase that is not
        itself stored in self.phase_dict is then evaluated, either relative
        to the stored phase of the same name or, when no such entry exists,
        with its own gclp call. Results are written to the ``stability``
        attribute of each phase; nothing is returned.

        Args:
            phases_to_evaluate ([phase]): Included phases, if None,
                uses every Phase in PhaseSpace.phases
        """

        def _gclp_stability(phase):
            # Stability is the phase energy minus the first element of the
            # gclp result at the phase's unit composition. A failed
            # evaluation prints the phase and yields NaN; catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            try:
                return phase.energy - self.gclp(phase.unit_comp)[0]
            except Exception:
                print(phase)
                return np.nan

        if phases_to_evaluate is None:
            phases_to_evaluate = self.phases

        # Snapshot the stored phases once instead of rebuilding the list
        # for every membership test in the second loop.
        # NOTE(review): assumes self.gclp does not add or remove
        # phase_dict entries - confirm.
        dict_phases = list(self.phase_dict.values())

        for p in tqdm(dict_phases):
            # for low e phases, we only need to eval stability if it
            # doesn't exist
            if p.stability is None:
                p.stability = _gclp_stability(p)

        # will only do requested phases for things not in phase_dict
        for p in tqdm(phases_to_evaluate):
            if p in dict_phases:
                continue
            if p.name in self.phase_dict:
                # Shift the stored stability by the energy difference to
                # the stored phase of the same name
                reference = self.phase_dict[p.name]
                p.stability = p.energy - reference.energy + reference.stability
            else:
                p.stability = _gclp_stability(p)
Ejemplo n.º 2
0
    def compute_stabilities(self, phases, ncpus=cpu_count()):
        """
        Compute the stability of each of the supplied phases.

        The phase dictionary is refreshed first. With more than one cpu the
        work is mapped over a process pool and the results are written back
        to the ``stability`` attribute of each phase; otherwise the phases
        are processed one after another in this process.

        Args:
            phases ([Phase]): list of Phases for which to compute
                stability
            ncpus (int): number of cpus to use, i. e. processes
                to use

        Returns:
            ([float]) stability values for all phases
        """
        self.update_phase_dict(ncpus=ncpus)

        if ncpus > 1:
            with Pool(ncpus) as workers:
                results = workers.map(self.compute_stability, phases)
            # Pool doesn't always modify the phases directly,
            # so the computed values are assigned here afterwards
            for entry, value in zip(phases, results):
                entry.stability = value
            return results

        # Serial path: evaluate in-process with a progress bar
        results = []
        for entry in tqdm(phases):
            results.append(self.compute_stability(entry))
        return results
Ejemplo n.º 3
0
def cache_download(url, path):
    """
    Quick helper function to cache a generic download from a url
    in the CAMD local data directory

    Nothing is downloaded when the target file already exists. The
    payload is streamed into a temporary ``.part`` file that is renamed
    onto the final location only after the download completed, so an
    interrupted transfer is never mistaken for a cached file.

    Args:
        url (str): url for download
        path (str): path for download, is appended to the
            CAMD_CACHE location

    Returns:
        (None)

    Raises:
        requests.HTTPError: if the server answers with an error status,
            instead of caching the error body as if it were the data
    """
    # Prep cache path
    cache_path = os.path.join(CAMD_CACHE, path)

    # Already cached by an earlier call, nothing to do
    if os.path.isfile(cache_path):
        return

    makedirs_p(os.path.split(cache_path)[0])
    block_size = 1024  # 1 Kibibyte
    partial_path = cache_path + ".part"

    # The context managers release the connection, the progress bar and
    # the file handle even when the transfer fails
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        total_size = int(r.headers.get('content-length', 0))
        try:
            with tqdm(total=total_size, unit='iB', unit_scale=True) as t, \
                    open(partial_path, 'wb') as f:
                for data in r.iter_content(block_size):
                    t.update(len(data))
                    f.write(data)
        except BaseException:
            # Drop the incomplete file before re-raising
            try:
                os.remove(partial_path)
            except OSError:
                pass
            raise

    # Publish the finished download under its final name
    os.replace(partial_path, cache_path)
Ejemplo n.º 4
0
 def predict(self, X):
     """
     Apply the committee of models to candidate space

     Every (scaler, model) pair in self.committee_models transforms X
     with its own scaler and predicts on the result.

     Args:
         X: input values handed to each scaler's transform

     Returns:
         tuple of (means, stds) of the member predictions, both taken
         along axis 0, i. e. across committee members
     """
     member_outputs = [
         member.predict(member_scaler.transform(X))
         for member_scaler, member in tqdm(self.committee_models)
     ]
     stacked = np.array(member_outputs)
     return np.mean(stacked, axis=0), np.std(stacked, axis=0)
Ejemplo n.º 5
0
 def predict(self, X):
     """
     Apply the committee of models to candidate space

     The first self.n_members entries of self.committee_models are
     used; each entry holds a scaler at position 0 and a model at
     position 1.

     Args:
         X: input values handed to each scaler's transform

     Returns:
         tuple of (means, stds) of the member predictions, both taken
         along axis 0, i. e. across committee members
     """
     collected = []
     for member_idx in tqdm(list(range(self.n_members))):
         entry = self.committee_models[member_idx]
         member_scaler, member_model = entry[0], entry[1]
         collected.append(member_model.predict(member_scaler.transform(X)))

     stacked = np.array(collected)
     spread = np.std(stacked, axis=0)
     center = np.mean(stacked, axis=0)
     return center, spread
Ejemplo n.º 6
0
    def fit(self, X, y):
        """
        Fits the QBC committee member models

        Each of the n_members models is a clone of self.model that is
        trained on its own random subset of the rows (training_fraction of
        them, drawn without replacement) after standard scaling. The fitted
        [scaler, model] pairs are stored in self.committee_models. If
        self.test_full_model is set, a model fitted on all data is also
        scored with 5-fold cross validation and the mean absolute error is
        stored in self.cv_score.

        Args:
            X (pandas.DataFrame, np.ndarray): input X values for fitting
            y (pandas.DataFrame, np.ndarray): output y values to regress
                or fit to

        Returns:
            None

        """
        self._X, self._y = X, y

        def _take_rows(data, positions):
            # pandas objects need positional .iloc indexing, numpy arrays
            # accept the integer index array directly
            if hasattr(data, "iloc"):
                return data.iloc[positions]
            return data[positions]

        n_samples = len(X)
        n_train = int(self.training_fraction * n_samples)

        # Draw all member subsets before any model is fitted
        split_X = []
        split_y = []
        for _ in range(self.n_members):
            order = np.arange(n_samples)
            np.random.shuffle(order)
            indices = order[:n_train]
            split_X.append(_take_rows(X, indices))
            split_y.append(_take_rows(y, indices))

        self.committee_models = []
        for member_X, member_y in tqdm(list(zip(split_X, split_y))):
            scaler = StandardScaler()
            # Separate local names keep the X / y arguments intact
            scaled_X = scaler.fit_transform(member_X)
            model = clone(self.model)
            model.fit(scaled_X, member_y)
            # Saving the scaler and model to make predictions
            self.committee_models.append([scaler, model])

        self.trained = True

        if self.test_full_model:
            # Get a CV score for an overall model on the full dataset
            full_scaler = StandardScaler()
            _X = full_scaler.fit_transform(self._X, self._y)
            full_model = clone(self.model)
            full_model.fit(_X, self._y)
            cv_score = cross_val_score(
                full_model,
                _X,
                self._y,
                cv=KFold(5, shuffle=True),
                scoring="neg_mean_absolute_error",
            )
            # Scores are negated MAE values, flip the sign for reporting
            self.cv_score = np.mean(cv_score) * -1
Ejemplo n.º 7
0
    def predict(self, X):
        """
        Run every fitted committee member on the candidate space and
        summarize the member predictions.

        Args:
            X (pandas.DataFrame, np.ndarray): input matrix or values
                on which to predict

        Returns:
            (np.ndarray): mean values for predictions for all committee members
            (np.ndarray): standard deviation values for predictions for all committee members

        """
        # Each member predicts on X transformed by its own scaler
        per_member = np.array([
            member.predict(member_scaler.transform(X))
            for member_scaler, member in tqdm(self.committee_models)
        ])
        # Axis 0 runs over the committee members
        return np.mean(per_member, axis=0), np.std(per_member, axis=0)
Ejemplo n.º 8
0
    def fit(self, X, y):
        """
        Fit one scaler/model pair per committee member on random subsets.

        X and y are stored on the instance and are indexed positionally
        with ``.iloc``. Every member receives its own shuffled subset of
        int(training_fraction * len(X)) rows; all subsets are drawn before
        any model is fitted. The fitted [scaler, model] pairs are appended
        to self.committee_models and self.trained is set. If
        self.test_full_model is set, a model built on all data is scored
        with 5-fold cross validation and the sign-flipped mean score is
        stored in self.cv_score.

        Args:
            X: input values for fitting
            y: target values to fit to

        Returns:
            None
        """
        self._X, self._y = X, y

        # Draw the training subset of every member up front
        subsets = []
        for _ in range(self.n_members):
            order = np.arange(len(X))
            np.random.shuffle(order)
            chosen = order[:int(self.training_fraction * len(X))]
            subsets.append((X.iloc[chosen], y.iloc[chosen]))

        self.committee_models = []
        for member_idx in tqdm(list(range(self.n_members))):
            member_X, member_y = subsets[member_idx]
            member_scaler = StandardScaler()
            scaled_X = member_scaler.fit_transform(member_X)
            member_model = self.ml_algorithm(**self.ml_algorithm_params)
            member_model.fit(scaled_X, member_y)
            # The scaler is kept next to the model for use in predictions
            self.committee_models.append([member_scaler, member_model])

        self.trained = True

        if not self.test_full_model:
            return

        # Get a CV score for an overall model with present dataset
        reference_model = self.ml_algorithm(**self.ml_algorithm_params)
        reference_scaler = StandardScaler()
        _X = reference_scaler.fit_transform(self._X, self._y)
        reference_model.fit(_X, self._y)
        fold_scores = cross_val_score(
            reference_model,
            _X,
            self._y,
            cv=KFold(5, shuffle=True),
            scoring='neg_mean_absolute_error',
        )
        self.cv_score = -np.mean(fold_scores)