Python convert_data_for_epm Beispiele

Programmiersprache: Python

Namespace / Paketname: cave.utils.convert_for_epm

Methode / Funktion: convert_data_for_epm

Beispiele auf hotexamples.com: 2

Python convert_data_for_epm - 2 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Funktion cave.utils.convert_for_epm.convert_data_for_epm, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Beispiel #1

Datei anzeigen

Datei: configurator_footprint.py Projekt: xiaoxiao19/CAVE

    def get_pred_surface(self, rh, X_scaled, conf_list: list,
                         contour_step_size):
        """Train an EPM on the scaled 2-dim configurations and evaluate it
        on a regular grid, yielding the data for a contour plot.

        Parameters
        ----------
        rh: RunHistory
            runhistory
        X_scaled: np.array
            configurations in scaled 2dim
        conf_list: list
            list of Configuration objects (entries are replaced in place by
            the result of impute_inactive_values)
        contour_step_size: float
            step used for both axes of the prediction grid

        Returns
        -------
        contour_data: (np.array, np.array, np.array)
            x, y, Z for contour plots
        """
        # Work on a copy, because the PCA step below overwrites the features.
        scen = copy.deepcopy(self.scenario)
        n_feats = scen.feature_array.shape[1]
        if n_feats > 2:
            self.logger.debug(
                "Use PCA to reduce features to from %d dim to 2 dim",
                n_feats)
            # Standardize the instance features, then project them to 2 dims.
            insts = scen.feature_dict.keys()
            raw_feats = np.array([scen.feature_dict[i] for i in insts])
            std_feats = StandardScaler().fit_transform(raw_feats)
            reduced_feats = PCA(n_components=2).fit_transform(std_feats)
            # Write the reduced features back into the copied scenario.
            scen.feature_array = reduced_feats
            scen.feature_dict = {inst: reduced_feats[idx, :]
                                 for idx, inst in enumerate(insts)}
            scen.n_features = 2

        # Build the EPM training data on the (possibly reduced) feature space.
        self.logger.debug("Convert data for epm.")
        X, y, _ = convert_data_for_epm(scenario=scen,
                                       runhistory=rh,
                                       logger=self.logger)
        # The types returned above are discarded: every input column (2 config
        # dims + feature dims) gets type 0 instead.
        types = np.zeros(2 + scen.feature_array.shape[1], dtype=np.uint)
        num_params = len(scen.cs.get_hyperparameters())

        # Map the string form of each imputed configuration array to its
        # scaled 2-dim counterpart.
        conf_dict = {}
        for idx, conf in enumerate(conf_list):
            imputed = impute_inactive_values(conf)
            conf_list[idx] = imputed
            conf_dict[str(imputed.get_array())] = X_scaled[idx, :]

        # Swap the configuration part of every row in X for its scaled 2-dim
        # version and keep the trailing feature columns untouched.
        X_trans = np.array([
            np.concatenate((conf_dict[str(row[:num_params])],
                            row[num_params:]), axis=0)
            for row in X
        ])

        self.logger.debug("Train random forest for contour-plot.")
        bounds = np.array([(0, np.nan), (0, np.nan)], dtype=object)
        model = RandomForestWithInstances(types=types,
                                          bounds=bounds,
                                          instance_features=np.array(
                                              scen.feature_array),
                                          ratio_features=1.0)

        t_fit = time.time()
        model.train(X_trans, y)
        self.logger.debug("Fitting random forest took %f time",
                          time.time() - t_fit)

        # Grid over the scaled configuration space, padded by 1 on each side.
        x_min = X_scaled[:, 0].min() - 1
        x_max = X_scaled[:, 0].max() + 1
        y_min = X_scaled[:, 1].min() - 1
        y_max = X_scaled[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, contour_step_size),
                             np.arange(y_min, y_max, contour_step_size))
        grid_points = np.c_[xx.ravel(), yy.ravel()]

        self.logger.debug("x_min: %f, x_max: %f, y_min: %f, y_max: %f", x_min,
                          x_max, y_min, y_max)
        self.logger.debug(
            "Predict on %d samples in grid to get surface (step-size: %f)",
            grid_points.shape[0], contour_step_size)

        t_pred = time.time()
        Z, _ = model.predict_marginalized_over_instances(grid_points)
        Z = Z.reshape(xx.shape)
        self.logger.debug("Predicting random forest took %f time",
                          time.time() - t_pred)

        return xx, yy, Z

Beispiel #2

Datei anzeigen

Datei: configurator_footprint.py Projekt: byte-sculptor/CAVE

    def get_pred_surface(self, rh, X_scaled, conf_list: list,
                         contour_step_size):
        """fit epm on the scaled input dimension and
        return data to plot a contour plot of the empirical performance

        Parameters
        ----------
        rh: RunHistory
            runhistory
        X_scaled: np.array
            configurations in scaled 2dim
        conf_list: list
            list of Configuration objects (modified in place: each entry gets
            a forbidden-clause-free configuration space and is replaced by
            the result of impute_inactive_values)
        contour_step_size: float
            step-size for contour

        Returns
        -------
        contour_data: (np.array, np.array, np.array)
            x, y, Z for contour plots
        """
        # use PCA to reduce features to also at most 2 dims
        scen = copy.deepcopy(self.scenario)  # pca changes feats
        if scen.feature_array.shape[1] > 2:
            self.logger.debug(
                "Use PCA to reduce features to from %d dim to 2 dim",
                scen.feature_array.shape[1])
            # perform PCA
            insts = scen.feature_dict.keys()
            feature_array = np.array([scen.feature_dict[i] for i in insts])
            feature_array = StandardScaler().fit_transform(feature_array)
            feature_array = PCA(n_components=2).fit_transform(feature_array)
            # inject in scenario-object
            scen.feature_array = feature_array
            scen.feature_dict = {inst: feature_array[idx, :]
                                 for idx, inst in enumerate(insts)}
            scen.n_features = 2

        # convert the data to train EPM on 2-dim featurespace (for contour-data)
        self.logger.debug("Convert data for epm.")
        X, y, _ = convert_data_for_epm(scenario=scen,
                                       runhistory=rh,
                                       impute_inactive_parameters=True,
                                       logger=self.logger)
        # The types returned by the conversion are not used: all columns
        # (2 scaled config dims + feature dims) are given type 0.
        types = np.zeros(2 + scen.feature_array.shape[1], dtype=np.uint)
        num_params = len(scen.cs.get_hyperparameters())

        # impute missing values in configs and insert MDS'ed (2dim) configs to the right positions
        conf_dict = {}
        # Remove forbidden clauses (this is necessary to enable the impute_inactive_values-method, see #226)
        cs_no_forbidden = copy.deepcopy(conf_list[0].configuration_space)
        cs_no_forbidden.forbidden_clauses = []
        for idx, c in enumerate(conf_list):
            c.configuration_space = cs_no_forbidden
            conf_list[idx] = impute_inactive_values(c)
            conf_dict[str(conf_list[idx].get_array())] = X_scaled[idx, :]

        # Debug compare elements: use the same key as the lookup below
        # (string of the configuration part of each row), otherwise rows that
        # carry instance features could never match a conf_dict key.
        c1 = {str(x[:num_params]) for x in X}
        c2 = set(conf_dict)
        # Exactly one argument per placeholder: symmetric difference,
        # intersection, size of the X key set, size of the conf_dict key set.
        self.logger.debug(
            "{} elements not in both sets, {} elements in both sets, X (len {}) and conf_dict (len {}) "
            "(might be a problem related to forbidden clauses?)".format(
                len(c1 ^ c2), len(c1 & c2), len(c1), len(c2)))

        # X_trans mirrors X, but the configuration part of each row is replaced
        # by its scaled 2-dim version; the feature columns of X are kept.
        X_trans = []
        for x in X:
            x_scaled_conf = conf_dict[str(x[:num_params])]
            # append scaled config + remaining feature columns per config/feature-sample
            X_trans.append(
                np.concatenate((x_scaled_conf, x[num_params:]), axis=0))
        X_trans = np.array(X_trans)

        self.logger.debug(
            "Train random forest for contour-plot. Shape of X: {}, shape of X_trans: {}"
            .format(X.shape, X_trans.shape))
        self.logger.debug("Faking configspace to be able to train rf...")
        # We need to fake config-space bypass imputation of inactive values in random forest implementation
        fake_cs = ConfigurationSpace(name="fake-cs-for-configurator-footprint")

        bounds = np.array([(0, np.nan), (0, np.nan)], dtype=object)
        model = RandomForestWithInstances(fake_cs,
                                          types,
                                          bounds,
                                          seed=self.rng.randint(MAXINT),
                                          instance_features=np.array(
                                              scen.feature_array),
                                          ratio_features=1.0)

        start = time.time()
        model.train(X_trans, y)
        self.logger.debug("Fitting random forest took %f time",
                          time.time() - start)

        # Prediction grid over the scaled configurations, padded by 1 per side.
        x_min, x_max = X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1
        y_min, y_max = X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, contour_step_size),
                             np.arange(y_min, y_max, contour_step_size))
        grid_points = np.c_[xx.ravel(), yy.ravel()]

        self.logger.debug("x_min: %f, x_max: %f, y_min: %f, y_max: %f", x_min,
                          x_max, y_min, y_max)
        self.logger.debug(
            "Predict on %d samples in grid to get surface (step-size: %f)",
            grid_points.shape[0], contour_step_size)

        start = time.time()
        Z, _ = model.predict_marginalized_over_instances(grid_points)
        Z = Z.reshape(xx.shape)
        self.logger.debug("Predicting random forest took %f time",
                          time.time() - start)

        return xx, yy, Z