def get_pred_surface(self, rh, X_scaled, conf_list: list, contour_step_size):
    """Fit an EPM on the scaled input dimensions and return data to plot a
    contour plot of the empirical performance.

    Parameters
    ----------
    rh: RunHistory
        runhistory
    X_scaled: np.array
        configurations in scaled 2dim
    conf_list: list
        list of Configuration objects
    contour_step_size: float
        step-size of the meshgrid on which the surface is predicted

    Returns
    -------
    contour_data: (np.array, np.array, np.array)
        x, y, Z for contour plots
    """
    # Use PCA to reduce instance features to at most 2 dimensions.
    # Work on a deepcopy because the PCA below mutates the scenario's features.
    scen = copy.deepcopy(self.scenario)  # pca changes feats
    if scen.feature_array.shape[1] > 2:
        self.logger.debug("Use PCA to reduce features to from %d dim to 2 dim",
                          scen.feature_array.shape[1])
        # perform PCA (standardize first so all features weigh equally)
        insts = scen.feature_dict.keys()
        feature_array = np.array([scen.feature_dict[i] for i in insts])
        feature_array = StandardScaler().fit_transform(feature_array)
        feature_array = PCA(n_components=2).fit_transform(feature_array)
        # inject reduced features back into the scenario-object
        scen.feature_array = feature_array
        scen.feature_dict = {inst: feature_array[idx, :]
                             for idx, inst in enumerate(insts)}
        scen.n_features = 2

    # convert the data to train EPM on 2-dim featurespace (for contour-data)
    self.logger.debug("Convert data for epm.")
    # NOTE: the `types` returned here describe the original config space and
    # are not used — the EPM is trained on 2 scaled config dims + the (PCA'ed)
    # feature dims, all continuous, so we build fresh all-zero types below.
    X, y, _ = convert_data_for_epm(scenario=scen, runhistory=rh,
                                   logger=self.logger)
    types = np.array(np.zeros((2 + scen.feature_array.shape[1])), dtype=np.uint)
    num_params = len(scen.cs.get_hyperparameters())

    # impute missing values in configs and insert MDS'ed (2dim) configs to the
    # right positions, keyed by the stringified original config-array
    conf_dict = {}
    for idx, c in enumerate(conf_list):
        conf_list[idx] = impute_inactive_values(c)
        conf_dict[str(conf_list[idx].get_array())] = X_scaled[idx, :]

    X_trans = []
    for x in X:
        x_scaled_conf = conf_dict[str(x[:num_params])]
        # append scaled config + pca'ed features (total of 4 values) per
        # config/feature-sample
        X_trans.append(np.concatenate((x_scaled_conf, x[num_params:]), axis=0))
    X_trans = np.array(X_trans)

    self.logger.debug("Train random forest for contour-plot.")
    bounds = np.array([(0, np.nan), (0, np.nan)], dtype=object)
    model = RandomForestWithInstances(types=types, bounds=bounds,
                                      instance_features=np.array(scen.feature_array),
                                      ratio_features=1.0)

    start = time.time()
    model.train(X_trans, y)
    self.logger.debug("Fitting random forest took %f time", time.time() - start)

    # build a prediction grid that extends one unit beyond the data range
    x_min, x_max = X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1
    y_min, y_max = X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, contour_step_size),
                         np.arange(y_min, y_max, contour_step_size))

    self.logger.debug("x_min: %f, x_max: %f, y_min: %f, y_max: %f",
                      x_min, x_max, y_min, y_max)
    self.logger.debug("Predict on %d samples in grid to get surface (step-size: %f)",
                      np.c_[xx.ravel(), yy.ravel()].shape[0], contour_step_size)

    start = time.time()
    Z, _ = model.predict_marginalized_over_instances(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    self.logger.debug("Predicting random forest took %f time", time.time() - start)

    return xx, yy, Z
def get_pred_surface(self, rh, X_scaled, conf_list: list, contour_step_size):
    """Fit an EPM on the scaled input dimensions and return data to plot a
    contour plot of the empirical performance.

    Parameters
    ----------
    rh: RunHistory
        runhistory
    X_scaled: np.array
        configurations in scaled 2dim
    conf_list: list
        list of Configuration objects
    contour_step_size: float
        step-size for contour

    Returns
    -------
    contour_data: (np.array, np.array, np.array)
        x, y, Z for contour plots
    """
    # Use PCA to reduce instance features to at most 2 dimensions.
    # Work on a deepcopy because the PCA below mutates the scenario's features.
    scen = copy.deepcopy(self.scenario)  # pca changes feats
    if scen.feature_array.shape[1] > 2:
        self.logger.debug("Use PCA to reduce features to from %d dim to 2 dim",
                          scen.feature_array.shape[1])
        # perform PCA (standardize first so all features weigh equally)
        insts = scen.feature_dict.keys()
        feature_array = np.array([scen.feature_dict[i] for i in insts])
        feature_array = StandardScaler().fit_transform(feature_array)
        feature_array = PCA(n_components=2).fit_transform(feature_array)
        # inject reduced features back into the scenario-object
        scen.feature_array = feature_array
        scen.feature_dict = {inst: feature_array[idx, :]
                             for idx, inst in enumerate(insts)}
        scen.n_features = 2

    # convert the data to train EPM on 2-dim featurespace (for contour-data)
    self.logger.debug("Convert data for epm.")
    # NOTE: the `types` returned here describe the original config space and
    # are not used — the EPM is trained on 2 scaled config dims + the (PCA'ed)
    # feature dims, all continuous, so we build fresh all-zero types below.
    X, y, _ = convert_data_for_epm(scenario=scen, runhistory=rh,
                                   impute_inactive_parameters=True,
                                   logger=self.logger)
    types = np.array(np.zeros((2 + scen.feature_array.shape[1])), dtype=np.uint)
    num_params = len(scen.cs.get_hyperparameters())

    # impute missing values in configs and insert MDS'ed (2dim) configs to the
    # right positions, keyed by the stringified original config-array
    conf_dict = {}
    # Remove forbidden clauses (this is necessary to enable the
    # impute_inactive_values-method, see #226)
    cs_no_forbidden = copy.deepcopy(conf_list[0].configuration_space)
    cs_no_forbidden.forbidden_clauses = []
    for idx, c in enumerate(conf_list):
        c.configuration_space = cs_no_forbidden
        conf_list[idx] = impute_inactive_values(c)
        conf_dict[str(conf_list[idx].get_array())] = X_scaled[idx, :]

    # Debug compare elements:
    c1, c2 = {str(z) for z in X}, {str(z) for z in conf_dict.keys()}
    # BUGFIX: the format-call previously passed len(c1 ^ c2) twice and five
    # arguments to four placeholders, so the reported set sizes were wrong.
    self.logger.debug(
        "{} elements not in both sets, {} elements in both sets, X (len {}) and conf_dict (len {}) "
        "(might be a problem related to forbidden clauses?)".format(
            len(c1 ^ c2), len(c1 & c2), len(c1), len(c2)))
    # self.logger.debug("Elements: {}".format(str(c1 ^ c2)))

    # X_trans mirrors X, but the config part of each row is replaced by its
    # scaled 2-dim representation, so rows are (2 + n_feats) wide instead of
    # (num_params + n_feats)
    X_trans = []
    for x in X:
        x_scaled_conf = conf_dict[str(x[:num_params])]
        # append scaled config + pca'ed features (total of 4 values) per
        # config/feature-sample
        X_trans.append(np.concatenate((x_scaled_conf, x[num_params:]), axis=0))
    X_trans = np.array(X_trans)

    self.logger.debug(
        "Train random forest for contour-plot. Shape of X: {}, shape of X_trans: {}"
        .format(X.shape, X_trans.shape))
    self.logger.debug("Faking configspace to be able to train rf...")
    # We need to fake config-space bypass imputation of inactive values in
    # random forest implementation
    fake_cs = ConfigurationSpace(name="fake-cs-for-configurator-footprint")

    bounds = np.array([(0, np.nan), (0, np.nan)], dtype=object)
    model = RandomForestWithInstances(fake_cs, types, bounds,
                                      seed=self.rng.randint(MAXINT),
                                      instance_features=np.array(scen.feature_array),
                                      ratio_features=1.0)

    start = time.time()
    model.train(X_trans, y)
    self.logger.debug("Fitting random forest took %f time", time.time() - start)

    # build a prediction grid that extends one unit beyond the data range
    x_min, x_max = X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1
    y_min, y_max = X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, contour_step_size),
                         np.arange(y_min, y_max, contour_step_size))

    self.logger.debug("x_min: %f, x_max: %f, y_min: %f, y_max: %f",
                      x_min, x_max, y_min, y_max)
    self.logger.debug("Predict on %d samples in grid to get surface (step-size: %f)",
                      np.c_[xx.ravel(), yy.ravel()].shape[0], contour_step_size)

    start = time.time()
    Z, _ = model.predict_marginalized_over_instances(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    self.logger.debug("Predicting random forest took %f time", time.time() - start)

    return xx, yy, Z