class AggByAmount(BaseEstimator, TransformerMixin):
    """Per-bin mean features keyed on a discretized ``amount`` column.

    Bins ``amount`` with KBinsDiscretizer, computes the mean of
    ``columns_to_agg`` within each bin during ``fit``, and in ``transform``
    returns ONLY those per-bin mean features (one ``<col>_mean_given_amount``
    column per aggregated column), merged row-by-row onto the input.

    Parameters
    ----------
    n_bins : int
        Number of amount bins.
    strategy : str
        Binning strategy: 'uniform', 'quantile' or 'kmeans'.
    columns_to_agg : list of str
        Columns whose per-bin mean becomes a feature.
    """
    # Top features order: ['v1', 'v4', 'v10', 'v7', 'v18', 'v11', 'v20',
    # 'amount', 'v3', 'v16', 'v13', 'v14', 'v8', 'v9', 'v19', 'v2', 'v5',
    # 'v12', 'v26', 'v24', 'v25', 'v27', 'v17', 'v22', 'v23', 'v6', 'v15',
    # 'v21']

    def __init__(self, n_bins=2, strategy='quantile', columns_to_agg=['v1']):
        # NOTE: mutable default kept for sklearn get_params() compatibility;
        # the list is never mutated by this class.
        self.n_bins = n_bins
        self.strategy = strategy
        self.columns_to_agg = columns_to_agg
        self.kbins = None
        self.initial_columns = None
        self.agg_values = None

    def fit(self, X, y=None):
        """Fit the amount discretizer and the per-bin mean table."""
        self.kbins = KBinsDiscretizer(n_bins=self.n_bins, encode='ordinal',
                                      strategy=self.strategy)
        self.kbins.fit(X[['amount']].values)
        self.initial_columns = list(X.columns)
        # Work on a copy: fit() must not write a helper column into the
        # caller's DataFrame (the original mutated X in place here).
        X = X.copy()
        X['amount_discretized'] = self.kbins.transform(X[['amount']].values)
        self.agg_values = X.groupby(by=['amount_discretized']).mean()
        self.agg_values = self.agg_values[self.columns_to_agg]
        self.agg_values.columns = [
            x + "_mean_given_amount" for x in self.agg_values.columns
        ]
        return self

    def transform(self, X, y=None):
        """Return only the per-bin mean features for each row of ``X``."""
        # Copy so the caller's frame is not mutated in place.
        X = X.copy()
        X['amount_discretized'] = self.kbins.transform(X[['amount']].values)
        # ``agg_values`` keeps 'amount_discretized' as its index; pandas
        # matches ``on=`` against index level names on the right side.
        X = X.merge(self.agg_values, how='left', on=['amount_discretized'])
        # Drop everything but the new aggregate feature columns.
        X.drop(self.initial_columns + ['amount_discretized'], axis=1,
               inplace=True)
        return X
def test_redundant_bins(strategy, expected_bin_edges):
    """Degenerate data must collapse redundant bins and warn the user."""
    samples = [[0], [0], [0], [0], [3], [3]]
    discretizer = KBinsDiscretizer(n_bins=3, strategy=strategy)
    expected_warning = "Consider decreasing the number of bins."
    with pytest.warns(UserWarning, match=expected_warning):
        discretizer.fit(samples)
    assert_array_almost_equal(discretizer.bin_edges_[0], expected_bin_edges)
class DiscretizeTransformer(object):
    """Discretize continuous columns into several bins.

    Transformation result is an int array.
    """

    def __init__(self, meta, n_bins):
        self.meta = meta
        # Indices of the continuous columns. ``idx`` avoids shadowing the
        # builtin ``id`` used by the original.
        self.c_index = [
            idx for idx, info in enumerate(meta) if info['type'] == CONTINUOUS
        ]
        self.kbin_discretizer = KBinsDiscretizer(n_bins=n_bins,
                                                 encode='ordinal',
                                                 strategy='uniform')

    def fit(self, data):
        """Fit the discretizer on the continuous columns, if any."""
        # Empty-list truthiness check instead of ``== []``.
        if not self.c_index:
            return
        self.kbin_discretizer.fit(data[:, self.c_index])

    def transform(self, data):
        """Return a copy of ``data`` with continuous columns binned, as int."""
        if not self.c_index:
            return data.astype('int')
        data_t = data.copy()
        data_t[:, self.c_index] = self.kbin_discretizer.transform(
            data[:, self.c_index])
        return data_t.astype('int')

    def inverse_transform(self, data):
        """Map bin ids back to continuous values (float32 copy)."""
        if not self.c_index:
            return data
        data_t = data.copy().astype('float32')
        data_t[:, self.c_index] = self.kbin_discretizer.inverse_transform(
            data[:, self.c_index])
        return data_t
def preprocess(self, X, method):
    """Preprocess count data into a model-friendly range.

    Parameters
    ----------
    X : array-like
        Raw (non-negative) counts.
    method : str
        "bucket": quantile-bin into 10 ordinal bins, scaled by 1/10.
        "clip":   cap values at 1000; the excess grows only as a sqrt.
        "log":    elementwise log with log(0) replaced by 0.

    Returns
    -------
    The preprocessed array.

    Raises
    ------
    ValueError
        If ``method`` is not one of the recognised names.
    """
    if method == "bucket":  # scales into 0-1 range with bins
        print("using the bucket prep method")
        from sklearn.preprocessing import KBinsDiscretizer
        est = KBinsDiscretizer(n_bins=10, encode="ordinal",
                               strategy="quantile")
        est.fit(X)
        X_processed = est.transform(X)
        # Ordinal bin ids are 0..9, so this actually yields 0-0.9,
        # not the full 0-1 range the docstring-of-old promised.
        X_processed /= 10
        return X_processed
    elif method == "clip":  # clips the raw counts into a certain range
        print("using the clip prep method")
        cutoff = 1000
        # Values <= cutoff pass through; the part above cutoff is
        # compressed through a square root.
        X_processed = np.minimum(X, cutoff) + np.sqrt(
            np.maximum(X - cutoff, 0))
        return X_processed
    elif method == "log":  # takes the log of the count
        print("using the log prep method")
        import numpy.ma as ma
        # Masked log: invalid entries (log of 0) are filled with 0.
        mask = ma.log(X)
        X_processed = ma.fix_invalid(mask, fill_value=0).data
        return X_processed
    else:
        # ValueError is the idiomatic error for a bad argument and is an
        # Exception subclass, so existing ``except Exception`` callers
        # still catch it.
        raise ValueError("Incorrect preprocess method name passed!")
class KBinsDiscretizer(Transformer):
    """In-place discretization transformer wrapping sklearn's KBinsDiscretizer."""

    def __init__(self, n_bins=3, strategy='uniform'):
        super().__init__("discretizer", 24)
        self.input_type = NUMERICAL
        self.output_type = DISCRETE
        self.compound_mode = 'in_place'
        self.n_bins = n_bins
        self.strategy = strategy

    @ease_trans
    def operate(self, input_datanode: DataNode, target_fields=None):
        from sklearn.preprocessing import KBinsDiscretizer

        X, y = input_datanode.data
        # Default to every field matching this transformer's input type.
        if target_fields is None:
            target_fields = collect_fields(input_datanode.feature_types,
                                           self.input_type)
        selected = X[:, target_fields]

        # Lazily build and fit the underlying sklearn model on first use.
        if not self.model:
            self.model = KBinsDiscretizer(n_bins=self.n_bins,
                                          encode='ordinal',
                                          strategy=self.strategy)
            self.model.fit(selected)
        return self.model.transform(selected)

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        n_bins = UniformIntegerHyperparameter('n_bins', 2, 20, default_value=5)
        cs.add_hyperparameters([n_bins])
        return cs
def test_KBinsDiscretizer_ordinal():
    """Ordinal-encoded binning should report the numeric feature names."""
    binner = KBinsDiscretizer(n_bins=5, encode="ordinal")
    binner.fit(X[numeric])
    expected = pd.DataFrame({"name": numeric, "feature": numeric})
    assert feat(binner, numeric).equals(expected)
def q2():
    """Count observations falling in the top decile of Pop_density."""
    binner = KBinsDiscretizer(n_bins=10, encode='ordinal')
    binner.fit(countries[['Pop_density']])
    binned = binner.transform(countries[['Pop_density']])
    # Bin id 9 is the highest of the 10 ordinal bins.
    return int(np.sum(binned >= 9))
def feature_eng(X):
    """Feature engineering for a passenger DataFrame (Titanic-style schema).

    Mutates ``X`` in place (adds derived columns, drops identifiers) and
    returns it.
    """
    # Extract an honorific title from the raw name, default missing ones to
    # 'Miss', then collapse rare titles via replace_titles.
    X['Title'] = X['Name'].apply(get_title)
    X['Title'] = X['Title'].fillna('Miss')
    X['Title'] = X['Title'].apply(replace_titles)
    # Impute missing ages with the median age of the (Sex, Pclass, Title) group.
    X.loc[X.Age.isnull(), 'Age'] = X.groupby(['Sex','Pclass','Title']).Age.transform('median')
    X['Pclass'] = X['Pclass'].apply(lambda x: 'first' if x==1 else 'second' if x==2 else 'third')
    # Discretize Age into ordinal bins (KBinsDiscretizer default n_bins=5).
    binner = KBinsDiscretizer(encode='ordinal')
    binner.fit(X[['Age']])
    X['AgeBins'] = binner.transform(X[['Age']])
    # Family size = traveller + siblings/spouses + parents/children.
    X['FamilySize'] = X['SibSp'] + X['Parch'] + 1
    family_map = {1: 'Alone', 2: 'Small', 3: 'Small', 4: 'Small',
                  5: 'Large', 6: 'Large', 7: 'Large', 8: 'Large', 11: 'Large'}
    X['GroupSize'] = X['FamilySize'].map(family_map)
    X['WithFamily'] = (X['FamilySize']>1)
    # bool compares equal to 1, so True -> 'yes' and False -> 'no'.
    X['WithFamily'] = X['WithFamily'].apply(lambda x: 'yes' if x==1 else 'no')
    # Treat missing and zero fares as unknown; impute with the median fare.
    X.loc[(X.Fare.isnull()), 'Fare'] = X.Fare.median()
    X.loc[(X.Fare==0), 'Fare'] = X.Fare.median()
    # Re-fit the SAME binner on Fare — this overwrites the Age fit above.
    binner.fit(X[['Fare']])
    X['FareBins'] = binner.transform(X[['Fare']])
    # Deck is the first letter of the cabin code; unknown cabins get 'N'.
    X["Deck"] = X["Cabin"].str.slice(0,1)
    X["Deck"] = X["Deck"].fillna("N")
    # Fold deck 'T' into 'A' (presumably a data oddity — confirm upstream).
    idx = X[X['Deck'] == 'T'].index
    X.loc[idx, 'Deck'] = 'A'
    # Impute missing embarkation port with the most frequent value.
    X['Embarked'].fillna(X['Embarked'].mode()[0], inplace=True)
    # Drop pure identifiers with no predictive signal.
    X.drop('PassengerId', axis=1, inplace=True)
    X.drop('Ticket', axis=1, inplace=True)
    X.drop('Name', axis=1, inplace=True)
    return X
def main(dataset_name: str):
    """Fit and pickle quantile discretizers over a dataset's amounts.

    Loads the train and valid LM splits, pools every value found under the
    'amounts' key, and saves a 100-bin and a 50-bin quantile
    KBinsDiscretizer under ``presets/<dataset_name>/discretizers/``.
    """
    train = load_jsonlines('../data/' + dataset_name + '/lm/train.jsonl')
    valid = load_jsonlines('../data/' + dataset_name + '/lm/valid.jsonl')

    # Pool all amounts from both splits into one (n, 1) column vector.
    amounts = []
    for record in train + valid:
        amounts.extend(record['amounts'])
    amounts = np.array(amounts).reshape(-1, 1)

    # One fit + pickle per bin count; the loop replaces the duplicated
    # 100-bin / 50-bin code of the original (same order, same file names).
    for n_bins in (100, 50):
        dis = KBinsDiscretizer(n_bins=n_bins, encode='ordinal',
                               strategy='quantile')
        dis.fit(amounts)
        path = ('presets/' + dataset_name + '/discretizers/'
                + str(n_bins) + '_quantile')
        with open(path, 'wb') as f:
            pickle.dump(dis, f)
    return
def discretizer(df_list, names, config):
    """Concatenate ``df_list`` into one DataFrame and discretize columns.

    Parameters
    ----------
    df_list: object
        a collection of one or more pd.DataFrame. they must have one
        column named 'id' for indexing.
    names : list
        a list of the continuous variable column's name. If it's None,
        all continuous columns are detected and discretized.
    config : object
        the object of parameters (n_bins, method, index_col).

    Returns
    ----------
    df_list_t : object
        the df_list after the transformation, still a collection of
        pd.DataFrame with one '<name>_discred' column added per input name.
    """
    data = concat_df_list(df_list, config.index_col)
    if names is None:
        names, _ = get_continue_feature(data)
    for name in names:
        kbdis = KBinsDiscretizer(n_bins=config.n_bins, encode="ordinal",
                                 strategy=config.method)
        # BUG FIX: KBinsDiscretizer requires 2-D input. ``data[name]`` is a
        # 1-D Series and makes fit() raise; select a one-column DataFrame
        # with ``data[[name]]`` instead.
        kbdis.fit(data[[name]])
        data.loc[:, name + "_discred"] = kbdis.transform(data[[name]])
    df_list_t = retrun_df_list(df_list, data, config)
    return df_list_t
def test_invalid_encode_option():
    """Fitting with an unknown ``encode`` value must raise ValueError."""
    discretizer = KBinsDiscretizer(n_bins=[2, 3, 3, 3],
                                   encode='invalid-encode')
    expected = (r"Valid options for 'encode' are "
                r"\('onehot', 'onehot-dense', 'ordinal'\). "
                r"Got encode='invalid-encode' instead.")
    with pytest.raises(ValueError, match=expected):
        discretizer.fit(X)
def q2():
    """Answer to question 2: number of observations in the top decile of
    Pop_density (bin id >= 9 of a 10-bin quantile split)."""
    kbins_discretizer = KBinsDiscretizer(n_bins=10, encode='ordinal',
                                         strategy='quantile')
    kbins_discretizer.fit(countries[['Pop_density']])
    bins_score = kbins_discretizer.transform(countries[['Pop_density']])
    # The original had an unreachable ``pass`` after this return; removed.
    return int((bins_score >= 9).sum())
def test_invalid_strategy_option():
    """Fitting with an unknown ``strategy`` value must raise ValueError."""
    discretizer = KBinsDiscretizer(n_bins=[2, 3, 3, 3],
                                   strategy='invalid-strategy')
    expected = (r"Valid options for 'strategy' are "
                r"\('uniform', 'quantile', 'kmeans'\). "
                r"Got strategy='invalid-strategy' instead.")
    with pytest.raises(ValueError, match=expected):
        discretizer.fit(X)
def discrete_state(_, __, pole_angle, pole_vel) -> Tuple[int, ...]:
    """Map a continuous (angle, velocity) observation to discrete bin ids."""
    binner = KBinsDiscretizer(n_bins=num_bins, encode='ordinal',
                              strategy='uniform')
    # Fitting on the two bound rows makes the uniform bins span exactly
    # [l_bounds, u_bounds] per feature.
    binner.fit([l_bounds, u_bounds])
    encoded = binner.transform([[pole_angle, pole_vel]])[0]
    return tuple(int(v) for v in encoded)
def q2():
    """Number of observations above the 90th-percentile position."""
    binner = KBinsDiscretizer(n_bins=10, encode='ordinal',
                              strategy='quantile')
    binner.fit(df[['Pop_density']])
    binned = binner.transform(df[["Pop_density"]])
    # Index of the 90th-percentile position; everything past it is counted.
    cutoff = math.trunc((90 / 100) * len(binned))
    return len(binned) - cutoff
def test_kbinsdiscretizer_subsample_warn():
    """A fit on more than 2e5 rows must warn about the subsample default."""
    data = np.random.rand(200001, 1).reshape(-1, 1)
    discretizer = KBinsDiscretizer(n_bins=100, encode="ordinal",
                                   strategy="quantile")
    expected = "In version 1.3 onwards, subsample=2e5 will be used by default."
    with pytest.warns(FutureWarning, match=expected):
        discretizer.fit(data)
def preprocess_data(dataset, prune_experiments=False, bins=None):
    """Build (X, Y) matrices and fitted scalers from raw experiment rows.

    Parameters
    ----------
    dataset : ndarray
        Rows of the form [exp_id, ?, x, y, qx, qy, qz, qw, speed, steering]
        (column meaning inferred from the slicing below — confirm).
    prune_experiments : bool
        Drop leading rows of each experiment until motion is detected.
    bins : int or None
        When given, targets are discretized into ``bins`` uniform bins
        (classification mode) instead of min-max normalized.

    Returns
    -------
    X, Y, scalerX, scalerY
    """
    # Prune rows at beginning of experiments where speed and steering are
    # too small.
    if prune_experiments:
        pruned_dataset = []
        current_exp = dataset[0][0]
        current_exp_has_started = False
        for row in dataset:
            # Check if we just changed experiment.
            exp = row[0]
            if (exp != current_exp):
                current_exp = exp
                current_exp_has_started = False
            # Check if the ball has started to move.
            if abs(row[8]) >= 5 or abs(row[9]) >= 5:
                current_exp_has_started = True
            # If ball has started to move then add row.
            if current_exp_has_started:
                pruned_dataset.append(row)
        # Set dataset to pruned version.
        dataset = np.array(pruned_dataset)
    # Create input matrix X: position, quaternion, and derived Euler angles.
    pos_X = np.array(dataset[:, 2:4], dtype='float64')
    quat_X = np.array(dataset[:, 4:8], dtype='float64')
    euler_X = np.matrix(
        [quaternion_to_euler(q[0], q[1], q[2], q[3]) for q in quat_X])
    # Join blocks.
    X = np.concatenate((pos_X, quat_X, euler_X), axis=1)
    # Normalize X into [0, 1].
    scalerX = MinMaxScaler()
    scalerX.fit(X)
    X = scalerX.transform(X)
    # Create target matrix Y from speed and steering.
    speed_y = np.array(dataset[:, 8], dtype='float64')
    steering_y = np.array(dataset[:, 9], dtype='float64')
    Y = np.column_stack((speed_y, steering_y))
    # Classification. This has been UNTESTED upstream.
    # ``is not None`` replaces the original ``!= None`` comparison.
    if bins is not None:
        scalerY = KBinsDiscretizer(n_bins=bins, encode='ordinal',
                                   strategy='uniform')
        scalerY.fit(Y)
        Y = scalerY.transform(Y)
    else:
        # Normalize Y.
        scalerY = MinMaxScaler()
        scalerY.fit(Y)
        Y = scalerY.transform(Y)
    return X, Y, scalerX, scalerY
def bins(self, data, labels):
    """Compute per-feature bin edges using elbow-selected k-means binning.

    For each feature index in ``self.to_discretize``, the bin count is
    chosen with a KElbowVisualizer over KMeans (k in 3..12), then the
    edges come from a 'kmeans' KBinsDiscretizer fitted on that column.

    Parameters
    ----------
    data : 2-D array; only columns listed in ``self.to_discretize`` are used.
    labels : unused here — presumably kept for interface symmetry; confirm.

    Returns
    -------
    list of sorted edge arrays, one entry per discretized feature.
    """
    bins = []
    for feature in self.to_discretize:
        # One feature as an (n, 1) column, as sklearn expects.
        x = np.reshape(data[:, feature], (-1, 1))
        model = KMeans()
        visualizer = KElbowVisualizer(model, k=(3,12))
        visualizer.fit(x)
        # The elbow point decides the number of bins for this feature.
        n_bins = visualizer.elbow_value_
        kmeans = KBinsDiscretizer(n_bins=n_bins, encode='ordinal',
                                  strategy='kmeans')
        if self.filename != "":
            # Persist the elbow plot for offline inspection, then close
            # every open figure to avoid leaking matplotlib state.
            plt.show(block=False)
            plt.savefig(self.filename + "/" +
                        str(self.feature_names[feature]) +
                        "_elbow_visualisation.png")
            plt.close('all')
        kmeans.fit(x)
        qts = []
        for biner in kmeans.bin_edges_:
            qts.append(biner)
        qts = np.array(qts)
        # Fallback: no edges produced -> use the column median as the
        # single split point; otherwise sort the collected edges.
        if qts.shape[0] == 0:
            qts = np.array([np.median(data[:, feature])])
        else:
            qts = np.sort(qts)
        bins.append(qts)
    return bins
def q2():
    """Count observations in the top bin of a 10-way quantile split."""
    from sklearn.preprocessing import KBinsDiscretizer
    binner = KBinsDiscretizer(n_bins=10, encode='ordinal',
                              strategy='quantile')
    binner.fit(countries[['Pop_density']])
    binned = binner.transform(countries[['Pop_density']])
    # Ordinal codes run 0..9; >= 9 selects the last (top-decile) bin.
    return int(sum(binned[:, 0] >= 9))
def KBinsDiscretizer_continuos(dt, attributes=None, bins=3):
    """Quantile-discretize continuous columns into labelled string ranges.

    Columns (among ``attributes``) with more than 10 distinct values are
    split into ``bins`` quantile bins and rewritten as labels of the form
    "<=a", "(a-b]", ">b" built from rounded interior edges; all other
    selected columns are cast to object dtype. Returns a new DataFrame;
    ``dt`` itself is not modified.
    """
    attributes = dt.columns if attributes is None else attributes
    # ``np.object`` was an alias of the builtin ``object`` and was removed
    # in NumPy 1.24 — compare against ``object`` directly.
    continuous_attributes = [
        a for a in attributes if dt.dtypes[a] != object
    ]
    X_discretize = dt[attributes].copy()
    for col in continuous_attributes:
        if len(dt[col].value_counts()) > 10:
            from sklearn.preprocessing import KBinsDiscretizer
            est = KBinsDiscretizer(n_bins=bins, encode='ordinal',
                                   strategy='quantile')
            est.fit(dt[[col]])
            # Interior edges only, rounded to ints (outermost edges dropped).
            edges = [i.round() for i in est.bin_edges_][0]
            edges = [int(i) for i in edges][1:-1]
            # Drop consecutive duplicate edges produced by skewed data.
            if len(set(edges)) != len(edges):
                edges = [
                    edges[i] for i in range(0, len(edges))
                    if len(edges) - 1 == i or edges[i] != edges[i + 1]
                ]
            for i in range(0, len(edges)):
                if i == 0:
                    data_idx = dt.loc[dt[col] <= edges[i]].index
                    X_discretize.loc[data_idx, col] = f"<={edges[i]}"
                if i == len(edges) - 1:
                    data_idx = dt.loc[dt[col] > edges[i]].index
                    X_discretize.loc[data_idx, col] = f">{edges[i]}"
                if i > 0:
                    # Half-open interior interval (edges[i-1], edges[i]].
                    # The original also ran this for i == 0 using
                    # edges[-1]; with sorted edges that selection is
                    # provably empty, so guarding with i > 0 preserves
                    # behavior while avoiding the confusing wrap-around.
                    data_idx = dt.loc[(dt[col] > edges[i - 1])
                                      & (dt[col] <= edges[i])].index
                    X_discretize.loc[data_idx, col] = \
                        f"({edges[i-1]}-{edges[i]}]"
        else:
            X_discretize[col] = X_discretize[col].astype('object')
    return X_discretize
class KBinsDiscreteFeatureExtractor(BaseTransformer):
    """Ordinal bin-id features for the given columns.

    ``strategy`` may be 'uniform', 'quantile' or 'kmeans'.
    """

    def __init__(self, cols, n_bins=10, encode="ordinal", strategy="uniform"):
        self.cols = cols
        # Broadcast a scalar bin count to one entry per column.
        # isinstance() replaces the original ``type(n_bins) != int`` test.
        if isinstance(n_bins, int):
            self.n_bins = [n_bins for _ in range(len(cols))]
        else:
            self.n_bins = n_bins
        self.kbin = KBinsDiscretizer(n_bins=self.n_bins, encode=encode,
                                     strategy=strategy)
        self.encode = encode
        self.strategy = strategy

    def _get_column_names(self):
        # One output name per (column, bin-count) pair.
        return [
            f"k{bins}bins{self.strategy}category_{col}"
            for col, bins in zip(self.cols, self.n_bins)
        ]

    def fit(self, X):
        # NaNs are replaced by 0 before binning.
        self.kbin.fit(X[self.cols].fillna(0))

    def transform(self, X):
        df = pd.DataFrame()
        value = self.kbin.transform(X[self.cols].fillna(0))
        # enumerate(zip(...)) replaces the index-based zip of the original.
        for i, (col, bins) in enumerate(zip(self.cols, self.n_bins)):
            df[f"k{bins}bins{self.strategy}category_{col}"] = value[:, i]
        return df
def popularity_classification(wgt_pop, n_bins=5, encode='ordinal',
                              strategy='quantile'):
    """Discretize continuous popularity probability into interval classes
    (Feature Binarization).

    Args:
        wgt_pop: df. Output from calc_popularity().
        n_bins: int. Number of classes/bins.
        encode: 'onehot', 'onehot-dense' or 'ordinal', where the latter
            returns the bin identifier encoded as an integer value.
        strategy: 'uniform', 'kmeans' or 'quantile', where the latter
            distributes all points equally between the bins.

    Returns:
        df with popularity and popularity_class (classes numbered from 1).
    """
    # Popularity values as an (n, 1) column vector for sklearn.
    values = np.array(wgt_pop).reshape(-1, 1)
    binner = KBinsDiscretizer(n_bins=n_bins, encode=encode,
                              strategy=strategy)
    binner.fit(values)
    encoded = pd.DataFrame(binner.transform(values))
    # Append the bin identifier to a copy of the original dataframe.
    result = pd.DataFrame(wgt_pop.copy())
    # +1 so the class labels start at 1 rather than 0.
    result['popularity_class'] = [int(v) + 1 for v in encoded.values]
    return result
def _get_bins(num_bins: int, values: List[Number]):
    """
    Perform equal frequency binning (i.e. quantile binning) to bin the
    values into `num_bins` bins.

    Parameters
    ----------
    num_bins
    values

    Returns
    -------
    bin indices
    """
    # sklearn expects a 2-D array: one sample per row.
    column = np.array(values).reshape(-1, 1)
    binner = KBinsDiscretizer(n_bins=num_bins, encode="ordinal",
                              strategy="quantile")
    binner.fit(column)
    # Flatten back to one bin id per training pair.
    bin_idx = binner.transform(column).reshape(-1)
    # Debug
    _log.debug(f"Split heuristic values into {num_bins} bins")
    _log.debug(f"Bin Edges: {binner.bin_edges_}")
    _log.debug(f"Bin Frequencies: {Counter(bin_idx.tolist())}")
    _log.debug(f"Heuristic Frequencies: {Counter(values)}")
    return bin_idx
class DiscretizeTransformer(Transformer):
    """Discretize continuous columns into several bins.

    Attributes:
        meta: per-column metadata built by ``get_metadata``.
        column_index: indices of the continuous columns.
        discretizer(sklearn.preprocessing.KBinsDiscretizer)

    Transformation result is an int array.
    """

    def __init__(self, n_bins):
        self.n_bins = n_bins
        self.meta = None
        self.column_index = None
        self.discretizer = None

    def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        self.meta = self.get_metadata(data, categorical_columns,
                                      ordinal_columns)
        self.column_index = [
            index for index, info in enumerate(self.meta)
            if info['type'] == CONTINUOUS
        ]
        self.discretizer = KBinsDiscretizer(n_bins=self.n_bins,
                                            encode='ordinal',
                                            strategy='uniform')
        # No continuous columns -> nothing to fit.
        if not self.column_index:
            return
        self.discretizer.fit(data[:, self.column_index])

    def transform(self, data):
        """Transform data discretizing continous values.

        Args:
            data(pandas.DataFrame)

        Returns:
            numpy.ndarray
        """
        # ``not`` truthiness check instead of ``== []``.
        if not self.column_index:
            return data.astype('int')
        # Work on a copy: the original wrote bin ids back into the
        # caller's array in place before casting.
        data = data.copy()
        data[:, self.column_index] = self.discretizer.transform(
            data[:, self.column_index])
        return data.astype('int')

    def inverse_transform(self, data):
        if not self.column_index:
            return data
        # astype returns a new array, so the caller's input stays intact.
        data = data.astype('float32')
        data[:, self.column_index] = self.discretizer.inverse_transform(
            data[:, self.column_index])
        return data
class Affect:
    """
    Class that calculates the average Affect score based on absolute
    sentiment polarity values. This approach is an initial approximation of
    the concept, and should be refined in the future. Should also implement
    polarity analysis at index time.
    """

    def __init__(self, config):
        # Five uniform bins over the absolute-sentiment range of the pool.
        n_bins = 5
        self.bins_discretizer = KBinsDiscretizer(encode='ordinal',
                                                 n_bins=n_bins,
                                                 strategy='uniform')
        # Silence sklearn's UserWarnings (e.g. collapsed-bin warnings).
        warnings.filterwarnings("ignore", category=UserWarning)

    def compute_distr(self, arr, bins_discretizer, adjusted=False):
        """
        Args:
            arr: (n, 1) array of absolute sentiment values.
            bins_discretizer: an already-fitted KBinsDiscretizer.
            adjusted: when True, weight each item by 1/rank, normalized by
                the harmonic number of n (rank-discounted distribution).
        Return:
            dict mapping bin id -> (possibly rank-discounted) frequency.
        """
        n = len(arr)
        sum_one_over_ranks = harmonic_number(n)
        arr_binned = bins_discretizer.transform(arr)
        distr = {}
        if adjusted:
            # Rank-discounted frequencies: items earlier in ``arr``
            # (lower index -> higher rank) contribute more mass.
            for bin in list(range(bins_discretizer.n_bins)):
                for indx, ele in enumerate(arr_binned[:, 0]):
                    if ele == bin:
                        rank = indx + 1
                        bin_freq = distr.get(bin, 0.)
                        distr[
                            bin] = bin_freq + 1 * 1 / rank / sum_one_over_ranks
        else:
            # Plain relative frequency per bin, rounded to 3 decimals.
            for bin in list(range(bins_discretizer.n_bins)):
                distr[bin] = round(
                    np.count_nonzero(arr_binned == bin) / arr_binned.shape[0],
                    3)
        return distr

    def calculate(self, pool, recommendation):
        """Return [discounted, plain] KL divergences between the affect
        distributions of ``pool`` and ``recommendation``."""
        pool_affect = np.array(pool.sentiment.apply(lambda x: abs(x))).reshape(
            -1, 1)
        recommendation_affect = np.array(
            recommendation.sentiment.apply(lambda x: abs(x))).reshape(-1, 1)
        # arr_pool = np.array([abs(item.sentiment) for item in pool]).reshape(-1, 1)
        # arr_recommendation = np.array([abs(item.sentiment) for item in recommendation]).reshape(-1, 1)
        # Bin edges are fitted on the pool only, so both distributions
        # share the same binning.
        self.bins_discretizer.fit(pool_affect)
        distr_pool = self.compute_distr(pool_affect, self.bins_discretizer,
                                        False)
        distr_recommendation = self.compute_distr(recommendation_affect,
                                                  self.bins_discretizer, True)
        divergence_with_discount = compute_kl_divergence(
            distr_pool, distr_recommendation)
        distr_recommendation = self.compute_distr(recommendation_affect,
                                                  self.bins_discretizer, False)
        divergence_without_discount = compute_kl_divergence(
            distr_pool, distr_recommendation)
        return [divergence_with_discount, divergence_without_discount]
class KBinsDiscretizerComponent(AutoSklearnPreprocessingAlgorithm):
    """auto-sklearn preprocessing component wrapping KBinsDiscretizer."""

    def __init__(self, n_bins: int = 5, encode: str = "onehot",
                 strategy: str = "quantile", random_state=None):
        super().__init__()
        self.n_bins = n_bins
        self.encode = encode
        self.strategy = strategy
        self.random_state = random_state

    def fit(self, X, Y=None):
        from sklearn.preprocessing import KBinsDiscretizer

        # n_bins may arrive as a non-int from the config space; coerce.
        self.preprocessor = KBinsDiscretizer(n_bins=int(self.n_bins),
                                             encode=self.encode,
                                             strategy=self.strategy)
        self.preprocessor.fit(X, Y)
        return self

    def transform(self, X):
        if self.preprocessor is None:
            raise NotImplementedError()
        return self.preprocessor.transform(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {
            'shortname': 'KBinsDiscretizer',
            'name': 'K-Bins Discretizer',
            'handles_regression': True,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': True,
            'handles_multioutput': True,
            'is_deterministic': True,
            'input': (DENSE, UNSIGNED_DATA),
            'output': (INPUT, )
        }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            UniformIntegerHyperparameter("n_bins", 2, 100, default_value=5),
            CategoricalHyperparameter(
                "encode", ["onehot", "onehot-dense", "ordinal"],
                default_value="onehot"),
            CategoricalHyperparameter(
                "strategy", ["uniform", "quantile", "kmeans"],
                default_value="quantile"),
        ])
        return cs
def generate_discretizer(pageviews):
    """Fit a k-means KBinsDiscretizer on the sorted page-view counts."""
    # Sorted counts as an (n, 1) column vector for sklearn.
    view_numbers = np.array(sorted(pageviews.values())).reshape(-1, 1)
    discretizer = KBinsDiscretizer(encode='ordinal', n_bins=number_bins(),
                                   strategy='kmeans')
    discretizer.fit(view_numbers)
    return [discretizer, view_numbers]
def test_transform_1d_behavior():
    """Both fit and transform must reject 1-D input."""
    data = np.arange(4)
    est = KBinsDiscretizer(n_bins=2)
    assert_raises(ValueError, est.fit, data)
    est = KBinsDiscretizer(n_bins=2)
    est.fit(data.reshape(-1, 1))
    assert_raises(ValueError, est.transform, data)
def discretizer(_, __, angle, pole_velocity) -> Tuple[int, ...]:
    '''Convert continuous states into a discrete state for Q-Learning'''
    # Uniform bins spanning [lower_bounds, upper_bounds] per feature.
    binner = KBinsDiscretizer(n_bins=n_bins, encode='ordinal',
                              strategy='uniform')
    binner.fit([lower_bounds, upper_bounds])
    encoded = binner.transform([[angle, pole_velocity]])[0]
    return tuple(int(value) for value in encoded)
def test_transform_1d_behavior():
    """fit and transform both require 2-D input and raise otherwise."""
    flat = np.arange(4)
    discretizer = KBinsDiscretizer(n_bins=2)
    assert_raises(ValueError, discretizer.fit, flat)
    discretizer = KBinsDiscretizer(n_bins=2)
    discretizer.fit(flat.reshape(-1, 1))
    assert_raises(ValueError, discretizer.transform, flat)
def q2():
    """Count observations whose Pop_density lands exactly in bin 9."""
    binner = KBinsDiscretizer(n_bins=10, encode='ordinal',
                              strategy='quantile')
    binner.fit(countries[['Pop_density']])
    bin_ids = binner.transform(countries[['Pop_density']])
    # Bin 9 is the last of the 10 ordinal bins (top decile).
    return int(sum(bin_ids[:, 0] == 9))
def test_transform_outside_fit_range(strategy):
    """Values outside the fitted range clip to the first and last bin."""
    train = np.array([0, 1, 2, 3])[:, None]
    kbd = KBinsDiscretizer(n_bins=4, strategy=strategy, encode='ordinal')
    kbd.fit(train)
    outside = np.array([-2, 5])[:, None]
    transformed = kbd.transform(outside)
    # 5 maps to the last bin, -2 to the first.
    assert_array_equal(transformed.max(axis=0) + 1, kbd.n_bins_)
    assert_array_equal(transformed.min(axis=0), [0])
def test_fit_transform(strategy, expected):
    """fit followed by transform must reproduce the expected ordinal codes."""
    discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal',
                                   strategy=strategy)
    discretizer.fit(X)
    assert_array_equal(expected, discretizer.transform(X))
# Build a dense 300x300 evaluation grid spanning the data range of both
# features; ``grid`` flattens it to (90000, 2) samples.
xx, yy = np.meshgrid(
    np.linspace(X[:, 0].min(), X[:, 0].max(), 300),
    np.linspace(X[:, 1].min(), X[:, 1].max(), 300))
grid = np.c_[xx.ravel(), yy.ravel()]
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xticks(())
ax.set_yticks(())
i += 1  # advance the subplot index (presumably driven by an outer loop over X_list — confirm)
# transform the dataset with KBinsDiscretizer
for strategy in strategies:
    enc = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy=strategy)
    enc.fit(X)
    grid_encoded = enc.transform(grid)
    ax = plt.subplot(len(X_list), len(strategies) + 1, i)
    # horizontal stripes: bin id of feature 0 over the grid
    horizontal = grid_encoded[:, 0].reshape(xx.shape)
    ax.contourf(xx, yy, horizontal, alpha=.5)
    # vertical stripes: bin id of feature 1 over the grid
    vertical = grid_encoded[:, 1].reshape(xx.shape)
    ax.contourf(xx, yy, vertical, alpha=.5)
    ax.scatter(X[:, 0], X[:, 1], edgecolors='k')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())