def test_bins(size=500, n_bins=10):
    """Check that bin indices computed on random 2D data stay in range.

    Builds a two-column frame of ``size`` uniform samples, bins both columns
    with the same ``n_bins - 1`` interior edges of [0, 1], and asserts every
    index returned by ``compute_bin_indices`` lies in ``[0, n_bins * n_bins)``.
    """
    columns = ['var1', 'var2']
    # `random` is a module-level name; presumably numpy.random, given `size=`.
    samples = random.uniform(size=(size, 2))
    df = pandas.DataFrame(samples, columns=columns)

    # Drop the outer edges 0 and 1, keeping only the interior bin boundaries.
    edges = numpy.linspace(0, 1, n_bins + 1)
    x_limits = edges[1:-1]

    features = df[columns].values
    bins = compute_bin_indices(features, bin_limits=[x_limits, x_limits])

    max_bins = n_bins * n_bins
    assert numpy.all(0 <= bins) and numpy.all(bins < max_bins), \
        "the bins with wrong indices appeared"
def _compute_groups_indices(self, X, y, label):
    """Returns a list, each element is events' indices in some group.

    Groups are built only from events whose target equals ``label``. For each
    feature in ``self.uniform_features``, ``2 * self.n_bins + 1`` percentile
    edges are computed over those events; two binnings are then formed, one
    from the odd-indexed interior edges and one from the even-indexed interior
    edges, and the groups from both binnings are concatenated.

    :param X: pandas.DataFrame containing the columns in ``self.uniform_features``
    :param y: array-like of labels, compared element-wise against ``label``
    :param label: label value selecting the events to group
    :return: list of whatever ``bin_to_group_indices`` yields per group
        (presumably index arrays -- confirm against that helper)
    """
    label_mask = y == label

    # The percentile grid is the same for every feature, so build it once.
    percentiles = numpy.linspace(0, 100, 2 * self.n_bins + 1)
    extended_bin_limits = [
        numpy.percentile(X[var][label_mask], percentiles)
        for var in self.uniform_features
    ]

    # `.ix` was removed in pandas 1.0; `.loc` is the label-based replacement.
    # The feature matrix does not depend on the shift, so extract it once.
    features_values = X.loc[:, self.uniform_features].values

    groups_indices = []
    for shift in [0, 1]:
        # shift=0 keeps edges 1, 3, 5, ...; shift=1 keeps edges 2, 4, 6, ...
        bin_limits = [axis_limits[1 + shift:-1:2]
                      for axis_limits in extended_bin_limits]
        bin_indices = compute_bin_indices(features_values, bin_limits=bin_limits)
        groups_indices += list(bin_to_group_indices(bin_indices, mask=label_mask))
    return groups_indices
def test_bins(size=500, n_bins=10):
    """Verify that 2D bin indices of uniform random data fall in the valid range.

    Two columns of ``size`` uniform samples are binned using the interior
    edges of an ``n_bins``-way split of [0, 1]; every resulting index must be
    non-negative and strictly less than ``n_bins * n_bins``.
    """
    columns = ['var1', 'var2']
    # `random` comes from module scope; presumably numpy.random (takes `size=`).
    df = pandas.DataFrame(
        random.uniform(size=(size, 2)),
        columns=columns,
    )

    # linspace yields n_bins + 1 edges; slicing off both ends leaves n_bins - 1.
    x_limits = numpy.linspace(0, 1, n_bins + 1)[1:-1]
    per_axis_limits = [x_limits, x_limits]

    bins = compute_bin_indices(df[columns].values, bin_limits=per_axis_limits)

    total_bins = n_bins * n_bins
    assert numpy.all(0 <= bins) and numpy.all(bins < total_bins), \
        "the bins with wrong indices appeared"