import numpy
from numpy.random import RandomState
# bin_to_group_indices and the other helpers exercised below are assumed to come from hep_ml.metrics_utils
from hep_ml.metrics_utils import bin_to_group_indices

def test_bin_to_group_indices(size=100, bins=10):
    bin_indices = RandomState().randint(0, bins, size=size)
    mask = RandomState().randint(0, 2, size=size) > 0.5
    group_indices = bin_to_group_indices(bin_indices, mask=mask)
    # every masked (signal) event should appear in exactly one group
    assert numpy.sum([len(group) for group in group_indices]) == numpy.sum(mask)
    a = numpy.sort(numpy.concatenate(group_indices))
    b = numpy.where(mask > 0.5)[0]
    assert numpy.all(a == b), 'group indices are computed wrongly'
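The assertions above pin down the contract of bin_to_group_indices: one array of event indices per bin, restricted to events passing the mask, and together the groups cover every masked event exactly once. A minimal pure-numpy sketch of that behaviour, inferred from the test rather than taken from the library:

import numpy

def bin_to_group_indices_sketch(bin_indices, mask):
    # For every bin, collect indices of events that fall in it AND pass the mask.
    groups = []
    for bin_id in range(numpy.max(bin_indices) + 1):
        groups.append(numpy.where((bin_indices == bin_id) & mask)[0])
    return groups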
def generate_binned_dataset(n_samples, n_bins):
    """Helper: generates a dataset with random labels, predictions, weights,
    bin indices and the corresponding signal groups.
    Used to test correlation (uniformity) functions."""
    random = RandomState()
    y = random.uniform(size=n_samples) > 0.5
    pred = random.uniform(size=(n_samples, 2))
    weights = random.exponential(size=(n_samples,))
    bins = random.randint(0, n_bins, n_samples)
    groups = bin_to_group_indices(bin_indices=bins, mask=(y == 1))
    return y, pred, weights, bins, groups
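A quick usage sketch for the helper above (hypothetical parameter values): because the groups are built with mask=(y == 1), their total size must equal the number of signal events.

y, pred, weights, bins, groups = generate_binned_dataset(n_samples=500, n_bins=8)
assert pred.shape == (500, 2)                          # two-column predictions
assert sum(len(g) for g in groups) == numpy.sum(y)     # one slot per signal event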
# bin_based_cvm and group_based_cvm are assumed to come from hep_ml.metrics_utils;
# `random` is expected to be a module-level RandomState, defined here so the snippet runs standalone.
random = RandomState()

def test_cvm(size=1000):
    y_pred = random.uniform(size=size)
    y = random.uniform(size=size) > 0.5
    sample_weight = random.exponential(size=size)
    bin_indices = random.randint(0, 10, size=size)
    mask = y == 1
    groups_indices = bin_to_group_indices(bin_indices=bin_indices, mask=mask)
    # the bin-based CvM over signal events must agree with the group-based CvM
    # computed from groups built from the same binning
    cvm1 = bin_based_cvm(y_pred[mask], sample_weight=sample_weight[mask], bin_indices=bin_indices[mask])
    cvm2 = group_based_cvm(y_pred, mask=mask, sample_weight=sample_weight, groups_indices=groups_indices)
    assert numpy.allclose(cvm1, cvm2)
def test_groups_matrix(size=1000, bins=4):
    # group_indices_to_groups_matrix is assumed to come from hep_ml.metrics_utils
    bin_indices = RandomState().randint(0, bins, size=size)
    mask = RandomState().randint(0, 2, size=size) > 0.5
    n_signal_events = numpy.sum(mask)
    group_indices = bin_to_group_indices(bin_indices, mask=mask)
    assert numpy.sum([len(group) for group in group_indices]) == n_signal_events
    group_matrix = group_indices_to_groups_matrix(group_indices, n_events=size)
    # exactly one non-zero entry per signal event, placed at (its bin, its position)
    assert group_matrix.sum() == n_signal_events
    for event_id, (is_signal, bin) in enumerate(zip(mask, bin_indices)):
        assert group_matrix[bin, event_id] == is_signal
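test_groups_matrix fixes the layout of the groups matrix: one row per group (here, per bin), one column per event, with a one exactly where a signal event sits in that bin. A dense-numpy sketch that satisfies the same assertions (the library may return a sparse matrix instead):

import numpy

def groups_matrix_sketch(group_indices, n_events):
    # Row g carries ones at the positions of the events belonging to group g.
    matrix = numpy.zeros((len(group_indices), n_events), dtype=int)
    for g, events in enumerate(group_indices):
        matrix[g, events] = 1
    return matrix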
def _compute_groups_indices(self, X, y, label):
    """Returns a list; each element is an array of event indices belonging to one group."""
    label_mask = y == label
    extended_bin_limits = []
    # 2 * n_bins + 1 percentile points per uniform feature: every second interior point
    # gives the bin edges, and the two shifts below yield two overlapping binnings
    for var in self.uniform_features:
        extended_bin_limits.append(numpy.percentile(X[var][label_mask], numpy.linspace(0, 100, 2 * self.n_bins + 1)))
    groups_indices = list()
    for shift in [0, 1]:
        bin_limits = []
        for axis_limits in extended_bin_limits:
            bin_limits.append(axis_limits[1 + shift:-1:2])
        # .loc replaces the long-removed pandas .ix for label-based column selection
        bin_indices = compute_bin_indices(X.loc[:, self.uniform_features].values, bin_limits=bin_limits)
        groups_indices += list(bin_to_group_indices(bin_indices, mask=label_mask))
    return groups_indices
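The percentile trick above builds two overlapping binnings per uniform feature: 2 * n_bins + 1 percentile points are computed once, then every second interior point is taken, first with shift 0 and then with shift 1. A tiny standalone illustration on made-up data:

import numpy

values = numpy.random.RandomState(42).uniform(size=1000)   # made-up feature values
n_bins = 4
edges = numpy.percentile(values, numpy.linspace(0, 100, 2 * n_bins + 1))
for shift in [0, 1]:
    # the two passes are offset by half a bin relative to each other
    print('shift', shift, '->', edges[1 + shift:-1:2])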