Example #1
0
def _incremental_mean_and_var(X, epsilon, range, last_mean, last_variance,
                              last_sample_count):
    """Merge the statistics of a new batch into running mean/variance totals.

    The batch mean and variance are obtained from ``nanmean`` / ``nanvar``
    (each called with the same ``epsilon`` and ``range``), while the batch
    sample count is the number of non-NaN entries along axis 0 of ``X``.

    Parameters
    ----------
    X : array-like
        The new batch; every reduction here is taken along axis 0.
    epsilon
        Forwarded unchanged to ``nanmean`` and ``nanvar``.
    range
        Forwarded unchanged to ``nanmean`` and ``nanvar``. Note that this
        parameter name hides the ``range`` builtin inside the function.
    last_mean
        Mean accumulated before this batch.
    last_variance
        Variance accumulated before this batch, or ``None`` to skip the
        variance update entirely.
    last_sample_count
        Number of samples accumulated before this batch.

    Returns
    -------
    tuple
        ``(updated_mean, updated_variance, updated_sample_count)``;
        ``updated_variance`` is ``None`` when ``last_variance`` is ``None``.
    """
    # "previous" = totals so far, "batch" = this increment,
    # "updated" = the two combined.
    previous_sum = last_mean * last_sample_count

    batch_mean = nanmean(X, epsilon=epsilon, axis=0, range=range)
    batch_count = np.sum(~np.isnan(X), axis=0)
    batch_sum = batch_mean * batch_count

    updated_sample_count = last_sample_count + batch_count
    updated_mean = (previous_sum + batch_sum) / updated_sample_count

    # Caller is not tracking variance: nothing more to compute.
    if last_variance is None:
        return updated_mean, None, updated_sample_count

    # Work with un-normalised variances (variance * count) so the two parts
    # can simply be added together.
    batch_scatter = nanvar(
        X, epsilon=epsilon, axis=0, range=range) * batch_count
    previous_scatter = last_variance * last_sample_count

    # Zero counts make the ratio below 0, inf or NaN; silence the warnings
    # here and repair the affected entries afterwards.
    with np.errstate(divide='ignore', invalid='ignore'):
        count_ratio = last_sample_count / batch_count
        cross_term = (count_ratio / updated_sample_count *
                      (previous_sum / count_ratio - batch_sum) ** 2)
        combined_scatter = previous_scatter + batch_scatter + cross_term

    # Where nothing had been accumulated before, the ratio is zero and the
    # cross term is not meaningful: use the batch contribution on its own.
    no_history = last_sample_count == 0
    combined_scatter[no_history] = batch_scatter[no_history]

    updated_variance = combined_scatter / updated_sample_count
    return updated_mean, updated_variance, updated_sample_count
Example #2
0
def _incremental_mean_and_var(X, epsilon, bounds, last_mean, last_variance,
                              last_sample_count):
    """Merge the statistics of a new batch into running mean/variance totals.

    The batch mean and variance are obtained from ``nanmean`` / ``nanvar``
    (each called with the same ``epsilon`` and ``bounds`` and a shared,
    locally created ``BudgetAccountant``), while the batch sample count is
    the number of non-NaN entries along axis 0 of ``X``.

    Parameters
    ----------
    X : array-like
        The new batch; every reduction here is taken along axis 0.
    epsilon
        Forwarded unchanged to ``nanmean`` and ``nanvar``.
    bounds
        Forwarded unchanged to ``nanmean`` and ``nanvar``.
    last_mean
        Mean accumulated before this batch.
    last_variance
        Variance accumulated before this batch, or ``None`` to skip the
        variance update entirely.
    last_sample_count
        Number of samples accumulated before this batch.

    Returns
    -------
    tuple
        ``(updated_mean, updated_variance, updated_sample_count)``;
        ``updated_variance`` is ``None`` when ``last_variance`` is ``None``.
    """
    # A throwaway accountant is used here because the budget is tracked in
    # the main class. Subject to review in line with GH issue #21.
    scratch_accountant = BudgetAccountant()

    # "previous" = totals so far, "batch" = this increment,
    # "updated" = the two combined.
    previous_sum = last_mean * last_sample_count

    batch_mean = nanmean(
        X, epsilon=epsilon, axis=0, bounds=bounds,
        accountant=scratch_accountant)
    batch_count = np.sum(~np.isnan(X), axis=0)
    batch_sum = batch_mean * batch_count

    updated_sample_count = last_sample_count + batch_count
    updated_mean = (previous_sum + batch_sum) / updated_sample_count

    # Caller is not tracking variance: nothing more to compute.
    if last_variance is None:
        return updated_mean, None, updated_sample_count

    # Work with un-normalised variances (variance * count) so the two parts
    # can simply be added together.
    batch_scatter = nanvar(
        X, epsilon=epsilon, axis=0, bounds=bounds,
        accountant=scratch_accountant) * batch_count
    previous_scatter = last_variance * last_sample_count

    # Zero counts make the ratio below 0, inf or NaN; silence the warnings
    # here and repair the affected entries afterwards.
    with np.errstate(divide='ignore', invalid='ignore'):
        count_ratio = last_sample_count / batch_count
        cross_term = (count_ratio / updated_sample_count *
                      (previous_sum / count_ratio - batch_sum) ** 2)
        combined_scatter = previous_scatter + batch_scatter + cross_term

    # Where nothing had been accumulated before, the ratio is zero and the
    # cross term is not meaningful: use the batch contribution on its own.
    no_history = last_sample_count == 0
    combined_scatter[no_history] = batch_scatter[no_history]

    updated_variance = combined_scatter / updated_sample_count
    return updated_mean, updated_variance, updated_sample_count