def _incremental_mean_and_var(X, epsilon, range, last_mean, last_variance, last_sample_count):
    """Merge the column statistics of a new batch into running statistics.

    The running mean/variance/count (``last_*``) are combined with the mean,
    variance and count of ``X`` (computed along axis 0) to give statistics
    for the union of everything seen so far.

    Parameters
    ----------
    X
        The new batch. NaN entries are excluded from the per-column sample
        count.
    epsilon
        Forwarded unchanged to ``nanmean`` and ``nanvar``.
    range
        Forwarded unchanged to ``nanmean`` and ``nanvar`` as ``range=``.
        NOTE(review): this name shadows the builtin ``range`` inside the
        function; it is kept because it is part of the signature.
    last_mean
        Mean accumulated before this batch.
    last_variance
        Variance accumulated before this batch, or ``None`` to skip the
        variance update entirely.
    last_sample_count
        Number of samples accumulated before this batch.

    Returns
    -------
    tuple
        ``(updated_mean, updated_variance, updated_sample_count)``;
        ``updated_variance`` is ``None`` when ``last_variance`` is ``None``.
    """
    # Naming convention used below:
    #   prev_*  = statistics accumulated until now
    #   batch_* = statistics of the current increment
    #   total_* = the aggregated statistics
    prev_sum = last_mean * last_sample_count

    batch_mean = nanmean(X, epsilon=epsilon, axis=0, range=range)
    batch_count = np.sum(~np.isnan(X), axis=0)
    batch_sum = batch_mean * batch_count

    total_count = last_sample_count + batch_count
    total_mean = (prev_sum + batch_sum) / total_count

    # Without a previous variance there is nothing to update.
    if last_variance is None:
        return total_mean, None, total_count

    # "Unnormalized" variance = variance scaled by its sample count.
    batch_unnorm_var = nanvar(X, epsilon=epsilon, axis=0, range=range) * batch_count
    prev_unnorm_var = last_variance * last_sample_count

    # Zero counts produce divisions by zero here; the warnings are silenced
    # and the affected entries are overwritten just below.
    with np.errstate(divide='ignore', invalid='ignore'):
        count_ratio = last_sample_count / batch_count
        within = prev_unnorm_var + batch_unnorm_var
        between = count_ratio / total_count * (prev_sum / count_ratio - batch_sum) ** 2
        total_unnorm_var = within + between

    # Where no samples had been seen before, the aggregate is just the batch.
    no_history = last_sample_count == 0
    total_unnorm_var[no_history] = batch_unnorm_var[no_history]
    total_variance = total_unnorm_var / total_count

    return total_mean, total_variance, total_count
def _incremental_mean_and_var(X, epsilon, bounds, last_mean, last_variance, last_sample_count):
    """Merge the column statistics of a new batch into running statistics.

    The running mean/variance/count (``last_*``) are combined with the mean,
    variance and count of ``X`` (computed along axis 0) to give statistics
    for the union of everything seen so far.

    Parameters
    ----------
    X
        The new batch. NaN entries are excluded from the per-column sample
        count.
    epsilon
        Forwarded unchanged to ``nanmean`` and ``nanvar``.
    bounds
        Forwarded unchanged to ``nanmean`` and ``nanvar``.
    last_mean
        Mean accumulated before this batch.
    last_variance
        Variance accumulated before this batch, or ``None`` to skip the
        variance update entirely.
    last_sample_count
        Number of samples accumulated before this batch.

    Returns
    -------
    tuple
        ``(updated_mean, updated_variance, updated_sample_count)``;
        ``updated_variance`` is ``None`` when ``last_variance`` is ``None``.
    """
    # Initialising new accountant, as budget is tracked in main class.
    # Subject to review in line with GH issue #21
    temp_acc = BudgetAccountant()

    # Naming convention used below:
    #   prev_*  = statistics accumulated until now
    #   batch_* = statistics of the current increment
    #   total_* = the aggregated statistics
    prev_sum = last_mean * last_sample_count

    batch_mean = nanmean(X, epsilon=epsilon, axis=0, bounds=bounds, accountant=temp_acc)
    batch_count = np.sum(~np.isnan(X), axis=0)
    batch_sum = batch_mean * batch_count

    total_count = last_sample_count + batch_count
    total_mean = (prev_sum + batch_sum) / total_count

    # Without a previous variance there is nothing to update.
    if last_variance is None:
        return total_mean, None, total_count

    # "Unnormalized" variance = variance scaled by its sample count.
    batch_var = nanvar(X, epsilon=epsilon, axis=0, bounds=bounds, accountant=temp_acc)
    batch_unnorm_var = batch_var * batch_count
    prev_unnorm_var = last_variance * last_sample_count

    # Zero counts produce divisions by zero here; the warnings are silenced
    # and the affected entries are overwritten just below.
    with np.errstate(divide='ignore', invalid='ignore'):
        count_ratio = last_sample_count / batch_count
        within = prev_unnorm_var + batch_unnorm_var
        between = count_ratio / total_count * (prev_sum / count_ratio - batch_sum) ** 2
        total_unnorm_var = within + between

    # Where no samples had been seen before, the aggregate is just the batch.
    no_history = last_sample_count == 0
    total_unnorm_var[no_history] = batch_unnorm_var[no_history]
    total_variance = total_unnorm_var / total_count

    return total_mean, total_variance, total_count