import numpy as np
import wquantiles as wq


def Running_weighted_percentile_2d(x, map_array, weights, window_width,
                                   q_percent):
    """Running weighted percentile of x, ordered by map_array.

    Returns the running percentile and the running (unweighted) standard
    deviation, each as a 1-d array aligned with the sorted order.
    """
    sorted_positions = np.argsort(map_array)
    x_ordered = x[sorted_positions]
    sorted_weights = weights[sorted_positions]
    running_percentile = []
    standard_deviation = []
    print('--> Generating runningPercentile with', window_width,
          'neighbours per element\n')
    for i in range(len(x)):
        if i < window_width // 2:
            # Near the start: grow a symmetric window of 2*i + 1 elements.
            running_percentile.append(
                wq.quantile_1D(x_ordered[:2 * i + 1],
                               sorted_weights[:2 * i + 1], q_percent))
            standard_deviation.append(np.nanstd(x_ordered[:2 * i + 1]))
        elif i > len(x) - window_width // 2:
            # Near the end: shrink the window symmetrically around i.
            running_percentile.append(
                wq.quantile_1D(x_ordered[2 * i - len(x) + 1:],
                               sorted_weights[2 * i - len(x) + 1:],
                               q_percent))
            standard_deviation.append(
                np.nanstd(x_ordered[2 * i - len(x) + 1:]))
        else:
            # Interior: full window of window_width elements centred on i.
            running_percentile.append(
                wq.quantile_1D(
                    x_ordered[i - window_width // 2:i + window_width // 2],
                    sorted_weights[i - window_width // 2:
                                   i + window_width // 2],
                    q_percent))
            standard_deviation.append(
                np.nanstd(
                    x_ordered[i - window_width // 2:i + window_width // 2]))
    return np.array(running_percentile), np.array(standard_deviation)
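# A minimal usage sketch for the function above, on synthetic data (all
# names and values below are illustrative, not from the original):
rng = np.random.default_rng(0)
x_demo = rng.normal(size=1000)
order_demo = rng.uniform(size=1000)        # quantity that defines the ordering
w_demo = rng.uniform(0.5, 1.5, size=1000)
p50, p50_std = Running_weighted_percentile_2d(
    x_demo, order_demo, w_demo, window_width=50, q_percent=0.5)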
import numpy as np
import wquantiles
from matplotlib.cbook import violin_stats


def custom_violin_stats(data, weights):
    # Weighted median and mean (wquantiles for the median).
    median = wquantiles.quantile_1D(data, weights, 0.5)
    mean, sumw = np.ma.average(data, weights=list(weights), returned=True)

    # matplotlib's violin_stats expects a method taking the data and the
    # evaluation coordinates; vdensity_with_weights closes over the weights
    # to provide a weighted density estimate (a sketch follows below).
    results = violin_stats(data, vdensity_with_weights(weights))

    # Overwrite the unweighted mean/median with the weighted versions.
    results[0][u"mean"] = mean
    results[0][u"median"] = median

    # No need to set these; violin_stats already populates them:
    # results[0][u"min"] = np.min(data)
    # results[0][u"max"] = np.max(data)

    return results
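# custom_violin_stats above assumes a vdensity_with_weights closure defined
# near it in its source. A minimal sketch of one possible implementation,
# using SciPy's weighted KDE (gaussian_kde accepts `weights` since
# SciPy 1.2); this is an assumption, not the original author's helper:
from scipy.stats import gaussian_kde


def vdensity_with_weights(weights):
    def vdensity(data, coords):
        # Weighted kernel density estimate of `data`, evaluated at `coords`,
        # matching the method signature violin_stats expects.
        return gaussian_kde(data, weights=weights)(coords)
    return vdensity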
import numpy as np
from sklearn.utils import resample
from wquantiles import quantile_1D


def weighted_quantile(X, Y, Z, beta, j, tau):
    '''
    Weighted quantile of Z used to update beta_j, as the solution of (3.4).

    Y (ndarray): dependent variable, 1-d numpy.ndarray
    X (ndarray): covariates, (n, p) numpy.ndarray
    Z (ndarray): empirical counterpart of the first-order condition
    beta (array-like): current estimates of the coefficients
    j (integer): coordinate of the coefficient to update
    tau (float): quantile level of the quantile regression
    ----------------------------------------------------------
    returns (float): the weighted quantile of Z that updates beta_j
    '''
    # Draw a bootstrapped sample
    Z_boot = resample(Z)
    # Take the j-th column
    Z_j = Z_boot[:, j]
    c_star = Z_j.sum()
    # Remove coordinate j from beta and X
    beta_star = np.concatenate((beta[:j], beta[j + 1:]))
    X_star = np.hstack([X[:, :j], X[:, j + 1:]])
    X_j = X[:, j]
    Y_star = residuals(Y, X_star, beta_star)
    # Append the (n+1)-th pseudo-observation (3000 is a large sentinel
    # value standing in for +infinity)
    Y_star = np.append(Y_star, 3000)
    X_j = np.append(X_j, -c_star / tau)
    Z_star = np.divide(Y_star, X_j)
    # Shifted quantile level tau_star
    abs_X_j = abs(X_j)
    tau_star = 0.5 + (tau - 0.5) * sum(X_j) / sum(abs_X_j)
    # Normalise the weights so they sum to 1
    S = sum(abs_X_j)
    abs_X_j = abs_X_j / S
    return quantile_1D(np.reshape(Z_star, -1), np.reshape(abs_X_j, -1),
                       tau_star)
def weighted_quantile(X, Y, Z, beta, j, tau, seed):
    '''
    Weighted quantile of Z, as the solution of (3.4). Seeded variant of
    the function above.
    '''
    # Draw a bootstrapped sample (seeded for reproducibility)
    Z_boot = resample(Z, random_state=seed)
    # Take the j-th column
    Z_j = Z_boot[:, j]
    c_star = Z_j.sum()
    # Remove coordinate j from beta and X
    beta_star = np.concatenate((beta[:j], beta[j + 1:]))
    X_star = np.hstack([X[:, :j], X[:, j + 1:]])
    X_j = X[:, j]
    Y_star = residuals(Y, X_star, beta_star)
    # Append the (n+1)-th pseudo-observation (10**15 stands in for +infinity)
    Y_star = np.append(Y_star, 10**15)
    X_j = np.append(X_j, -c_star / tau)
    Z_star = np.divide(Y_star, X_j)
    # Shifted quantile level tau_star
    abs_X_j = abs(X_j)
    tau_star = 0.5 + (tau - 0.5) * sum(X_j) / sum(abs_X_j)
    # Normalise the weights so they sum to 1
    S = sum(abs_X_j)
    abs_X_j = abs_X_j / S
    return quantile_1D(np.reshape(Z_star, -1), np.reshape(abs_X_j, -1),
                       tau_star)
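# Both weighted_quantile variants above call a residuals() helper defined
# elsewhere in their source. A minimal sketch, assuming it returns the
# residuals of Y against the reduced design matrix:
def residuals(Y, X_star, beta_star):
    # Y minus the fit using every covariate except column j.
    return Y - X_star @ beta_star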
def test_weighted_median_1D_unsorted(self):
    # Median of the unsorted array
    self.assertEqual(quantile_1D(self.a1Du, self.a1Du_w, 0.5), 30)
    # With uniform weights it reduces to the plain median
    self.assertEqual(
        quantile_1D(self.a1Du, np.ones_like(self.a1Du), 0.5), 27.5)
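# The assertions above rely on fixtures built in the test's setUp(). One
# data/weight pair that satisfies both checks (an illustration, not
# necessarily the library's actual test data):
import numpy as np
from wquantiles import quantile_1D

a1Du = np.array([30., 10., 50., 25., 20., 35.])   # unsorted data
a1Du_w = np.array([4., 1., 2., 1., 1., 1.])       # matching weights
assert np.isclose(quantile_1D(a1Du, a1Du_w, 0.5), 30)
assert np.isclose(quantile_1D(a1Du, np.ones_like(a1Du), 0.5), 27.5)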
    # (the snippet picks up inside the HDF5 branch of an input-format
    # check; the Epanet branch precedes it in the original script)
    store = pd.HDFStore(inpFile)
    quality = store['quality']
    volume = store['volume']
    store.close()
else:
    print('Input file must be Epanet or HDF5 format')
    exit(1)

# quality statistics over time
t = max(statStep, statWindow)
tEnd = max(volume.index)
qStat = pd.DataFrame(columns=['median', 'Q1', 'Q3'])
while t <= tEnd:
    # .as_matrix() was removed in pandas 1.0; use .to_numpy() instead
    v = volume[(volume.index >= t - statWindow)
               & (volume.index < t)].to_numpy().flatten()
    q = quality[(quality.index >= t - statWindow)
                & (quality.index < t)].to_numpy().flatten()
    # volume-weighted quartiles of quality over the trailing window
    median = wquantiles.quantile_1D(q, v, 0.5)
    q1 = wquantiles.quantile_1D(q, v, 0.25)
    q3 = wquantiles.quantile_1D(q, v, 0.75)
    qStat.loc[t] = [median, q1, q3]
    print('time=', t, ', Q1=', q1, ', Median=', median, ', Q3=', q3)
    t += statStep

qStat.to_csv(outFile)
exit(0)
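# A toy run of the rolling window above, on synthetic data (every name
# and value below is illustrative, not from the original script):
import numpy as np
import pandas as pd
import wquantiles

rng = np.random.default_rng(0)
idx = np.arange(0, 3600, 300)                 # one hour at 5-minute steps
quality = pd.DataFrame(rng.uniform(0.0, 1.0, (len(idx), 4)), index=idx)
volume = pd.DataFrame(rng.uniform(1.0, 2.0, (len(idx), 4)), index=idx)

statWindow, statStep = 1800, 900
t = max(statStep, statWindow)
q = quality[(quality.index >= t - statWindow)
            & (quality.index < t)].to_numpy().flatten()
v = volume[(volume.index >= t - statWindow)
           & (volume.index < t)].to_numpy().flatten()
print(wquantiles.quantile_1D(q, v, 0.5))      # volume-weighted median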