Example 1
import numpy as np
import wquantiles as wq


def Running_weighted_percentile_2d(x, map_array, weights, window_width,
                                   q_percent):
    """Running weighted percentile of x along the ordering defined by
    map_array, using a window of window_width neighbours per element."""
    # Sort the data and their weights by the mapping array
    sorted_positions = np.argsort(map_array)
    x_ordered = x[sorted_positions]
    sorted_weights = weights[sorted_positions]

    runningmedian = []
    standard_deviation = []
    print('--> Generating runningPercentile with', window_width,
          'neighbours per element\n')
    half = window_width // 2
    for i in range(len(x)):
        if i < half:
            # Near the start: shrink the window so it stays centred on i
            runningmedian.append(
                wq.quantile_1D(x_ordered[:2 * i + 1],
                               sorted_weights[:2 * i + 1], q_percent))
            standard_deviation.append(np.nanstd(x_ordered[:2 * i + 1]))
        elif i >= len(x) - half:
            # Near the end: mirror the shrinking window around i
            runningmedian.append(
                wq.quantile_1D(x_ordered[2 * i - len(x) + 1:],
                               sorted_weights[2 * i - len(x) + 1:],
                               q_percent))
            standard_deviation.append(
                np.nanstd(x_ordered[2 * i - len(x) + 1:]))
        else:
            # Interior: full symmetric window centred on i
            runningmedian.append(
                wq.quantile_1D(x_ordered[i - half:i + half + 1],
                               sorted_weights[i - half:i + half + 1],
                               q_percent))
            standard_deviation.append(
                np.nanstd(x_ordered[i - half:i + half + 1]))

    return np.array(runningmedian), np.array(standard_deviation)
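
A minimal usage sketch for the function above, on synthetic data (the shapes, values, and window size are illustrative assumptions, not from the original source):

import numpy as np

rng = np.random.default_rng(42)
x = rng.normal(size=200)              # values to summarise
map_array = rng.uniform(size=200)     # ordering variable (e.g. a radius)
weights = rng.uniform(0.5, 1.5, 200)  # per-sample weights

med, std = Running_weighted_percentile_2d(x, map_array, weights,
                                          window_width=20, q_percent=0.5)
print(med.shape, std.shape)  # (200,) (200,)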
Example 2
import numpy as np
import wquantiles
from matplotlib.cbook import violin_stats


def custom_violin_stats(data, weights):
    # Weighted median (via the wquantiles module) and weighted mean
    median = wquantiles.quantile_1D(data, weights, 0.5)
    mean, sumw = np.ma.average(data, weights=weights, returned=True)

    # matplotlib's violin_stats expects a method mapping (data, coords) to
    # densities; vdensity_with_weights (defined elsewhere) builds that
    # method as a closure over the weights
    results = violin_stats(data, vdensity_with_weights(weights))

    # Override the unweighted mean/median with the weighted versions
    results[0]["mean"] = mean
    results[0]["median"] = median

    # min/max are already populated by violin_stats, so nothing to do:
    # results[0]["min"] = np.min(data)
    # results[0]["max"] = np.max(data)

    return results
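
The vdensity_with_weights closure referenced above is not part of this snippet. A plausible sketch (an assumption, using SciPy's weighted Gaussian KDE, available in SciPy >= 1.2), followed by feeding the custom stats into Axes.violin:

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde


def vdensity_with_weights(weights):
    # Build the (data, coords) -> densities callable that violin_stats
    # expects, closing over the sample weights (assumed implementation)
    def vdensity(data, coords):
        kde = gaussian_kde(data, weights=weights)
        return kde(coords)
    return vdensity


rng = np.random.default_rng(0)
data = rng.normal(size=300)
weights = rng.uniform(0.1, 1.0, 300)

fig, ax = plt.subplots()
ax.violin(custom_violin_stats(data, weights), positions=[0])
plt.show()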
Example 3
import numpy as np
from sklearn.utils import resample  # presumed source of resample()
from wquantiles import quantile_1D


def weighted_quantile(X, Y, Z, beta, j, tau):
    '''
    Weighted quantile of Z used to update beta_j, as solution of (3.4).

    Y (ndarray): dependent variable, 1-d numpy.ndarray
    X (ndarray): covariates, (n, p) numpy.ndarray
    Z (ndarray): empirical counterpart of the first-order condition
    beta (array-like): current estimates of the coefficients
    j (int): the coordinate of the coefficient to update
    tau (float): the quantile level for the quantile regression
    ----------------------------------------------------------
    returns (float): the weighted quantile of Z_star at level tau_star
    '''
    # Draw a bootstrapped sample
    Z_boot = resample(Z)

    # Take the j-th column
    Z_j = Z_boot[:, j]
    c_star = Z_j.sum()

    # Build Z_star from the partial residuals, leaving coordinate j out
    beta_star = np.concatenate((beta[:j], beta[j + 1:]))
    X_star = np.hstack([X[:, :j], X[:, j + 1:]])
    X_j = X[:, j]
    Y_star = residuals(Y, X_star, beta_star)  # residuals() defined elsewhere
    # Append the (n+1)-th row to Y_star and X_j; 3000 acts as a large
    # pseudo-observation
    Y_star = np.append(Y_star, 3000)
    X_j = np.append(X_j, -c_star / tau)
    Z_star = np.divide(Y_star, X_j)

    # Quantile level tau_star, corrected for the signs of X_j
    abs_X_j = abs(X_j)
    tau_star = 0.5 + (tau - 0.5) * sum(X_j) / sum(abs_X_j)

    # Normalize the weights so they sum to 1
    abs_X_j = abs_X_j / sum(abs_X_j)

    return quantile_1D(np.reshape(Z_star, -1), np.reshape(abs_X_j, -1),
                       tau_star)
Example 4
def weighted_quantile(X, Y, Z, beta, j, tau, seed):
    '''
    Weighted quantile of Z, as solution of (3.4). Same as the version
    above, except that the bootstrap resampling is seeded and the large
    pseudo-observation is 10**15 rather than 3000.
    '''
    # Draw a bootstrapped sample, reproducibly
    Z_boot = resample(Z, random_state=seed)

    # Take the j-th column
    Z_j = Z_boot[:, j]
    c_star = Z_j.sum()

    # Build Z_star from the partial residuals, leaving coordinate j out
    beta_star = np.concatenate((beta[:j], beta[j + 1:]))
    X_star = np.hstack([X[:, :j], X[:, j + 1:]])
    X_j = X[:, j]
    Y_star = residuals(Y, X_star, beta_star)  # residuals() defined elsewhere
    # Append the (n+1)-th row to Y_star and X_j
    Y_star = np.append(Y_star, 10**15)
    X_j = np.append(X_j, -c_star / tau)

    Z_star = np.divide(Y_star, X_j)

    # Quantile level tau_star, corrected for the signs of X_j
    abs_X_j = abs(X_j)
    tau_star = 0.5 + (tau - 0.5) * sum(X_j) / sum(abs_X_j)

    # Normalize the weights so they sum to 1
    abs_X_j = abs_X_j / sum(abs_X_j)

    # quantile_1D sorts its input internally, so no explicit sort is needed
    return quantile_1D(np.reshape(Z_star, -1), np.reshape(abs_X_j, -1),
                       tau_star)
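
Both versions depend on a residuals helper that is not shown in these snippets. A minimal sketch of what it presumably computes (ordinary linear residuals; an assumption, not the original implementation):

import numpy as np


def residuals(Y, X, beta):
    # Residuals of Y against the linear predictor X @ beta
    # (assumed behaviour; the original helper is defined elsewhere)
    return Y - X @ np.asarray(beta)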
Example 5
    def test_weighted_median_1D_unsorted(self):
        # Median of the unsorted array
        self.assertEqual(quantile_1D(self.a1Du, self.a1Du_w, 0.5), 30)
        self.assertEqual(quantile_1D(self.a1Du, np.ones_like(self.a1Du), 0.5),
                         27.5)
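
quantile_1D sorts its input (and the matching weights) internally, so unsorted data gives the same result as sorted data. A quick self-contained check (illustrative values, not the test fixtures above):

import numpy as np
from wquantiles import quantile_1D

data = np.array([3.0, 1.0, 4.0, 2.0])
w = np.ones_like(data)
assert quantile_1D(data, w, 0.5) == quantile_1D(np.sort(data), w, 0.5)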
Example 6
    store = pd.HDFStore(inpFile)
    quality = store['quality']
    volume = store['volume']
    store.close()

else:
    print('Input file must be Epanet or HDF5 format')
    exit(1)

# quality statistics over time
t = max(statStep, statWindow)
tEnd = max(volume.index)
qStat = pd.DataFrame(columns=['median', 'Q1', 'Q3'])
while t <= tEnd:
    # window of the last statWindow time steps, flattened across nodes
    v = volume[(volume.index >= t - statWindow)
               & (volume.index < t)].to_numpy().flatten()
    q = quality[(quality.index >= t - statWindow)
                & (quality.index < t)].to_numpy().flatten()

    # volume-weighted quartiles of quality
    median = wquantiles.quantile_1D(q, v, 0.5)
    q1 = wquantiles.quantile_1D(q, v, 0.25)
    q3 = wquantiles.quantile_1D(q, v, 0.75)
    qStat.loc[t] = [median, q1, q3]

    print('time=', t, ', Q1=', q1, ', Median=', median, ', Q3=', q3)

    t += statStep

qStat.to_csv(outFile)

exit(0)
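
A self-contained sketch of the volume-weighted quantile call at the core of this script, on synthetic data (the names and values are illustrative assumptions):

import numpy as np
import wquantiles

rng = np.random.default_rng(1)
q = rng.normal(1.0, 0.2, 500)      # e.g. water-quality samples in one window
v = rng.uniform(10.0, 100.0, 500)  # matching volumes, used as weights

print('Q1 =', wquantiles.quantile_1D(q, v, 0.25))
print('Median =', wquantiles.quantile_1D(q, v, 0.5))
print('Q3 =', wquantiles.quantile_1D(q, v, 0.75))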