def integral_image(img: np.ndarray) -> np.ndarray:
    """
    This function returns the integral image for a given image.
    Referred - https://stackoverflow.com/questions/25557973/efficient-summed-area-table-calculation-with-numpy

    Arguments:
        img {np.ndarray} -- The image for which the integral image needs to be computed

    Returns:
        integral_image {np.ndarray} -- The integral image
    """
    if len(img.shape) == 2:
        m, n = img.shape
        integral_image = np.zeros((m + 1, n + 1))
        integral_image[1:, 1:] = img.cumsum(0).cumsum(1)
        return integral_image

    # Get image shape
    m, n, o = img.shape
    # Declare zeros array for initial condition
    integral_image = np.zeros((m + 1, n + 1, o))
    # Do cumulative summation
    integral_image[1:, 1:, :] = img.cumsum(0).cumsum(1)
    return integral_image
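
# A minimal usage sketch for `integral_image` above (not part of the original
# code): it checks that a block sum can be read off the zero-padded
# summed-area table with four lookups. Assumes numpy is available as `np`;
# the image values are made up for illustration.
import numpy as np

_img = np.arange(12, dtype=float).reshape(3, 4)
_ii = integral_image(_img)
# Sum of the 2x2 block whose top-left corner is (1, 1), via four table lookups.
_block_sum = _ii[3, 3] - _ii[1, 3] - _ii[3, 1] + _ii[1, 1]
assert _block_sum == _img[1:3, 1:3].sum()
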
def rolling_sum(x: np.ndarray, n: int) -> np.ndarray:
    """Rolling sum over windows of length ``n`` along the last axis of ``x``."""
    if x.ndim == 1:
        result = x.cumsum(axis=0, dtype=float)
        result[n:] -= result[:-n]
        return result[n - 1:]
    elif x.ndim == 2:
        result = x.cumsum(axis=1, dtype=float)
        result[:, n:] -= result[:, :-n]
        return result[:, n - 1:]
    elif x.ndim == 3:
        result = x.cumsum(axis=2, dtype=float)
        result[:, :, n:] -= result[:, :, :-n]
        return result[:, :, n - 1:]
    else:
        raise NotImplementedError
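
# A minimal usage sketch for `rolling_sum` above (not part of the original
# code): a window-3 sum over a 1-D array compared against an explicit loop.
import numpy as np

_x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
_expected = np.array([_x[i:i + 3].sum() for i in range(len(_x) - 2)])  # [6, 9, 12]
assert np.allclose(rolling_sum(_x, 3), _expected)
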
def sample(p: np.ndarray, numsamp=1):
    # Draw `numsamp` one-hot samples (columns) from the categorical
    # distribution given by the column vector `p`, and return the
    # log-probability of each draw.
    cumprob = p.cumsum(axis=0)
    rands = np.random.rand(1, numsamp)
    samps = rands < cumprob
    # XOR consecutive rows so that exactly one entry per column stays True
    samps[1:, 0:] = samps[0:-1, 0:] ^ samps[1:, 0:]
    lp = np.log((samps * p).sum(axis=0, keepdims=True))
    return samps, lp
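
# A minimal usage sketch for `sample` above (not part of the original code):
# draw 5 one-hot samples from a 3-state distribution given as a column vector.
# The probabilities are illustrative.
import numpy as np

_p = np.array([[0.2], [0.3], [0.5]])
_samps, _lp = sample(_p, numsamp=5)
# Each column of `_samps` is a one-hot draw; `_lp` holds its log-probability.
assert _samps.shape == (3, 5) and np.all(_samps.sum(axis=0) == 1)
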
def _downsample_array(
    col: np.ndarray,
    target: int,
    random_state: AnyRandom = 0,
    replace: bool = True,
    inplace: bool = False,
):
    """\
    Evenly reduce counts in cell to target amount.

    This is an internal function and has some restrictions:

    * total counts in cell must be less than target
    """
    np.random.seed(random_state)
    cumcounts = col.cumsum()
    if inplace:
        col[:] = 0
    else:
        col = np.zeros_like(col)
    total = np.int_(cumcounts[-1])
    sample = np.random.choice(total, target, replace=replace)
    sample.sort()
    geneptr = 0
    for count in sample:
        while count >= cumcounts[geneptr]:
            geneptr += 1
        col[geneptr] += 1
    return col
def find_image(im: np.ndarray, tpl: np.ndarray) -> Union[Tuple[int, int], Tuple[None, None]]:
    """
    Original: https://stackoverflow.com/questions/29663764/determine-if-an-image-exists-within-a-larger-image-and-if-so-find-it-using-py

    :param im: Large image; operated upon
    :param tpl: Small image; template to look for in large image
    :return: y and x of the found template in the image, or None, None if not found
    """
    im = np.atleast_3d(im)
    tpl = np.atleast_3d(tpl)
    H, W, D = im.shape[:3]
    h, w = tpl.shape[:2]

    # Summed-area table (integral image) and template sum per channel
    sat = im.cumsum(1).cumsum(0)
    tplsum = np.array([tpl[:, :, i].sum() for i in range(D)])

    # Calculate lookup table for all the possible windows
    iA, iB, iC, iD = sat[:-h, :-w], sat[:-h, w:], sat[h:, :-w], sat[h:, w:]
    lookup = iD - iB - iC + iA

    # Possible matches
    possible_match = np.where(
        np.logical_and.reduce([lookup[..., i] == tplsum[i] for i in range(D)]))

    # Find exact match
    for y, x in zip(*possible_match):
        if np.all(im[y + 1:y + h + 1, x + 1:x + w + 1] == tpl):
            return y + 1, x + 1

    return None, None
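
# A minimal usage sketch for `find_image` above (not part of the original
# code): embed a small patch in a larger array and recover its offset.
import numpy as np

_large = np.zeros((10, 10), dtype=int)
_patch = np.arange(1, 7).reshape(2, 3)
_large[4:6, 2:5] = _patch
_y, _x = find_image(_large, _patch)
assert (_y, _x) == (4, 2)
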
def should_incur_societal_cost(breach_level: float, lengths: np.ndarray, dykes: np.ndarray) -> bool:
    ind: np.ndarray = np.hstack(tup=(np.array([0]), lengths.cumsum()))
    for i in range(ind.shape[0] - 1):
        if np.all(dykes[ind[i]:ind[i + 1]] < breach_level):
            return False  # at least one dyke is fully non-breached
    return True
def _truncate_alignment(align: Ragged, mask: numpy.ndarray) -> Ragged:
    # We're going to have fewer wordpieces in the new array, so all of our
    # wordpiece indices in the alignment table will be off --- they'll point
    # to the wrong row. So we need to do three things here:
    #
    # 1) Adjust all the indices in align.dataXd to account for the dropped data
    # 2) Remove the dropped indices from the align.dataXd
    # 3) Calculate new align.lengths
    #
    # The wordpiece mapping is easily calculated by the cumulative sum of the
    # mask table.
    # Let's say we have [True, False, False, True]. The mapping of the dropped
    # wordpieces doesn't matter, because we can filter it with the mask. So we
    # have [0, 0, 0, 1], i.e. the wordpiece that was at 0 is still at 0, and
    # the wordpiece that was at 3 is now at 1.
    mask = mask.ravel()
    idx_map = mask.cumsum() - 1
    idx_map[~mask] = -1
    # Step 1: Adjust all the indices in align.dataXd.
    new_align = idx_map[align.data.ravel()]
    # Step 2: Remove the dropped indices
    new_align = new_align[new_align >= 0]
    # Step 3: Calculate new align.lengths
    new_lengths = align.lengths.copy()
    for i in range(len(align.lengths)):
        drops = ~mask[align[i].data.ravel()]
        new_lengths[i] -= drops.sum()
    return Ragged(new_align, new_lengths)
def _sus(self, *, population: np.ndarray, population_fitness: np.ndarray, n: int, **kwargs) -> np.ndarray:
    """Select individuals in population using stochastic universal sampling,
    which samples without replacement.

    Args:
        population (np.ndarray): the population to mutate. Shape is
            n_individuals x n_chromosomes.
        population_fitness (np.ndarray): the population fitness. Is a 1D array
            the same length as population.
        n (int): total number of individuals to select
        **kwargs: keyword arguments for plugins

    Returns:
        np.ndarray: selected population
    """
    # cumsum creates the roulette wheel; it is order-insensitive:
    # if the 1st element is largest, the wheel simply starts there
    fitness_cumsum = population_fitness.cumsum()
    fitness_sum = fitness_cumsum[-1]
    # e.g. if the total fitness is 100 and n is 10, the pointers are spaced
    # 10 apart, starting at a random offset inside the first step
    step = fitness_sum / n
    start = np.random.random() * step
    # selectors are the evenly-spaced points on the wheel
    selectors = np.arange(start, fitness_sum, step)
    # Find indices where selectors should be inserted to maintain order,
    # e.g. if the cumulative fitness is 1, 2, 3, 4, 5 and the selectors are
    # 1.2 and 3.1, np.searchsorted returns [1, 3]
    return population[np.searchsorted(fitness_cumsum, selectors)]
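
# A standalone sketch of the stochastic-universal-sampling wheel used by
# `_sus` above (the method itself needs its enclosing class, so it is not
# called here). The fitness values and the fixed start point are illustrative,
# not part of the original code.
import numpy as np

_fitness = np.array([1.0, 2.0, 3.0, 4.0])
_wheel = _fitness.cumsum()                               # [1, 3, 6, 10]
_step = _wheel[-1] / 2                                   # select n=2 individuals
_selectors = np.arange(0.5 * _step, _wheel[-1], _step)   # evenly spaced pointers
_picked = np.searchsorted(_wheel, _selectors)
assert _picked.tolist() == [1, 3]  # pointers 2.5 and 7.5 land on individuals 1 and 3
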
def integral_image(input_image: np.ndarray) -> np.ndarray:
    """
    Compute an integral image. Useful for computing Haar-like Features.
    (https://en.wikipedia.org/wiki/Summed-area_table)
    """
    return input_image.cumsum(0).cumsum(1)
def plot_singular_values_and_energy(sv: np.ndarray, k: int):
    """
    Plot singular values and accumulated magnitude of singular values.

    Arguments:
        sv: vector containing singular values
        k: index of the threshold for the magnitude of singular values

    Side Effects:
        - Opens plotting window
    """
    en_cum = sv.cumsum()
    fig = pylab.figure(figsize=(15, 8))

    fig.add_subplot(1, 2, 2)
    plt.plot(sv)
    plt.vlines(k, 0.0, max(sv), colors='r', linestyles='solid')
    plt.xlim(0, len(sv))
    plt.ylim(0.0, max(sv))
    plt.xlabel('Index of singular value')
    plt.ylabel('Magnitude of singular value')

    fig.add_subplot(1, 2, 1)
    plt.plot(en_cum)
    plt.vlines(k, 0.0, max(en_cum), colors='r', linestyles='solid')
    plt.xlim(0, len(en_cum))
    plt.ylim(0.0, max(en_cum))
    plt.ylabel('Accumulated singular values')
    plt.xlabel('Number of first singular values in accumulation')
    plt.show()
def test_shapes(self, boxes: ndarray, truth: ndarray, data: st.SearchStrategy):
    """ Ensure the shape returned by generate_targets is correct, even in edge
    cases producing empty arrays. """
    boxes = boxes.cumsum(axis=1)  # to ensure we don't hit 0-width or -height boxes
    truth = truth.cumsum(axis=1)  # to ensure we don't hit 0-width or -height boxes
    N = boxes.shape[0]
    K = truth.shape[0]
    labels = data.draw(hnp.arrays(dtype=int, shape=(K,)))
    cls, reg = generate_targets(boxes, truth, labels, 0.5, 0.4)

    msg = "generate_targets failed to produce classification targets of the correct shape"
    assert cls.shape == (N,), msg

    msg = "generate_targets failed to produce regression targets of the correct shape"
    assert reg.shape == (N, 4), msg
def _weighted_quantile_1d(
    data: np.ndarray,
    weights: np.ndarray,
    q: np.ndarray,
    skipna: bool,
    method: QUANTILE_METHODS = "linear",
) -> np.ndarray:
    # This algorithm has been adapted from:
    # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation
    is_nan = np.isnan(data)
    if skipna:
        # Remove nans from data and weights
        not_nan = ~is_nan
        data = data[not_nan]
        weights = weights[not_nan]
    elif is_nan.any():
        # Return nan if data contains any nan
        return np.full(q.size, np.nan)

    # Filter out data (and weights) associated with zero weights, which also flattens them
    nonzero_weights = weights != 0
    data = data[nonzero_weights]
    weights = weights[nonzero_weights]
    n = data.size

    if n == 0:
        # Possibly empty after nan or zero weight filtering above
        return np.full(q.size, np.nan)

    # Kish's effective sample size
    nw = weights.sum() ** 2 / (weights**2).sum()

    # Sort data and weights
    sorter = np.argsort(data)
    data = data[sorter]
    weights = weights[sorter]

    # Normalize and sum the weights
    weights = weights / weights.sum()
    weights_cum = np.append(0, weights.cumsum())

    # Vectorize the computation by transposing q with respect to weights
    q = np.atleast_2d(q).T

    # Get the interpolation parameter for each q
    h = _get_h(nw, q, method)

    # Find the samples contributing to the quantile computation (at *positions* between (h-1)/nw and h/nw)
    u = np.maximum((h - 1) / nw, np.minimum(h / nw, weights_cum))

    # Compute their relative weight
    v = u * nw - h + 1
    w = np.diff(v)

    # Apply the weights
    return (data * w).sum(axis=1)
def test_identical_proposed_and_truth(self, x: ndarray):
    """ Ensure that generate_targets produces regression targets that are zero
    for identical proposal and truth. """
    x = x.cumsum(axis=1)  # ensure (l, t, r, b)
    labels = np.array([0] * 5)
    _, reg = generate_targets(x, x, labels, 0.5, 0.4)

    msg = "generate_targets failed to produce the expected output when the proposed boxes are identical to ground truth"
    assert_allclose(actual=reg, desired=np.zeros_like(x), atol=1e-5, rtol=1e-5,
                    err_msg=msg)
def moving_average_slim(arr: np.ndarray, n_win: int = 2, axis: int = -1) -> np.ndarray:
    # `(n_win-1)` shorter. Good for any positive `n_win`. Good if `arr` is empty in `axis`
    if n_win <= 0:
        raise ValueError(f"nonpositive `n_win` {n_win} not allowed")
    slc = SlicesAt(axis, arr.ndim)
    concatfn = tc.cat if type(arr) is tc.Tensor else np.concatenate
    cum = arr.cumsum(axis)  # , dtype=float)
    return concatfn(
        [cum[slc[n_win - 1:n_win]], cum[slc[n_win:]] - cum[slc[:-n_win]]],
        axis) / float(n_win)
def best_consecutive_sum(prime_array: np.ndarray):
    # Longest run of consecutive primes from `prime_array` whose sum is itself
    # a prime in the array; returns (sum, run_length).
    setprimes = set(prime_array)
    maxprime = prime_array[-1]
    # Prepend 0 so that cumsum[i + length] - cumsum[i] is the sum of
    # prime_array[i:i + length] (the original indexing skipped runs that
    # start at the first prime).
    cumsum = np.concatenate(([0], prime_array.cumsum()))
    for length in range(prime_array.size, 0, -1):
        for i in range(0, prime_array.size - length + 1):
            total = cumsum[i + length] - cumsum[i]
            if total > maxprime:
                break
            if total in setprimes:
                return total, length
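
# A minimal usage sketch for `best_consecutive_sum` above (not part of the
# original code): among the primes below 100, the longest run of consecutive
# primes whose sum is itself one of those primes is 2+3+5+7+11+13 = 41.
import numpy as np

_primes = np.array([2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
                    53, 59, 61, 67, 71, 73, 79, 83, 89, 97])
assert best_consecutive_sum(_primes) == (41, 6)
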
def _measure(probability_array: numpy.ndarray) -> int:
    """Given the probability values for states [0, 1, 2, ..., n], draw a state
    according to the CDF.

    A PDF of [0.1, 0.2, 0.0, 0.3, 0.4] gives the cum_probability_array
    [0.1, 0.3, 0.3, 0.6, 1.0] and will return 0, 1, 3 or 4, with 4 being twice
    as likely as 1.

    :param probability_array: array representing the probability of measuring each state
    :return: result of a single measurement on the probability array (returns the index of the array)
    """
    index = numpy.random.random()
    cum_probability_array = probability_array.cumsum()
    return numpy.searchsorted(cum_probability_array, index, side="left")
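
# A minimal usage sketch for `_measure` above (not part of the original code):
# the zero-probability state from the docstring example is never measured.
import numpy

_pdf = numpy.array([0.1, 0.2, 0.0, 0.3, 0.4])
_draws = [_measure(_pdf) for _ in range(1000)]
assert 2 not in _draws
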
def calculate_cdf(histogram: np.ndarray) -> np.ndarray:
    """
    This method calculates the cumulative distribution function

    :param array histogram: The values of the histogram
    :return: normalized_cdf: The normalized cumulative distribution function
    :rtype: array
    """
    # Get the cumulative sum of the elements
    cdf = histogram.cumsum()

    # Normalize the cdf
    normalized_cdf = cdf / float(cdf.max())

    return normalized_cdf
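
# A minimal usage sketch for `calculate_cdf` above (not part of the original
# code): the normalized CDF of a small histogram ends at 1.0 and never
# decreases. The counts are illustrative.
import numpy as np

_hist = np.array([2.0, 0.0, 3.0, 5.0])
_cdf = calculate_cdf(_hist)
assert np.isclose(_cdf[-1], 1.0) and np.all(np.diff(_cdf) >= 0)
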
def categorical(probs: np.ndarray, num_samples: int, logits=False, dtype=np.int32):
    """NumPy stand-in for ``tf.random.categorical(logits, num_samples)``."""
    if logits:
        probs = np.exp(log_softmax(probs))
    size = list(probs.shape[:-1]) + [num_samples]
    probs = probs.cumsum(-1).repeat(num_samples, axis=0)
    rand = np.random.uniform(size=size).reshape((-1, 1))
    cat = (probs >= rand).argmax(-1)[..., None].reshape(size)
    return np.asarray(cat, dtype=dtype)
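
# A minimal usage sketch for `categorical` above (not part of the original
# code), using the logits=False path so that only numpy is needed. A one-hot
# probability row makes the second batch row deterministic.
import numpy as np

_probs = np.array([[0.0, 1.0, 0.0],
                   [1.0, 0.0, 0.0]])
_draws = categorical(_probs, num_samples=4)
assert _draws.shape == (2, 4) and np.all(_draws[1] == 0)
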
def binary_roc_auc(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """One vs rest ROC AUC."""
    # Sort by decreasing score
    ix = np.argsort(y_pred)[::-1]
    y_true, y_pred = y_true[ix], y_pred[ix]

    # Thresholds at each distinct predicted value
    distinct_value_indices = np.where(np.diff(y_pred))[0]
    threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1]

    # Cumulative true/false positives at each threshold
    tps = y_true.cumsum()[threshold_idxs]
    fps = 1 + threshold_idxs - tps
    thresholds = y_pred[threshold_idxs]

    # Drop collinear points that do not change the curve
    c = np.logical_or(np.diff(fps, 2), np.diff(tps, 2))
    optimal_idxs = np.where(np.r_[True, c, True])[0]
    fps, tps = fps[optimal_idxs], tps[optimal_idxs]

    # Prepend the origin, normalize to rates, and integrate
    fps, tps = np.r_[0, fps], np.r_[0, tps]
    fpr, tpr = fps / fps[-1], tps / tps[-1]
    return np.trapz(tpr, fpr)
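
# A minimal usage sketch for `binary_roc_auc` above (not part of the original
# code): a perfectly ranked toy example gives an AUC of 1.0 and the reversed
# ranking gives 0.0.
import numpy as np

_y = np.array([0, 0, 1, 1])
_scores = np.array([0.1, 0.2, 0.8, 0.9])
assert np.isclose(binary_roc_auc(_y, _scores), 1.0)
assert np.isclose(binary_roc_auc(_y, _scores[::-1]), 0.0)
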
def batch_multinomial_sampling(probs: np.ndarray) -> np.ndarray:
    """Batched version of multinomial sampling.

    Draws samples from a batch of multinomial distributions simultaneously.

    Note: this can be replaced once numpy provides API support for sampling
    from multinomial distributions simultaneously.

    Arguments:
        probs: A 2-D `np.ndarray` of shape `[batch_size, num_classes]`. A batch
            of probability vectors such that `probs.sum(-1) = 1`

    Returns:
        An 1-D `np.ndarray` representing the sampled indices.
    """
    return (probs.cumsum(-1) >= np.random.uniform(size=probs.shape[:-1])[..., None]).argmax(-1)
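
# A minimal usage sketch for `batch_multinomial_sampling` above (not part of
# the original code): a one-hot probability row pins its sampled index.
import numpy as np

_batch = np.array([[1.0, 0.0, 0.0],
                   [0.3, 0.3, 0.4]])
_draws = batch_multinomial_sampling(_batch)
assert _draws.shape == (2,) and _draws[0] == 0
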
def vectorized_multinomial(selected_prob_matrix: np.ndarray, random_numbers: np.ndarray):
    """Vectorized sample from a [B, N] probability matrix

    Lightly edited from https://stackoverflow.com/a/34190035/2504700

    Args:
        selected_prob_matrix: (Batch, p) size probability matrix (i.e. T[s,a] or O[s,a,s'])
        random_numbers: (Batch,) size random numbers from np.random.rand()

    Returns:
        (Batch,) size sampled integers
    """
    s = selected_prob_matrix.cumsum(axis=1)  # Sum over p dim for accumulated probability
    # Returns the first index where the random number < accumulated probability
    return (s < np.expand_dims(random_numbers, axis=-1)).sum(axis=1)
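
# A minimal usage sketch for `vectorized_multinomial` above (not part of the
# original code): with one-hot probability rows and mid-range random numbers,
# the sampled index matches the hot entry.
import numpy as np

_probs = np.array([[0.0, 1.0, 0.0],
                   [0.0, 0.0, 1.0]])
_rand = np.array([0.5, 0.5])
assert vectorized_multinomial(_probs, _rand).tolist() == [1, 2]
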
def cumsumr(array: np.ndarray, axis: int = 0) -> np.ndarray:
    """Finds cumulative sum that resets on 0.

    Args:
        array: Input array.
        axis: Axis where the sum is calculated. Default is 0.

    Returns:
        Cumulative sum, restarted at 0.

    Examples:
        >>> x = np.array([0, 0, 1, 1, 0, 0, 0, 1, 1, 1])
        >>> cumsumr(x)
        [0, 0, 1, 2, 0, 0, 0, 1, 2, 3]
    """
    cums = array.cumsum(axis=axis)
    return cums - np.maximum.accumulate(cums * (array == 0), axis=axis)  # pylint: disable=E1101
def __init__(self, residuals: np.ndarray):
    res = sm.tsa.ARIMA(residuals.cumsum(), order=(1, 0, 0)).fit()
    a, b = res.params
    var = res.resid.var()
    k = -np.log(b) * 252
    m = a / (1 - b)
    sigma = np.sqrt((var * 2 * k) / (1 - b ** 2))
    sigma_eq = np.sqrt(var / (1 - b ** 2))
    self.a: float = a
    self.b: float = b
    self.var: float = var
    self.k: float = k
    self.m: float = m
    self.sigma: float = sigma
    self.sigma_eq: float = sigma_eq
    self.tau: float = 1 / k
def plot(data: np.ndarray, discretized: bool = False):
    '''
    Plots data using matplotlib

    :param data: numpy array with SIR functions values
    :param discretized: if SIR functions are represented as derivatives, integrate over the values
    '''
    import matplotlib.pyplot as plt

    if discretized:
        plt.plot(data.cumsum(axis=0))
    else:
        plt.plot(data)

    if data.shape[1] == 3:
        plt.legend(('Susceptible', 'Infectious', 'Recovered'))
    else:
        plt.legend(('Infectious', 'Recovered'))
    plt.show()
def min_err_threshold(histogram: np.ndarray, class_borders):
    # todo (thomas) here could be a bug?!
    """
    NOTE: This method is highly inspired by the source code provided from:
    https://github.com/manuelaguadomtz/pythreshold
    author: Manuel Aguado Martínez (2017)

    Runs the minimum error thresholding algorithm.

    :param histogram: (numpy ndarray of floats)
    :return: The threshold that minimizes the error
    """
    w_backg = histogram.cumsum()
    w_backg[w_backg == 0] = 1
    w_foreg = w_backg[-1] - w_backg
    w_foreg[w_foreg == 0] = 1

    # Cumulative distribution function
    cdf = np.cumsum(histogram * np.arange(len(histogram)))

    # Means (Last term is to avoid divisions by zero)
    b_mean = cdf / w_backg
    f_mean = (cdf[-1] - cdf) / w_foreg

    # Standard deviations
    b_std = ((np.arange(len(histogram)) - b_mean)**2 * histogram).cumsum() / w_backg
    f_std = ((np.arange(len(histogram)) - f_mean)**2 * histogram).cumsum()
    f_std = (f_std[-1] - f_std) / w_foreg

    # To avoid log of 0 invalid calculations
    b_std[b_std == 0] = 1
    f_std[f_std == 0] = 1

    # Estimating error
    error_a = w_backg * np.log(b_std) + w_foreg * np.log(f_std)
    error_b = w_backg * np.log(w_backg) + w_foreg * np.log(w_foreg)
    error = 1 + 2 * (error_a - error_b)

    goodness, best_pos = __evaluate_goodness(f_std, f_mean, b_std, b_mean, error)
    return class_borders[best_pos + 1], goodness
def _reconstruct_lorenz(ts: np.ndarray, template: np.ndarray):
    """
    Reconstructing the time series to an array of z-vectors.

    For example:
        Time series: [1,2,3,4,5,6,7,8,9,10,11,12]
        Template: [1,1,5,2]
        Result: [[1, 2, 3, 8, 10],
                 [2, 3, 4, 9, 11],
                 [3, 4, 5, 10, 12]]

    :param ts: Original time series to reconstruct
    :param template: Template to reconstruct the ts
    :return: Reconstructed time series
    """
    ts_list = [ts[:-np.sum(template)].reshape(-1, 1)]
    for offset in template.cumsum()[:-1]:
        offset_ts = ts[offset:-(template.sum() - offset)].reshape(-1, 1)
        ts_list.append(offset_ts)
    ts_list.append(ts[np.sum(template):].reshape(-1, 1))
    reconstructed_ts = np.concatenate(ts_list, axis=1)
    return reconstructed_ts
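
# A minimal usage sketch for `_reconstruct_lorenz` above (not part of the
# original code), reproducing the docstring example.
import numpy as np

_ts = np.arange(1, 13)
_template = np.array([1, 1, 5, 2])
_expected = np.array([[1, 2, 3, 8, 10],
                      [2, 3, 4, 9, 11],
                      [3, 4, 5, 10, 12]])
assert np.array_equal(_reconstruct_lorenz(_ts, _template), _expected)
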
def moving_average_full(arr: np.ndarray, n_win: int = 2, axis: int = -1) -> np.ndarray:
    # Same length as `arr`. Good for any positive `n_win`. Good if `arr` is empty in `axis`
    if n_win <= 0:
        raise ValueError(f"nonpositive `n_win` {n_win} not allowed")
    slc = SlicesAt(axis, arr.ndim)
    concatfn = tc.cat if type(arr) is tc.Tensor else np.concatenate
    cum = arr.cumsum(axis)  # , dtype=float)
    stem = concatfn(
        [cum[slc[n_win - 1:n_win]], cum[slc[n_win:]] - cum[slc[:-n_win]]],
        axis) / float(n_win)
    length = arr.shape[axis]
    lwid = (n_win - 1) // 2
    rwid = n_win // 2 + 1
    return concatfn([
        *[
            cum[slc[j - 1:j]] / float(j)
            for i in range(min(lwid, length))
            for j in [min(i + rwid, length)]
        ],
        stem,
        *[(cum[slc[-1:]] - cum[slc[i - lwid - 1:i - lwid]]
           if i - lwid > 0 else cum[slc[-1:]]) / float(length - i + lwid)
          for i in range(max(length - rwid + 1, lwid), length)]
    ], axis)
def _clip_extremes(
        x: np.ndarray, y: np.ndarray, w: np.ndarray,
        pct_to_clip: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Clips the pct_to_clip first and last values by weight."""
    utils.expect(0 <= pct_to_clip < .5)
    if pct_to_clip == 0:
        return x, y, w

    w_cumsum = w.cumsum()
    w_sum = w_cumsum[-1]
    cut_weight = w_sum * pct_to_clip

    # The indices of the first and last nonzero entries after clipping.
    first_nonzero = w_cumsum.searchsorted(cut_weight, side='right')
    last_nonzero = w_cumsum.searchsorted(w_sum - cut_weight, side='left')

    x = x[first_nonzero:last_nonzero + 1]
    y = y[first_nonzero:last_nonzero + 1]
    w = w[first_nonzero:last_nonzero + 1].copy()  # Don't modify the original.

    # Use any leftover cut_weight to reduce the first and last weights.
    w[0] = w_cumsum[first_nonzero] - cut_weight
    w[-1] = w_sum - w_cumsum[last_nonzero - 1] - cut_weight
    return x, y, w
def generate_sample_histogram(
    smoothed_data: np.ndarray,
    num_samples: int,
    random_state: np.random.RandomState,
):
    """Sample from the smoothed data as if it were a probability distribution"""
    # calculate the cdf values (making sure to normalize)
    cdf = smoothed_data.cumsum()
    cdf /= cdf[-1]
    # note: all elements of cdf are in [0,1]

    # evaluate the inverse cdf `num_samples` times by linearly interpolating;
    # use one bin edge per input element so xp and fp have matching lengths
    points = np.linspace(0, len(cdf), num=len(cdf) + 1, endpoint=True)
    values = np.interp(
        x=random_state.rand(num_samples),
        xp=cdf,
        fp=points[:-1],
    )

    # be lazy and get numpy to make a histogram for us
    return np.histogram(
        a=values,
        bins=points,
    )[0]
def __init__(
    self,
    X: np.ndarray,
    deltas: np.ndarray,
    theta: float,
    rho: float,
    robust: bool,
    eps: float = 1e-4,
):
    X = np.asarray(X, dtype=np.int32)
    deltas = np.asarray(deltas, dtype=np.float64)
    # restrict to segregating sites
    if robust:
        X = np.minimum(1, X)
    self.X = X
    self.theta = theta
    self.rho = rho
    self.Xcs = np.pad(self.X, [[0, 0], [0, 1]]).cumsum(axis=1)
    self.positions = np.concatenate(
        (np.zeros_like(deltas[:1]), deltas.cumsum()))
    self.robust = robust
    self._proxy = _SamplerProxy(self.Xcs, self.positions, theta, rho, robust,
                                eps)