def _checks_and_wrangling(self, x, w):
    """Normalize the data and weights into per-dataset sequences.

    The input is handled in the same fashion as matplotlib does: ``x``
    (and ``w`` when given) is passed through ``cbook._reshape_2D`` and the
    resulting number of datasets is stored on ``self.n_data_sets``.

    Parameters
    ----------
    x : scalar or array-like
        Input data. A scalar is wrapped in a one-element list; an empty
        input is replaced by ``np.array([[]])``.
    w : array-like or None
        Optional weights, reshaped the same way as ``x``.

    Returns
    -------
    x, w
        The reshaped data and the reshaped weights (``None`` when no
        weights were supplied).

    Raises
    ------
    ValueError
        If the weights do not contain the same number of datasets as
        ``x``, or a weight sequence differs in length from its dataset.
    """
    import re

    # Compare the matplotlib version numerically. A plain string
    # comparison is lexicographic and would, for example, rank '10.0.0'
    # below '2.1.0'. An unparsable version is treated as a modern release.
    version_match = re.match(r'(\d+)\.(\d+)', mpl.__version__)
    legacy_reshape = (
        version_match is not None
        and tuple(int(part) for part in version_match.groups()) < (2, 1)
    )

    def _reshape(values, name):
        # Releases older than 2.1 are called without the name argument.
        if legacy_reshape:
            return cbook._reshape_2D(values)
        return cbook._reshape_2D(values, name)

    # Manage the input data in the same fashion as mpl
    if np.isscalar(x):
        x = [x]

    # Massage 'x' for processing.
    if np.size(x) == 0:
        x = np.array([[]])
    else:
        x = _reshape(x, 'x')

    self.n_data_sets = len(x)  # number of datasets

    if w is None:
        return x, w

    # We need to do to 'weights' what was done to 'x'
    w = _reshape(w, 'w')

    if len(w) != self.n_data_sets:
        raise ValueError('weights should have the same shape as x')

    for xi, wi in zip(x, w):
        if wi is not None and len(wi) != len(xi):
            raise ValueError('weights should have the same shape as x')

    return x, w
def test_reshape2d_pandas(pd):
    """Check that ``_reshape_2D`` splits a DataFrame into its columns.

    ``pd`` is the pandas module, supplied by the caller (presumably a
    pytest fixture -- confirm against the test configuration).
    """
    # separate to allow the rest of the tests to run if no pandas...
    X = np.arange(30).reshape(10, 3)
    frame = pd.DataFrame(X, columns=["a", "b", "c"])
    Xnew = cbook._reshape_2D(frame, 'x')
    # Need to check each row because _reshape_2D returns a list of arrays:
    for expected, actual in zip(X.T, Xnew):
        np.testing.assert_array_equal(expected, actual)
def test_reshape2d_xarray(xr):
    """Check that ``_reshape_2D`` splits a 2D DataArray column by column.

    ``xr`` is the xarray module, supplied by the caller (presumably a
    pytest fixture -- confirm against the test configuration). It is kept
    separate so the remaining tests can run without xarray.
    """
    values = np.arange(30).reshape(10, 3)
    data_array = xr.DataArray(values, dims=["x", "y"])
    reshaped = cbook._reshape_2D(data_array, 'x')

    # _reshape_2D returns a list of arrays, so compare one entry at a time
    # against the columns of the original data.
    expected_columns = values.T
    for expected, actual in zip(expected_columns, reshaped):
        np.testing.assert_array_equal(expected, actual)
def test_reshape2d():
    """Exercise ``_reshape_2D`` on lists, arrays, ragged input and subclasses."""

    class dummy:
        pass

    def reshaped_shape(data):
        # Shape of whatever _reshape_2D produces for *data*.
        return np.shape(cbook._reshape_2D(data, 'x'))

    # Empty input.
    assert reshaped_shape([]) == (1, 0)

    # Flat list of arbitrary objects.
    assert reshaped_shape([dummy() for j in range(5)]) == (1, 5)

    # 1D array.
    assert reshaped_shape(np.arange(5)) == (1, 5)

    # Nested list of arbitrary objects.
    nested = [[dummy() for j in range(5)] for i in range(3)]
    assert reshaped_shape(nested) == (3, 5)

    # Odd, but expected: a 2D array comes back with its axes swapped.
    assert reshaped_shape(np.random.rand(3, 5)) == (5, 3)

    # A list of lists with different lengths is internally converted to a
    # 1D object array of lists; each entry must come back as its own array.
    ragged_new = cbook._reshape_2D([[1, 2, 3], [3, 4], [2]], 'x')
    assert isinstance(ragged_new, list)
    for index, expected_len in enumerate((3, 2, 1)):
        assert isinstance(ragged_new[index], np.ndarray)
        assert ragged_new[index].shape == (expected_len,)

    # Numpy subclasses may hand back subclass instances when iterated or
    # indexed, and those can be iterable even when they are scalars (for
    # example under isinstance(x, collections.abc.Iterable)). Emulate that
    # with a subclass that yields Numpy 'scalars' wrapped as arrays.
    class ArraySubclass(np.ndarray):

        def __iter__(self):
            for value in super().__iter__():
                yield np.array(value)

        def __getitem__(self, item):
            return np.array(super().__getitem__(item))

    backing = np.arange(10, dtype=float)
    wrapped = ArraySubclass((10,), dtype=float, buffer=backing.data)
    wrapped_new = cbook._reshape_2D(wrapped, 'x')

    # The array must not be split into many individual ArraySubclass
    # objects, which is what a former bug in _reshape_2D caused.
    assert len(wrapped_new) == 1
    assert isinstance(wrapped_new[0], ArraySubclass)
def test_reshape2d():
    """Verify the output layout of ``_reshape_2D`` for a range of inputs."""

    class dummy():
        pass

    # (input, expected shape of the reshaped result), checked in order.
    shape_cases = [
        ([], (1, 0)),
        ([dummy() for j in range(5)], (1, 5)),
        (np.arange(5), (1, 5)),
        ([[dummy() for j in range(5)] for i in range(3)], (3, 5)),
        # this is strange behaviour, but a 2D array gets transposed
        (np.random.rand(3, 5), (5, 3)),
    ]
    for data, expected_shape in shape_cases:
        result = cbook._reshape_2D(data, 'x')
        assert np.shape(result) == expected_shape

    # Rows of unequal length: the input is internally converted to a 1D
    # object array of lists, and the output is a list of 1D arrays.
    rows = [[1, 2, 3], [3, 4], [2]]
    rows_new = cbook._reshape_2D(rows, 'x')
    assert isinstance(rows_new, list)
    first, second, third = rows_new[0], rows_new[1], rows_new[2]
    assert isinstance(first, np.ndarray) and first.shape == (3,)
    assert isinstance(second, np.ndarray) and second.shape == (2,)
    assert isinstance(third, np.ndarray) and third.shape == (1,)

    # Make sure Numpy subclasses work when iterating over items returns
    # subclasses too, which may be iterable even if they are scalars. The
    # subclass below returns Numpy 'scalars' when iterating or accessing
    # values; these are technically iterable, e.g. when checked with
    # isinstance(x, collections.abc.Iterable).
    class ArraySubclass(np.ndarray):

        def __iter__(self):
            for value in super().__iter__():
                yield np.array(value)

        def __getitem__(self, item):
            return np.array(super().__getitem__(item))

    source = np.arange(10, dtype=float)
    sub = ArraySubclass((10,), dtype=float, buffer=source.data)
    sub_new = cbook._reshape_2D(sub, 'x')

    # A bug in _reshape_2D used to split the array into many individual
    # ArraySubclass objects; it must stay a single dataset.
    assert len(sub_new) == 1
    assert isinstance(sub_new[0], ArraySubclass)
def test_reshape2d():
    """Check the shape ``_reshape_2D`` produces for lists and arrays."""

    class dummy():
        pass

    def reshaped_shape(data):
        # Shape of whatever _reshape_2D produces for *data*.
        return np.shape(cbook._reshape_2D(data, 'x'))

    # Flat list of arbitrary objects.
    flat_objects = [dummy() for j in range(5)]
    assert reshaped_shape(flat_objects) == (1, 5)

    # 1D array.
    assert reshaped_shape(np.arange(5)) == (1, 5)

    # Nested list of arbitrary objects.
    nested_objects = [[dummy() for j in range(5)] for i in range(3)]
    assert reshaped_shape(nested_objects) == (3, 5)

    # Odd, but expected: a 2D array comes back with its axes swapped.
    assert reshaped_shape(np.random.rand(3, 5)) == (5, 3)
def nofig_cumulative_hist(x, bins):
    """Compute cumulative, normalized histogram heights without drawing.

    Based on the ``hist()`` method in matplotlib's ``axes/_axes.py``
    (matplotlib-base 3.0.3). That method outputs figures along with the
    data; drawing costs computation time, and iterating it over many
    datasets can cause crashes. Downstream functions still need data in a
    format similar to matplotlib's output, so this function keeps only the
    numeric part and returns the heights and the bin edges.

    Parameters
    ----------
    x : array-like
        Data points to process; passed through ``cbook._reshape_2D`` so
        that one or several datasets are accepted.
    bins : int, str or sequence
        Bin specification forwarded to ``np.histogram``. The edges computed
        for the first dataset are reused for every following dataset.

    Returns
    -------
    tops : list of np.ndarray (dtype float)
        Cumulative histogram heights, one array per dataset.
    bins : np.ndarray
        The bin edges returned by ``np.histogram``.
    """
    datasets = cbook._reshape_2D(x, 'x')

    # Find the common data range while ignoring NaNs. Python's min/max keep
    # the first argument when the second one is NaN, so an all-NaN dataset
    # leaves the running extrema untouched.
    xmin = np.inf
    xmax = -np.inf
    for xi in datasets:
        if len(xi) > 0:
            xmin = min(xmin, np.nanmin(xi))
            xmax = max(xmax, np.nanmax(xi))

    # Only hand an explicit range to np.histogram once a finite value has
    # been seen. Otherwise (inf, -inf) would be rejected as invalid for
    # empty input, so let numpy pick its own range instead.
    if np.isfinite(xmin) and np.isfinite(xmax):
        bin_range = (xmin, xmax)
    else:
        bin_range = None

    tops = []
    for xi in datasets:
        # 'bins' is rebound to the returned edges, so every dataset after
        # the first is binned on exactly the same edges.
        heights, bins = np.histogram(xi, bins, bin_range, density=True)
        # density * bin width is the probability mass per bin; the running
        # sum turns that into the cumulative distribution.
        tops.append((heights.astype(float) * np.diff(bins)).cumsum())

    return tops, bins
def my_boxplot_stats(X, whis=1.5, bootstrap=None, labels=None,
                     autorange=False, percents=(25, 75)):
    """Return the statistics needed to draw a boxplot for each dataset.

    Parameters
    ----------
    X : array-like
        Data; converted to a list of datasets with ``_reshape_2D``.
    whis : float, str or sequence of two percentiles, default 1.5
        Whisker reach. A real scalar places the limits ``whis * iqr``
        beyond ``q1`` and ``q3``. One of the strings ``'range'``,
        ``'limit'``, ``'limits'`` or ``'min/max'`` uses the data minimum
        and maximum. Any non-scalar is read as ``(low, high)`` percentiles.
    bootstrap : int or None
        Number of bootstrap resamples for the confidence interval around
        the median. ``None`` uses ``med -/+ 1.57 * iqr / sqrt(N)`` instead.
    labels : sequence or None
        One label per dataset, stored under the ``'label'`` key.
    autorange : bool
        If true and a dataset's ``iqr`` is zero, the whiskers for that
        dataset span the full data range.
    percents : sequence of two numbers, default (25, 75)
        Percentiles used for ``q1`` and ``q3``.

    Returns
    -------
    list of dict
        One dict per dataset with the keys ``mean``, ``med``, ``q1``,
        ``q3``, ``iqr``, ``cilo``, ``cihi``, ``whislo``, ``whishi`` and
        ``fliers`` (plus ``label`` when a label was given). For an empty
        dataset all values are NaN and ``fliers`` is an empty array.

    Raises
    ------
    ValueError
        If ``labels`` does not match the number of datasets, or ``whis``
        is a scalar that is neither real nor one of the accepted strings.
    """

    def _bootstrap_median(data, N=5000):
        # determine 95% confidence intervals of the median
        M = len(data)
        percentiles = [2.5, 97.5]

        bs_index = np.random.randint(M, size=(N, M))
        bsData = data[bs_index]
        estimate = np.median(bsData, axis=1, overwrite_input=True)

        CI = np.percentile(estimate, percentiles)
        return CI

    def _compute_conf_interval(data, med, iqr, bootstrap):
        if bootstrap is not None:
            # Do a bootstrap estimate of notch locations.
            # get conf. intervals around median
            CI = _bootstrap_median(data, N=bootstrap)
            notch_min = CI[0]
            notch_max = CI[1]
        else:
            N = len(data)
            notch_min = med - 1.57 * iqr / np.sqrt(N)
            notch_max = med + 1.57 * iqr / np.sqrt(N)

        return notch_min, notch_max

    # output is a list of dicts
    bxpstats = []

    # convert X to a list of lists
    X = _reshape_2D(X, "X")

    ncols = len(X)
    if labels is None:
        labels = itertools.repeat(None)
    elif len(labels) != ncols:
        raise ValueError("Dimensions of labels and X must be compatible")

    input_whis = whis
    for x, label in zip(X, labels):

        # empty dict
        stats = {}
        if label is not None:
            stats['label'] = label

        # restore whis to the input values in case it got changed in the loop
        whis = input_whis

        # note tricksyness, append up here and then mutate below
        bxpstats.append(stats)

        # if empty, bail
        if len(x) == 0:
            stats['fliers'] = np.array([])
            stats['mean'] = np.nan
            stats['med'] = np.nan
            stats['q1'] = np.nan
            stats['q3'] = np.nan
            # keep the key set identical to the non-empty case
            stats['iqr'] = np.nan
            stats['cilo'] = np.nan
            stats['cihi'] = np.nan
            stats['whislo'] = np.nan
            stats['whishi'] = np.nan
            continue

        # up-convert to an array, just to be safe
        x = np.asarray(x)

        # arithmetic mean
        stats['mean'] = np.mean(x)

        # median
        med = np.percentile(x, 50)
        # quartiles taken from the caller-supplied percentiles
        q1, q3 = np.percentile(x, (percents[0], percents[1]))

        # interquartile range
        stats['iqr'] = q3 - q1
        if stats['iqr'] == 0 and autorange:
            whis = 'range'

        # conf. interval around median
        stats['cilo'], stats['cihi'] = _compute_conf_interval(
            x, med, stats['iqr'], bootstrap)

        # lowest/highest non-outliers
        if np.isscalar(whis):
            if np.isreal(whis):
                loval = q1 - whis * stats['iqr']
                hival = q3 + whis * stats['iqr']
            elif whis in ['range', 'limit', 'limits', 'min/max']:
                loval = np.min(x)
                hival = np.max(x)
            else:
                raise ValueError('whis must be a float, valid string, or list '
                                 'of percentiles')
        else:
            loval = np.percentile(x, whis[0])
            hival = np.percentile(x, whis[1])

        # get high extreme
        wiskhi = np.compress(x <= hival, x)
        if len(wiskhi) == 0 or np.max(wiskhi) < q3:
            stats['whishi'] = q3
        else:
            stats['whishi'] = np.max(wiskhi)

        # get low extreme
        wisklo = np.compress(x >= loval, x)
        if len(wisklo) == 0 or np.min(wisklo) > q1:
            stats['whislo'] = q1
        else:
            stats['whislo'] = np.min(wisklo)

        # compute a single array of outliers
        stats['fliers'] = np.hstack([
            np.compress(x < stats['whislo'], x),
            np.compress(x > stats['whishi'], x)
        ])

        # add in the remaining stats
        stats['q1'], stats['med'], stats['q3'] = q1, med, q3

    return bxpstats