Example #1
0
    def _checks_and_wrangling(self, x, w):
        """Normalise data ``x`` and optional weights ``w`` into per-dataset form.

        Scalars are wrapped in a list, empty input becomes ``np.array([[]])``
        and everything else is passed through ``cbook._reshape_2D``. The
        number of resulting datasets is stored on ``self.n_data_sets``.

        Parameters
        ----------
        x : scalar or array-like
            Input data.
        w : array-like or None
            Weights; when given they are reshaped the same way as ``x``.

        Returns
        -------
        tuple
            ``(x, w)`` after reshaping (``w`` is returned unchanged if None).

        Raises
        ------
        ValueError
            If ``w`` does not contain the same number of datasets as ``x``,
            or a dataset's weights differ in length from its data.
        """
        import re

        def _reshape(values, name):
            # Compare (major, minor) numerically: comparing version *strings*
            # is lexicographic, so e.g. '10.0.0' < '2.1.0' would be True.
            found = re.match(r'(\d+)\.(\d+)', mpl.__version__)
            is_legacy = (
                found is not None
                and tuple(int(part) for part in found.groups()) < (2, 1)
            )
            if is_legacy:
                # Releases before 2.1 take no ``name`` argument.
                return cbook._reshape_2D(values)
            return cbook._reshape_2D(values, name)

        # Manage the input data in the same fashion as mpl
        if np.isscalar(x):
            x = [x]

        # Massage 'x' for processing.
        if np.size(x) == 0:
            x = np.array([[]])
        else:
            x = _reshape(x, 'x')

        self.n_data_sets = len(x)  # number of datasets

        if w is not None:
            # We need to do to 'weights' what was done to 'x'
            w = _reshape(w, 'w')

            if len(w) != self.n_data_sets:
                raise ValueError('weights should have the same shape as x')

            for xi, wi in zip(x, w):
                if wi is not None and len(wi) != len(xi):
                    raise ValueError('weights should have the same shape as x')

        return x, w
Example #2
0
def test_reshape2d_pandas(pd):
    """Check that ``cbook._reshape_2D`` splits a DataFrame into its columns.

    ``pd`` is the pandas module, handed in by the caller (presumably a
    pytest fixture -- verify). This check is kept separate to allow the rest
    of the tests to run if pandas is not available.
    """
    values = np.arange(30).reshape(10, 3)
    frame = pd.DataFrame(values, columns=["a", "b", "c"])
    reshaped = cbook._reshape_2D(frame, 'x')

    # zip() stops at the shorter input, so pin the number of datasets first;
    # otherwise a truncated result would pass the loop below unnoticed.
    assert len(reshaped) == values.shape[1]

    # Need to check each column on its own because _reshape_2D returns a
    # list of arrays:
    for expected, actual in zip(values.T, reshaped):
        np.testing.assert_array_equal(expected, actual)
Example #3
0
def test_reshape2d_xarray(xr):
    """Check ``cbook._reshape_2D`` against a 2D ``xr.DataArray``.

    Kept separate to allow the rest of the tests to run if xarray is not
    available; ``xr`` is the xarray module handed in by the caller.
    """
    raw = np.arange(30).reshape(10, 3)
    data_array = xr.DataArray(raw, dims=["x", "y"])
    reshaped = cbook._reshape_2D(data_array, 'x')

    # _reshape_2D returns a list of arrays, so compare them one at a time
    # against the rows of the transposed input.
    for expected, actual in zip(raw.T, reshaped):
        np.testing.assert_array_equal(expected, actual)
Example #4
0
def test_reshape2d():
    """Exercise ``cbook._reshape_2D`` on lists, arrays, ragged input and an
    ndarray subclass."""

    class dummy:
        pass

    def reshaped_shape(data):
        # Shape of whatever _reshape_2D produces for *data*.
        return np.shape(cbook._reshape_2D(data, 'x'))

    assert reshaped_shape([]) == (1, 0)

    flat_objects = [dummy() for _ in range(5)]
    assert reshaped_shape(flat_objects) == (1, 5)

    assert reshaped_shape(np.arange(5)) == (1, 5)

    nested_objects = [[dummy() for _ in range(5)] for _ in range(3)]
    assert reshaped_shape(nested_objects) == (3, 5)

    # this is strange behaviour, but a (3, 5) array comes back as (5, 3)
    assert reshaped_shape(np.random.rand(3, 5)) == (5, 3)

    # Rows of different lengths: internally this becomes a 1D object array
    # of lists, and the result must be a list of 1D arrays.
    ragged = cbook._reshape_2D([[1, 2, 3], [3, 4], [2]], 'x')
    assert isinstance(ragged, list)
    for index, expected_len in enumerate((3, 2, 1)):
        row = ragged[index]
        assert isinstance(row, np.ndarray) and row.shape == (expected_len,)

    # Numpy subclasses may hand back subclass instances when iterated, and
    # those can be iterable even when they are scalars. Emulate that with a
    # subclass whose iteration and item access return Numpy 'scalars' that
    # would pass e.g. isinstance(x, collections.abc.Iterable).

    class ArraySubclass(np.ndarray):

        def __iter__(self):
            for element in super().__iter__():
                yield np.array(element)

        def __getitem__(self, item):
            return np.array(super().__getitem__(item))

    backing = np.arange(10, dtype=float)
    wrapped = ArraySubclass((10,), dtype=float, buffer=backing.data)

    result = cbook._reshape_2D(wrapped, 'x')

    # The array must not be split up into many individual ArraySubclass
    # items, which is what used to happen due to a bug in _reshape_2D.
    assert len(result) == 1
    assert isinstance(result[0], ArraySubclass)
Example #5
0
def test_reshape2d():
    """Check the output layout of ``cbook._reshape_2D`` for several inputs."""

    class dummy():
        pass

    # (input, expected shape of the reshaped output), checked in order.
    shape_cases = [
        ([], (1, 0)),
        ([dummy() for _ in range(5)], (1, 5)),
        (np.arange(5), (1, 5)),
        ([[dummy() for _ in range(5)] for _ in range(3)], (3, 5)),
        # this is strange behaviour, but 2D arrays come back transposed
        (np.random.rand(3, 5), (5, 3)),
    ]
    for data, expected_shape in shape_cases:
        assert np.shape(cbook._reshape_2D(data, 'x')) == expected_shape

    # A list of lists with different lengths is internally converted to a
    # 1D object array of lists; each entry must come back as a 1D ndarray.
    uneven = cbook._reshape_2D([[1, 2, 3], [3, 4], [2]], 'x')
    assert isinstance(uneven, list)
    first, second, third = uneven[0], uneven[1], uneven[2]
    assert isinstance(first, np.ndarray) and first.shape == (3,)
    assert isinstance(second, np.ndarray) and second.shape == (2,)
    assert isinstance(third, np.ndarray) and third.shape == (1,)

    # This must also work for Numpy subclasses whose items are themselves
    # subclass instances and hence possibly iterable even though they are
    # scalars. The subclass below returns Numpy 'scalars' on iteration and
    # on item access; such values are technically iterable when checked with
    # isinstance(x, collections.abc.Iterable).

    class ArraySubclass(np.ndarray):

        def __iter__(self):
            for entry in super().__iter__():
                yield np.array(entry)

        def __getitem__(self, item):
            return np.array(super().__getitem__(item))

    source = np.arange(10, dtype=float)
    subclassed = ArraySubclass((10,), dtype=float, buffer=source.data)

    out = cbook._reshape_2D(subclassed, 'x')

    # One dataset is expected, not ten single-element ArraySubclass pieces
    # (which is what used to happen due to a bug in _reshape_2D).
    assert len(out) == 1
    assert isinstance(out[0], ArraySubclass)
Example #6
0
def test_reshape2d():
    """Check the shapes ``cbook._reshape_2D`` produces for 1D and 2D input."""
    class dummy():
        pass

    def reshaped_shape(data):
        # Shape of the _reshape_2D output for *data*.
        return np.shape(cbook._reshape_2D(data, 'x'))

    # A flat list of arbitrary objects is treated as one dataset.
    objects = [dummy() for _ in range(5)]
    assert reshaped_shape(objects) == (1, 5)

    # So is a 1D array.
    assert reshaped_shape(np.arange(5)) == (1, 5)

    # A list of lists keeps its row layout.
    object_rows = [[dummy() for _ in range(5)] for _ in range(3)]
    assert reshaped_shape(object_rows) == (3, 5)

    # this is strange behaviour, but a 2D array comes back transposed
    assert reshaped_shape(np.random.rand(3, 5)) == (5, 3)
Example #7
0
def test_reshape2d():
    """Verify output shapes of ``cbook._reshape_2D`` for a few input kinds."""
    class dummy():
        pass

    # Each entry pairs an input with the shape expected after reshaping.
    cases = (
        ([dummy() for _ in range(5)], (1, 5)),
        (np.arange(5), (1, 5)),
        ([[dummy() for _ in range(5)] for _ in range(3)], (3, 5)),
        # this is strange behaviour, but...
        (np.random.rand(3, 5), (5, 3)),
    )
    for data, expected in cases:
        reshaped = cbook._reshape_2D(data, 'x')
        assert np.shape(reshaped) == expected
def nofig_cumulative_hist(x, bins):
    r"""
    Compute cumulative, density-normalised histograms without drawing.

    The following function is based off the hist() class-based function
    within the _axes.py file of matplotlib located in:
    ...\matplotlib-base-3.0.3-py37h3e3dc42_0\Lib\site-packages\matplotlib\axes\_axes.py
    The hist() method in matplotlib outputs figures along with data. Figures
    take computation time, and iterating that method over multiple datasets
    can cause crashing, while downstream functions still need data in a
    format similar to the matplotlib output. This function therefore keeps
    only the numerical part and returns the heights and the bin edges.

    Parameters
    ----------
    x : array-like
        Data points; split into datasets by ``cbook._reshape_2D``.
    bins : int, str or sequence
        Forwarded as the ``bins`` argument of ``np.histogram``.

    Return
    ------
    tops : list of np.ndarray (dtype float)
        One array per dataset: the running sum of density times bin width.
    bins : np.ndarray
        Bin edges as returned by ``np.histogram``.
    """
    datasets = cbook._reshape_2D(x, 'x')

    # Common data range over all non-empty datasets.
    xmin = np.inf
    xmax = -np.inf
    for xi in datasets:
        if len(xi) > 0:
            # python's min/max ignore a nan second argument, so an all-nan
            # dataset leaves the running limits untouched.
            xmin = min(xmin, np.nanmin(xi))
            xmax = max(xmax, np.nanmax(xi))

    # With no usable data the limits are still (inf, -inf), which
    # np.histogram rejects; let it choose its own default range instead.
    bin_range = (xmin, xmax) if xmin <= xmax else None

    tops = []
    for xi in datasets:
        # After the first call ``bins`` holds the edge array, so every later
        # dataset is binned on exactly the same edges.
        m, bins = np.histogram(xi, bins, bin_range, density=True)
        tops.append((m.astype(float) * np.diff(bins)).cumsum())
    return tops, bins
Example #9
0
def my_boxplot_stats(X,
                     whis=1.5,
                     bootstrap=None,
                     labels=None,
                     autorange=False,
                     percents=(25, 75)):
    """
    Return statistics computed for boxplot, with configurable box limits.

    Parameters
    ----------
    X : array-like
        Data; split into datasets by ``_reshape_2D``.
    whis : float, str or pair of percentiles, default 1.5
        A real scalar places the whisker limits ``whis * iqr`` beyond
        ``q1``/``q3``. One of ``'range'``, ``'limit'``, ``'limits'``,
        ``'min/max'`` uses the data minimum and maximum. Anything else is
        indexed as ``whis[0]``/``whis[1]`` and used as percentiles.
    bootstrap : int or None
        If given, the number of bootstrap resamples used for the confidence
        interval of the median; otherwise ``med -/+ 1.57 * iqr / sqrt(N)``.
    labels : sequence or None
        One label per dataset, stored under ``'label'``.
    autorange : bool
        If true and ``iqr`` is zero, whiskers span the whole data range.
    percents : pair of numbers, default (25, 75)
        Percentiles used for ``q1`` and ``q3``.

    Returns
    -------
    list of dict
        One dict per dataset with keys ``mean``, ``iqr``, ``cilo``,
        ``cihi``, ``whislo``, ``whishi``, ``fliers``, ``q1``, ``med``,
        ``q3`` (and ``label`` when provided). For an empty dataset every
        value is NaN and ``fliers`` is an empty array.

    Raises
    ------
    ValueError
        If ``labels`` does not match the number of datasets, or ``whis`` is
        an unrecognised string or a non-real scalar.
    """
    def _bootstrap_median(data, N=5000):
        # determine 95% confidence intervals of the median
        M = len(data)
        percentiles = [2.5, 97.5]

        # N resamples of size M, drawn with replacement.
        bs_index = np.random.randint(M, size=(N, M))
        bsData = data[bs_index]
        estimate = np.median(bsData, axis=1, overwrite_input=True)

        return np.percentile(estimate, percentiles)

    def _compute_conf_interval(data, med, iqr, bootstrap):
        if bootstrap is not None:
            # Do a bootstrap estimate of notch locations:
            # get conf. intervals around median
            CI = _bootstrap_median(data, N=bootstrap)
            return CI[0], CI[1]

        half_width = 1.57 * iqr / np.sqrt(len(data))
        return med - half_width, med + half_width

    range_keywords = ('range', 'limit', 'limits', 'min/max')
    nan_keys = ('mean', 'med', 'q1', 'q3', 'cilo', 'cihi',
                'whislo', 'whishi', 'iqr')

    # output is a list of dicts
    bxpstats = []

    # convert X to a list of lists
    X = _reshape_2D(X, "X")

    ncols = len(X)
    if labels is None:
        labels = itertools.repeat(None)
    elif len(labels) != ncols:
        raise ValueError("Dimensions of labels and X must be compatible")

    input_whis = whis
    for x, label in zip(X, labels):

        stats = {}
        if label is not None:
            stats['label'] = label

        # restore whis to the input values in case it got changed in the loop
        whis = input_whis

        # note tricksyness, append up here and then mutate below
        bxpstats.append(stats)

        # if empty, bail -- but still provide every statistic key so all
        # entries of the result share the same layout
        if len(x) == 0:
            stats['fliers'] = np.array([])
            for key in nan_keys:
                stats[key] = np.nan
            continue

        # up-convert to an array, just to be safe
        x = np.asarray(x)

        # arithmetic mean
        stats['mean'] = np.mean(x)

        # median and the (configurable) lower/upper box percentiles
        med = np.percentile(x, 50)
        q1, q3 = np.percentile(x, (percents[0], percents[1]))

        # interquartile range
        stats['iqr'] = q3 - q1
        if stats['iqr'] == 0 and autorange:
            whis = 'range'

        # conf. interval around median
        stats['cilo'], stats['cihi'] = _compute_conf_interval(
            x, med, stats['iqr'], bootstrap)

        # lowest/highest non-outliers; strings are recognised before any
        # numeric test so they are never handed to np.isreal
        if isinstance(whis, str):
            if whis not in range_keywords:
                raise ValueError('whis must be a float, valid string, or list '
                                 'of percentiles')
            loval = np.min(x)
            hival = np.max(x)
        elif np.isscalar(whis):
            if not np.isreal(whis):
                raise ValueError('whis must be a float, valid string, or list '
                                 'of percentiles')
            loval = q1 - whis * stats['iqr']
            hival = q3 + whis * stats['iqr']
        else:
            loval = np.percentile(x, whis[0])
            hival = np.percentile(x, whis[1])

        # get high extreme
        wiskhi = np.compress(x <= hival, x)
        if len(wiskhi) == 0 or np.max(wiskhi) < q3:
            stats['whishi'] = q3
        else:
            stats['whishi'] = np.max(wiskhi)

        # get low extreme
        wisklo = np.compress(x >= loval, x)
        if len(wisklo) == 0 or np.min(wisklo) > q1:
            stats['whislo'] = q1
        else:
            stats['whislo'] = np.min(wisklo)

        # compute a single array of outliers
        stats['fliers'] = np.hstack([
            np.compress(x < stats['whislo'], x),
            np.compress(x > stats['whishi'], x)
        ])

        # add in the remaining stats
        stats['q1'], stats['med'], stats['q3'] = q1, med, q3

    return bxpstats