Esempio n. 1
0
def cumplot(ax, x, y, label):
    """
    Plot the cumulative sum of ``y`` against the cumulative sum of ``x``.

    Both inputs are passed through ``uf.cumsum``; positions where either
    cumulative series is NaN or infinite are left out of the plotted line.
    The axis labels are built from the ``'quantity'`` and ``'unit'`` entries
    of each cumulative series' ``attrs``.

    Parameters
    ----------
    ax : axes-like object providing ``plot``, ``set_xlabel``, ``set_ylabel``
    x, y : array-like objects exposing ``.values`` and ``.attrs``
    label : legend label forwarded to ``ax.plot``
    """
    cum_x = uf.cumsum(x)
    cum_y = uf.cumsum(y)

    # A point is dropped as soon as either coordinate is NaN or +/-inf.
    unusable = (uf.isnan(cum_x) | uf.isnan(cum_y)
                | uf.isinf(cum_x) | uf.isinf(cum_y))
    keep = (~unusable).values

    # Single-target unpacking: raises unless exactly one line is returned.
    (_curve,) = ax.plot(cum_x.values[keep], cum_y.values[keep], label=label)

    y_text = cum_y.attrs['quantity'] + ' (' + cum_y.attrs['unit'] + ')'
    ax.set_ylabel(y_text)
    x_text = cum_x.attrs['quantity'] + ' (' + cum_x.attrs['unit'] + ')'
    ax.set_xlabel(x_text)
Esempio n. 2
0
def interm(x, y, axis=None):
    """
    Ratio of "both above threshold" counts to "either above threshold" counts.

    Only positions where neither ``x`` nor ``y`` is NaN are counted.  The
    numerator counts positions where both values exceed 0.1; the denominator
    counts positions where at least one of them exceeds 0.1.  Counts are
    taken with ``np.nansum`` along ``axis`` (all elements when ``axis`` is
    None).

    There is no guard for a zero denominator: if no position exceeds 0.1
    the division is performed as-is.
    """
    x_above = x > 0.1
    y_above = y > 0.1
    x_ok = ~ufuncs.isnan(x)
    y_ok = ~ufuncs.isnan(y)

    both = x_above & y_above & x_ok & y_ok
    either = (x_above | y_above) & x_ok & y_ok

    count_both = np.nansum(both, axis=axis)
    count_either = np.nansum(either, axis=axis)
    return count_both / count_either
Esempio n. 3
0
def timeseries(array):
    """
    Plot a one-dimensional series against its ``'time'`` coordinate.

    NaN entries are removed before plotting.  The y-axis label is built from
    ``array.attrs['quantity']`` and ``array.attrs['unit']``; the legend entry
    uses ``array.name``.

    Parameters
    ----------
    array : array-like object supporting boolean indexing and exposing
        ``['time']``, ``.values``, ``.name`` and ``.attrs``

    Returns
    -------
    (fig, ax) : the figure and axes created by ``pl.subplots()``
    """
    fig, ax = pl.subplots()
    array = array[~uf.isnan(array)]
    ax.plot(array['time'].values, array.values, label=array.name)
    ax.set_ylabel(array.attrs['quantity'] + ' (' + array.attrs['unit'] + ')')
    ax.legend()
    ax.set_xlabel('time')
    # Enable the grid on this axes.  The previous ``pl.grid = True`` only
    # rebound the plotting module's ``grid`` attribute to a boolean: no grid
    # was drawn and any later ``pl.grid(...)`` call would fail.
    ax.grid(True)
    return fig, ax
Esempio n. 4
0
def kde(x, y, bandwidth=1.0, log=False, num=200, **kwargs):
    """
    kernel density estimation on a regular two-dimensional grid

    ``x`` and ``y`` are (optionally log-transformed and then) standardised
    to zero mean and unit standard deviation, a ``KernelDensity`` model is
    fitted to the finite sample pairs, and the model is evaluated on a
    ``num`` x ``num`` grid spanning the range of the standardised data.

    Parameters
    ----------
    x, y : array objects with identical ``dims``
    bandwidth : kernel bandwidth in standardised units; when None it is
        chosen by a 5-fold ``GridSearchCV`` over ``np.linspace(0.05, 1.0)``
    log : the logarithm of both inputs is taken only when this is
        exactly ``True``
    num : number of grid points along each axis
    **kwargs : accepted but not used

    Returns
    -------
    An object of the same type as ``x`` named ``'density'`` holding the
    exponentiated ``score_samples`` output, with the bandwidth stored in its
    attrs.  Coordinates are mapped back through the standardisation only,
    so with ``log=True`` they remain in log space.

    Raises
    ------
    Exception
        if ``x.dims`` and ``y.dims`` differ
    """
    result_type = type(x)
    dims_x, dims_y = x.dims, y.dims
    if dims_x != dims_y:
        raise Exception(
            'unequal dimensions: %s and %s' % (str(dims_x), str(dims_y)))

    # optional log transform, then standardise both variables
    if log is True:
        x = uf.log(x)
        y = uf.log(y)
    mean_x = uf.nanmean(x)
    mean_y = uf.nanmean(y)
    std_x = uf.nanstd(x)
    std_y = uf.nanstd(y)
    x = (x - mean_x) / std_x
    y = (y - mean_y) / std_y

    # training samples: one row per pair where both values are finite
    unusable = uf.isnan(y) | uf.isnan(x) | uf.isinf(y) | uf.isinf(x)
    keep = (~unusable).values
    samples = np.vstack((x.values[keep], y.values[keep])).T

    # pick the bandwidth by cross validation when none was supplied
    if bandwidth is None:
        search = GridSearchCV(KernelDensity(),
                              {'bandwidth': np.linspace(0.05, 1.0)},
                              cv=5)
        search.fit(samples)
        bandwidth = search.best_params_['bandwidth']
    estimator = KernelDensity(bandwidth=bandwidth).fit(samples)

    # evaluation grid over the range of the standardised data
    axis_x = np.linspace(uf.nanmin(x).values, uf.nanmax(x).values, num=num)
    axis_y = np.linspace(uf.nanmin(y).values, uf.nanmax(y).values, num=num)
    mesh_x, mesh_y = np.meshgrid(axis_x, axis_y)
    points = np.vstack((mesh_x.ravel(), mesh_y.ravel())).T

    log_density = estimator.score_samples(points)
    # transpose so the first axis of z runs along x
    z = np.exp(log_density).reshape(mesh_x.shape).T

    # undo the standardisation for the output coordinates
    x_coord = Index(axis_x * std_x + mean_x, name=x.name, attrs=x.attrs)
    y_coord = Index(axis_y * std_y + mean_y, name=y.name, attrs=y.attrs)
    return result_type(z,
                       coords=[x_coord, y_coord],
                       name='density',
                       attrs={'bandwidth': bandwidth})
Esempio n. 5
0
def linregress2(x, y, prob=0.95, **kwargs):
    """
    linear least-squares regression of ``y`` on ``x``

    The covariance and correlation matrices come from ``uf.cov`` and
    ``uf.corr`` applied to the inputs as given; only afterwards are
    positions where either input is NaN blanked out in both series for the
    remaining statistics.

    Parameters
    ----------
    x, y : array objects accepted by the ``uf`` helpers
    prob : accepted but not used
    **kwargs : accepted but not used

    Returns
    -------
    The result of ``da.compute`` on, in order: intercept, slope,
    correlation coefficient, standard error derived from the correlation
    and variances, residual standard error, slope standard error derived
    from the residuals, covariance matrix, and the number of non-NaN pairs.
    """
    cov = uf.cov(x, y).task
    r = uf.corr(x, y).task[0, 1]
    var_x = cov[0, 0]
    var_y = cov[1, 1]
    slope = cov[0, 1] / var_x

    # blank out every pair in which at least one member is missing
    missing = uf.isnan(x) | uf.isnan(y)
    x = uf.where(missing, np.nan, x)
    y = uf.where(missing, np.nan, y)

    n = uf.nansum(~uf.isnan(x)).task
    dof = n - 2
    intercept = uf.nanmean(y) - uf.nanmean(x) * slope

    stderr = uf.sqrt((1 - r**2) * var_y / var_x / dof)
    residuals = y - intercept - x * slope
    stderr_res = uf.nanstd(residuals) * ((n - 1) / dof)**0.5
    stderr_slope = stderr_res / uf.nanstd(x) * n**-0.5

    return da.compute(intercept.task, slope, r, stderr, stderr_res.task,
                      stderr_slope.task, cov, n)