def cumplot(ax, x, y, label):
    """Plot ``uf.cumsum(y)`` against ``uf.cumsum(x)`` on the axes ``ax``.

    Points where either accumulated series is NaN or infinite are left
    out of the plotted line.  Both axis labels are built from the
    ``'quantity'`` and ``'unit'`` entries of the accumulated arrays'
    ``attrs``, formatted as ``"<quantity> (<unit>)"``.

    Parameters
    ----------
    ax : object providing ``plot``, ``set_xlabel`` and ``set_ylabel``
        (presumably a matplotlib Axes -- confirm against callers).
    x, y : array-like objects accepted by ``uf.cumsum``; the results
        must expose ``.values`` and ``.attrs``.
    label : label passed on to ``ax.plot``.

    Returns
    -------
    None
    """
    cum_x = uf.cumsum(x)
    cum_y = uf.cumsum(y)

    # A point is unusable as soon as either coordinate is NaN or infinite.
    unusable = (uf.isnan(cum_x) | uf.isnan(cum_y)
                | uf.isinf(cum_x) | uf.isinf(cum_y))
    keep = (~unusable).values

    _line, = ax.plot(cum_x.values[keep], cum_y.values[keep], label=label)

    y_attrs = cum_y.attrs
    ax.set_ylabel(y_attrs['quantity'] + ' (' + y_attrs['unit'] + ')')
    x_attrs = cum_x.attrs
    ax.set_xlabel(x_attrs['quantity'] + ' (' + x_attrs['unit'] + ')')
def interm(x, y, axis=None, threshold=0.1):
    """Ratio of "both above threshold" to "at least one above threshold".

    Only samples where neither ``x`` nor ``y`` is NaN are counted.  The
    numerator counts samples where both ``x`` and ``y`` exceed
    ``threshold``; the denominator counts samples where at least one of
    them does.

    Parameters
    ----------
    x, y : array-like objects supporting ``>``, ``&``, ``|`` and
        ``ufuncs.isnan``.
    axis : axis (or axes) forwarded to ``np.nansum``; ``None`` reduces
        over all elements.
    threshold : value a sample must exceed to be counted.  Defaults to
        ``0.1``, the cut-off that used to be hard-coded.

    Returns
    -------
    The quotient of the two counts.  The division is not guarded: when
    no sample exceeds the threshold the denominator is zero.
    """
    # The validity mask is shared by both counts, so build it once.
    valid = ~ufuncs.isnan(x) & ~ufuncs.isnan(y)
    x_above = x > threshold
    y_above = y > threshold

    both = np.nansum(x_above & y_above & valid, axis=axis)
    either = np.nansum((x_above | y_above) & valid, axis=axis)
    return both / either
def timeseries(array):
    """Plot ``array`` against its ``'time'`` coordinate on a new figure.

    NaN samples are removed before plotting.  The y-axis label is built
    from ``array.attrs['quantity']`` and ``array.attrs['unit']``, the
    x-axis label is ``'time'``, and the line is labelled with
    ``array.name`` in the legend.

    Parameters
    ----------
    array : labelled array exposing ``['time']``, ``.values``, ``.name``
        and ``.attrs``, and supporting boolean-mask indexing.

    Returns
    -------
    (fig, ax) : the figure and axes created by ``pl.subplots()``
        (``pl`` is presumably ``matplotlib.pyplot`` -- confirm import).
    """
    fig, ax = pl.subplots()

    # Keep only the samples that are not NaN.
    array = array[~uf.isnan(array)]

    ax.plot(array['time'].values, array.values, label=array.name)
    ax.set_ylabel(array.attrs['quantity'] + ' (' + array.attrs['unit'] + ')')
    ax.legend()
    ax.set_xlabel('time')

    # Turn the grid on for this axes.  Assigning ``True`` to ``pl.grid``
    # would only overwrite that module attribute and draw nothing.
    ax.grid(True)
    return fig, ax
def kde(x, y, bandwidth=1.0, log=False, num=200, **kwargs):
    """Two-dimensional kernel density estimation of ``x`` versus ``y``.

    Both inputs are standardized (mean removed, divided by the standard
    deviation) before a ``KernelDensity`` model is fitted, so
    ``bandwidth`` applies to the standardized data.  The density is then
    evaluated on a regular ``num`` x ``num`` grid spanning the range of
    the standardized inputs.

    Parameters
    ----------
    x, y : labelled arrays with identical ``dims``; they must expose
        ``.values``, ``.name`` and ``.attrs``.
    bandwidth : kernel bandwidth.  If ``None`` it is selected by a
        5-fold ``GridSearchCV`` over ``np.linspace(0.05, 1.0)``.
    log : when exactly ``True``, ``uf.log`` is applied to both inputs
        first.  The returned coordinates are then in log space, because
        only the standardization is undone.
    num : number of grid points along each axis.
    **kwargs : accepted but not used.

    Returns
    -------
    An instance of ``type(x)`` named ``'density'`` whose coordinates are
    the de-standardized grid values and whose ``attrs`` hold the
    bandwidth that was used.

    Raises
    ------
    ValueError
        If ``x.dims`` and ``y.dims`` differ.
    """
    result_type = type(x)
    xdim = x.dims
    ydim = y.dims
    if xdim != ydim:
        err = 'unequal dimensions: %s and %s' % (str(xdim), str(ydim))
        raise ValueError(err)

    # prepare training data
    if log is True:
        x, y = uf.log(x), uf.log(y)
    x_m, y_m = uf.nanmean(x), uf.nanmean(y)
    x_s, y_s = uf.nanstd(x), uf.nanstd(y)
    x = (x - x_m) / x_s
    y = (y - y_m) / y_s

    # NOTE(review): infinite values are masked out of the training set
    # here, but they have already entered the mean/std above and still
    # enter the grid limits below -- confirm whether that is intended.
    mask = (~uf.isnan(y) & ~uf.isnan(x) & ~uf.isinf(y) & ~uf.isinf(x))
    train = np.vstack((x.values[mask.values], y.values[mask.values])).T

    # estimate bandwidth with cross validation
    if bandwidth is None:
        gcv = GridSearchCV(KernelDensity(),
                           {'bandwidth': np.linspace(0.05, 1.0)},
                           cv=5)
        gcv.fit(train)
        bandwidth = gcv.best_params_['bandwidth']

    # Named ``estimator`` so the local does not shadow this function.
    estimator = KernelDensity(bandwidth=bandwidth).fit(train)

    # prepare sample grid
    x_g = np.linspace(uf.nanmin(x).values, uf.nanmax(x).values, num=num)
    y_g = np.linspace(uf.nanmin(y).values, uf.nanmax(y).values, num=num)
    x_grid, y_grid = np.meshgrid(x_g, y_g)
    grid = np.vstack((x_grid.flatten(), y_grid.flatten())).T

    # score_samples returns log-densities; the transpose puts the x axis
    # first so the result matches coords=[x, y].
    z = np.exp(estimator.score_samples(grid)).reshape(x_grid.shape).T

    # Undo the standardization for the output coordinates.
    x = Index(x_g * x_s + x_m, name=x.name, attrs=x.attrs)
    y = Index(y_g * y_s + y_m, name=y.name, attrs=y.attrs)
    return result_type(z, coords=[x, y], name='density',
                       attrs={'bandwidth': bandwidth})
def linregress2(x, y, prob=0.95, **kwargs):
    """Linear least-squares regression of ``y`` on ``x``.

    The covariance and correlation are taken from ``uf.cov`` and
    ``uf.corr`` on the inputs as given.  Samples where either input is
    NaN are then replaced by NaN in both before the count, means and
    residual spread are derived.

    Parameters
    ----------
    x, y : array-like objects accepted by the ``uf`` helpers.
    prob : accepted but not used.
    **kwargs : accepted but not used.

    Returns
    -------
    The result of ``da.compute`` on, in this order: intercept, slope,
    correlation coefficient, standard error, residual standard error,
    slope standard error, covariance matrix and sample count.
    """
    # Second-order moments of the inputs exactly as passed in.
    cov = uf.cov(x, y).task
    corr = uf.corr(x, y).task[0, 1]
    slope = (cov[0, 1] / cov[0, 0])

    # Blank out every sample for which either input is missing.
    missing = uf.isnan(x) | uf.isnan(y)
    x = uf.where(missing, np.nan, x)
    y = uf.where(missing, np.nan, y)
    count = uf.nansum(~uf.isnan(x)).task

    intercept = uf.nanmean(y) - uf.nanmean(x) * slope

    stderr = uf.sqrt((1 - corr**2) * cov[1, 1] / cov[0, 0] / (count - 2))
    residual_err = (uf.nanstd(y - intercept - x * slope)
                    * ((count - 1) / (count - 2))**0.5)
    slope_err = residual_err / uf.nanstd(x) * count**-0.5

    return da.compute(intercept.task, slope, corr, stderr,
                      residual_err.task, slope_err.task, cov, count)