def _avg_image(self, images):
    images, auxvars, weights = images
    print("{0}: plotting {1} images".format(self.name, images.shape[0]))
    print("min weight: {0} max weight: {1}".format(weights.min(), weights.max()))
    with ProgressBar():
        avg_image = da.tensordot(images, weights, axes=(0, 0)).compute() / weights.sum()
    return avg_image
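# A minimal, self-contained sketch of the weighted-average pattern used in
# _avg_image above, assuming synthetic random data with hypothetical shapes
# (100 images of 32x32 pixels and one weight per image):
import dask.array as da

images = da.random.random((100, 32, 32), chunks=(10, 32, 32))
weights = da.random.random(100, chunks=10)

# Contracting axis 0 of both operands computes sum_i(weights[i] * images[i]);
# dividing by weights.sum() turns it into a weighted mean over the images.
avg_image = da.tensordot(images, weights, axes=(0, 0)) / weights.sum()
print(avg_image.compute().shape)  # (32, 32)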
def iter_tdot(data, masks, repeats):
    frames = len(data)
    maskcount = len(masks)
    # can't preallocate here as tensordot doesn't have an out-parameter
    for repeat in range(repeats):
        result = da.tensordot(data, masks, (1, 1))
        result.compute(num_workers=WORKERS)
    return result
def initbf(varr: xr.DataArray, A: xr.DataArray, C: xr.DataArray) -> Tuple[xr.DataArray, xr.DataArray]:
    """
    Initialize background terms given spatial and temporal components of cells.

    A movie representation (with dimensions "height", "width" and "frame") of
    estimated cell activities is computed as the product between the spatial
    components matrix and the temporal components matrix of cells over the
    "unit_id" dimension. The residual movie is then computed by subtracting the
    estimated cell activity movie from the input movie. The spatial footprint
    of background `b` is the mean of the residual movie over the "frame"
    dimension, and the temporal component of background `f` is the mean of the
    residual movie over the "height" and "width" dimensions.

    Parameters
    ----------
    varr : xr.DataArray
        Input movie data. Should have dimensions ("frame", "height", "width").
    A : xr.DataArray
        Estimation of spatial footprints of cells. Should have dimensions
        ("unit_id", "height", "width").
    C : xr.DataArray
        Estimation of temporal activities of cells. Should have dimensions
        ("unit_id", "frame").

    Returns
    -------
    b : xr.DataArray
        Initial estimation of the spatial footprint of background. Has
        dimensions ("height", "width").
    f : xr.DataArray
        Initial estimation of the temporal activity of background. Has
        dimension "frame".
    """
    A = A.data.map_blocks(sparse.COO).compute()
    Yb = (varr - darr.tensordot(C, A, axes=[(0,), (0,)])).clip(0)
    b = Yb.mean("frame")
    f = Yb.mean(["height", "width"])
    arr_opt = fct.partial(custom_arr_optimize, rename_dict={"tensordot": "tensordot_restricted"})
    with da.config.set(array_optimize=arr_opt):
        b = da.optimize(b)[0]
        f = da.optimize(f)[0]
    b, f = da.compute([b, f])[0]
    return b, f
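# A toy NumPy check (hypothetical sizes: 3 cells, 10 frames, 4x5 pixels) of
# the residual-background idea in initbf: subtract the estimated cell activity
# movie from the input movie, then average the residual.
import numpy as np

varr = np.random.rand(10, 4, 5)   # movie (frame, height, width)
A = np.random.rand(3, 4, 5)       # spatial footprints (unit_id, height, width)
C = np.random.rand(3, 10)         # temporal activities (unit_id, frame)

# Contract "unit_id" to get the estimated activity movie, shape (frame, height, width).
Yb = (varr - np.tensordot(C, A, axes=[(0,), (0,)])).clip(0)
b = Yb.mean(axis=0)        # spatial background, shape (height, width)
f = Yb.mean(axis=(1, 2))   # temporal background, shape (frame,)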
def test_tensordot():
    x = np.arange(400).reshape((20, 20))
    a = da.from_array(x, chunks=(5, 4))
    y = np.arange(200).reshape((20, 10))
    b = da.from_array(y, chunks=(4, 5))

    for axes in [1, (1, 0)]:
        assert_eq(da.tensordot(a, b, axes=axes), np.tensordot(x, y, axes=axes))
        assert_eq(da.tensordot(x, b, axes=axes), np.tensordot(x, y, axes=axes))
        assert_eq(da.tensordot(a, y, axes=axes), np.tensordot(x, y, axes=axes))

    assert same_keys(da.tensordot(a, b, axes=(1, 0)), da.tensordot(a, b, axes=(1, 0)))
    with pytest.warns(None):  # Increasing number of chunks warning
        assert not same_keys(da.tensordot(a, b, axes=0), da.tensordot(a, b, axes=1))
def test_tensordot():
    x = da.random.random((2, 3, 4), chunks=(1, 2, 2))
    x[x < 0.8] = 0
    y = da.random.random((4, 3, 2), chunks=(2, 2, 1))
    y[y < 0.8] = 0
    xx = x.map_blocks(sparse.COO.from_numpy)
    yy = y.map_blocks(sparse.COO.from_numpy)

    assert_eq(da.tensordot(x, y, axes=(2, 0)), da.tensordot(xx, yy, axes=(2, 0)))
    assert_eq(da.tensordot(x, y, axes=(1, 1)), da.tensordot(xx, yy, axes=(1, 1)))
    assert_eq(
        da.tensordot(x, y, axes=((1, 2), (1, 0))),
        da.tensordot(xx, yy, axes=((1, 2), (1, 0))),
    )
def test_tensordot():
    x = da.random.random((2, 3, 4), chunks=(1, 2, 2))
    x[x < 0.4] = 0
    y = da.random.random((4, 3, 2), chunks=(2, 2, 1))
    y[y < 0.4] = 0
    xx = da.ma.masked_equal(x, 0)
    yy = da.ma.masked_equal(y, 0)

    assert_eq(da.tensordot(x, y, axes=(2, 0)),
              da.ma.filled(da.tensordot(xx, yy, axes=(2, 0)), 0))
    assert_eq(da.tensordot(x, y, axes=(1, 1)),
              da.ma.filled(da.tensordot(xx, yy, axes=(1, 1)), 0))
    assert_eq(da.tensordot(x, y, axes=((1, 2), (1, 0))),
              da.ma.filled(da.tensordot(xx, yy, axes=((1, 2), (1, 0))), 0))
def test_tensordot():
    x = np.arange(400).reshape((20, 20))
    a = da.from_array(x, chunks=(5, 4))
    y = np.arange(200).reshape((20, 10))
    b = da.from_array(y, chunks=(4, 5))

    for axes in [1, (1, 0)]:
        assert_eq(da.tensordot(a, b, axes=axes), np.tensordot(x, y, axes=axes))
        assert_eq(da.tensordot(x, b, axes=axes), np.tensordot(x, y, axes=axes))
        assert_eq(da.tensordot(a, y, axes=axes), np.tensordot(x, y, axes=axes))

    assert same_keys(da.tensordot(a, b, axes=(1, 0)), da.tensordot(a, b, axes=(1, 0)))

    # Increasing number of chunks warning
    with pytest.warns(None if sys.version_info[0] == 2 else da.PerformanceWarning):
        assert not same_keys(da.tensordot(a, b, axes=0), da.tensordot(a, b, axes=1))
def initC(varr: xr.DataArray, A: xr.DataArray) -> xr.DataArray:
    """
    Initialize temporal components given spatial footprints.

    The spatial footprint of each cell is first normalized to unit sum. The
    temporal component is then computed as the tensor dot product between the
    input movie and the spatial footprints over the "height" and "width"
    dimensions. In other words, the initial temporal component is a weighted
    average of fluorescence activities in the input data, with weights defined
    by the spatial footprints.

    Parameters
    ----------
    varr : xr.DataArray
        Input movie data. Should have dimensions ("frame", "height", "width").
    A : xr.DataArray
        Spatial footprints of cells. Should have dimensions
        ("unit_id", "height", "width").

    Returns
    -------
    C : xr.DataArray
        The initial estimation of temporal components for each cell. Has
        dimensions ("unit_id", "frame").
    """
    uids = A.coords["unit_id"]
    fms = varr.coords["frame"]
    A = A.data.map_blocks(sparse.COO).map_blocks(lambda a: a / a.sum()).compute()
    C = darr.tensordot(A, varr, axes=[(1, 2), (1, 2)])
    C = xr.DataArray(C, dims=["unit_id", "frame"],
                     coords={"unit_id": uids, "frame": fms})
    return C
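# A toy NumPy check (hypothetical sizes) of the weighted-average reading in
# initC's docstring: normalize each footprint to unit sum, then contract the
# pixel dimensions so each row of C is a footprint-weighted mean per frame.
import numpy as np

A = np.random.rand(3, 4, 5)                        # (unit_id, height, width)
varr = np.random.rand(10, 4, 5)                    # (frame, height, width)
A = A / A.sum(axis=(1, 2), keepdims=True)          # each footprint sums to 1
C = np.tensordot(A, varr, axes=[(1, 2), (1, 2)])   # -> (unit_id, frame)
assert C.shape == (3, 10)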
def test_tensordot_2(axes):
    x = np.arange(4 * 4 * 4).reshape((4, 4, 4))
    y = da.from_array(x, chunks=2)
    assert_eq(da.tensordot(y, y, axes=axes), np.tensordot(x, x, axes=axes))
def test_tensordot_more_than_26_dims():
    ndim = 27
    x = np.broadcast_to(1, [2] * ndim)
    dx = da.from_array(x, chunks=-1)
    assert_eq(da.tensordot(dx, dx, ndim), np.array(2**ndim))
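# Why the expected value is 2**ndim: contracting every axis of two all-ones
# arrays of shape (2,) * ndim multiplies elementwise and sums all 2**ndim
# products. (The 27-dim case presumably guards against implementations that
# index axes with a 26-letter alphabet, as einsum-style notation does.)
# A small stand-in for the 27-dimensional test above:
import numpy as np

ndim = 3
x = np.broadcast_to(1, [2] * ndim)
assert np.tensordot(x, x, ndim) == 2**ndim  # 1 * 1 summed 8 times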
functions = [
    lambda x: x,
    lambda x: da.expm1(x),
    lambda x: 2 * x,
    lambda x: x / 2,
    lambda x: x**2,
    lambda x: x + x,
    lambda x: x * x,
    lambda x: x[0],
    lambda x: x[:, 1],
    lambda x: x[:1, None, 1:3],
    lambda x: x.T,
    lambda x: da.transpose(x, (1, 2, 0)),
    lambda x: x.sum(),
    lambda x: x.dot(np.arange(x.shape[-1])),
    lambda x: x.dot(np.eye(x.shape[-1])),
    lambda x: da.tensordot(x, np.ones(x.shape[:2]), axes=[(0, 1), (0, 1)]),
    lambda x: x.sum(axis=0),
    lambda x: x.max(axis=0),
    lambda x: x.sum(axis=(1, 2)),
    lambda x: x.astype(np.complex128),
    lambda x: x.map_blocks(lambda x: x * 2),
    lambda x: x.round(1),
    lambda x: x.reshape((x.shape[0] * x.shape[1], x.shape[2])),
    lambda x: abs(x),
    lambda x: x > 0.5,
    lambda x: x.rechunk((4, 4, 4)),
    lambda x: x.rechunk((2, 2, 1)),
]


@pytest.mark.parametrize('func', functions)
functions = [
    lambda x: x,
    pytest.mark.xfail(lambda x: da.expm1(x), reason="expm1 isn't a proper ufunc"),
    lambda x: 2 * x,
    lambda x: x / 2,
    lambda x: x**2,
    lambda x: x + x,
    lambda x: x * x,
    lambda x: x[0],
    lambda x: x[:, 1],
    lambda x: x[:1, None, 1:3],
    lambda x: x.T,
    lambda x: da.transpose(x, (1, 2, 0)),
    lambda x: x.sum(),
    pytest.mark.xfail(lambda x: x.dot(np.arange(x.shape[-1])),
                      reason='cupy.dot(numpy) fails'),
    pytest.mark.xfail(lambda x: x.dot(np.eye(x.shape[-1])),
                      reason='cupy.dot(numpy) fails'),
    pytest.mark.xfail(lambda x: da.tensordot(x, np.ones(x.shape[:2]), axes=[(0, 1), (0, 1)]),
                      reason='cupy.dot(numpy) fails'),
    lambda x: x.sum(axis=0),
    lambda x: x.max(axis=0),
    lambda x: x.sum(axis=(1, 2)),
    lambda x: x.astype(np.complex128),
    lambda x: x.map_blocks(lambda x: x * 2),
    pytest.mark.xfail(lambda x: x.round(1), reason="cupy doesn't support round"),
    lambda x: x.reshape((x.shape[0] * x.shape[1], x.shape[2])),
    lambda x: abs(x),
    lambda x: x > 0.5,
    lambda x: x.rechunk((4, 4, 4)),
    lambda x: x.rechunk((2, 2, 1)),
]
    lambda x: x.moment(order=0),
    pytest.param(
        lambda x: x.std(),
        marks=pytest.mark.xfail(reason="fixed in https://github.com/pydata/sparse/pull/243"),
    ),
    pytest.param(
        lambda x: x.var(),
        marks=pytest.mark.xfail(reason="fixed in https://github.com/pydata/sparse/pull/243"),
    ),
    lambda x: x.dot(np.arange(x.shape[-1])),
    lambda x: x.dot(np.eye(x.shape[-1])),
    lambda x: da.tensordot(x, np.ones(x.shape[:2]), axes=[(0, 1), (0, 1)]),
    lambda x: x.sum(axis=0),
    lambda x: x.max(axis=0),
    lambda x: x.sum(axis=(1, 2)),
    lambda x: x.astype(np.complex128),
    lambda x: x.map_blocks(lambda x: x * 2),
    lambda x: x.round(1),
    lambda x: x.reshape((x.shape[0] * x.shape[1], x.shape[2])),
    lambda x: abs(x),
    lambda x: x > 0.5,
    lambda x: x.rechunk((4, 4, 4)),
    lambda x: x.rechunk((2, 2, 1)),
    lambda x: np.isneginf(x),
    lambda x: np.isposinf(x),
]
data_ends_list = row_info_holder[file_name]["Ends"]
for data_idx in range(len(data_name_list)):
    data_name = data_name_list[data_idx]
    tmp_data_holder = h5file_holder[file_name][data_name]
    tmp_dask_data_holder = da.from_array(
        tmp_data_holder[data_ends_list[data_idx][0]:data_ends_list[data_idx][1]],
        chunks='auto')
    dataset_holder.append(tmp_dask_data_holder)

# Create dask arrays based on these h5 files
dataset = da.concatenate(dataset_holder, axis=0)

# Calculate the correlation matrix.
num_dim = len(dataset.shape)
inner_prod_matrix = da.tensordot(dataset, dataset,
                                 axes=(list(range(1, num_dim)), list(range(1, num_dim))))

# Save the distance patch
name_to_save = address_output + "/distances/patch_{}_{}.npy".format(comm_rank - 1, comm_rank - 1)
da.to_npy_stack(name_to_save, inner_prod_matrix)

# comm.Barrier()  # There is no need to synchronize here

"""
Step Four: Calculate the off-diagonal patch
"""
# Construct the data for off-diagonal patch
patch_number = len(job_list[comm_rank - 1]) - 1
for _local_idx in range(1, patch_number):
    # The first patch calculated for each row is the diagonal patch.
import dask.array as da
from dask.diagnostics import ProgressBar
from deepjets.samples import make_flat_images

ret = make_flat_images(
    '/coepp/cephfs/mel/edawe/deepjets/events/pythia/images/default2/qcd_j1p0_sj0p30_delphes_jets_pileup_images.h5',
    250, 300, mass_min=50, mass_max=110)
images, auxvars, weights = ret

print(images)
print(weights)
print(auxvars)
print(auxvars['generator_weights'])

print("average")
with ProgressBar():
    avg_image = da.tensordot(images, weights, axes=(0, 0)).compute() / weights.sum()
print(avg_image)
def __get_seasonal_means_with_ttest_stats_dask_lazy(
        self, data, season_to_monthperiod=None, start_year=-np.Inf, end_year=np.Inf,
        convert_monthly_accumulators_to_daily=False):
    # mask the resulting fields
    epsilon = 1.0e-5
    mask = np.less_equal(np.abs(data[0, :, :] - self.missing_value), epsilon)

    print("data.shape = ", data.shape)

    data_sel, times_sel = data, self.time

    # select the interval of interest
    if convert_monthly_accumulators_to_daily:
        ndays = da.from_array(
            np.array([calendar.monthrange(d.year, d.month)[1] for d in times_sel]),
            (100,))
        ndays = da.transpose(
            da.broadcast_to(da.from_array(ndays, ndays.shape), data_sel.shape[1:] + ndays.shape),
            axes=(2, 0, 1))
        data_sel = data_sel / ndays

    year_month_to_index_arr = defaultdict(list)
    for i, t in enumerate(times_sel):
        year_month_to_index_arr[t.year, t.month].append(i)

    # calculate monthly means
    monthly_data = {}
    for y in range(start_year, end_year + 1):
        for m in range(1, 13):
            aslice = slice(year_month_to_index_arr[y, m][0], year_month_to_index_arr[y, m][-1] + 1)
            print(aslice, data_sel.shape)
            monthly_data[y, m] = data_sel[aslice, :, :].mean(axis=0)

    result = OrderedDict()
    for season, month_period in season_to_monthperiod.items():
        assert isinstance(month_period, MonthPeriod)

        seasonal_means = []
        ndays_per_season = []
        for p in month_period.get_season_periods(start_year=start_year, end_year=end_year):
            lmos = da.stack([monthly_data[start.year, start.month] for start in p.range("months")])
            ndays_per_month = np.array(
                [calendar.monthrange(start.year, start.month)[1] for start in p.range("months")])
            ndays_per_month = da.from_array(ndays_per_month, ndays_per_month.shape)

            print(p)
            print(lmos.shape, ndays_per_month.shape, ndays_per_month.sum())

            seasonal_mean = da.tensordot(lmos, ndays_per_month, axes=([0], [0])) / ndays_per_month.sum()
            seasonal_means.append(seasonal_mean)
            ndays_per_season.append(ndays_per_month.sum())

        seasonal_means = da.stack(seasonal_means)
        ndays_per_season = np.array(ndays_per_season)
        ndays_per_season = da.from_array(ndays_per_season, ndays_per_season.shape)

        print(seasonal_means.shape, ndays_per_season.shape)
        assert seasonal_means.shape[0] == ndays_per_season.shape[0]

        clim_mean = da.tensordot(seasonal_means, ndays_per_season, axes=([0], [0])) / ndays_per_season.sum()
        clim_std = ((seasonal_means - da.broadcast_to(clim_mean, seasonal_means.shape))**2
                    * ndays_per_season[:, np.newaxis, np.newaxis]).sum(axis=0) / ndays_per_season.sum()
        clim_std = clim_std**0.5

        result[season] = [clim_mean, clim_std, ndays_per_season.shape[0]]

    return result, mask
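# A minimal NumPy check (hypothetical values) of the days-weighted mean/std
# pattern used above: three seasonal fields weighted by days per season.
import numpy as np

seasonal_means = np.array([[1.0], [2.0], [4.0]])   # (season, cell) toy fields
ndays = np.array([90, 92, 91])                     # days in each season

# Contract the season axis to get the days-weighted climatological mean,
# then the days-weighted variance and standard deviation around it.
clim_mean = np.tensordot(seasonal_means, ndays, axes=([0], [0])) / ndays.sum()
clim_var = ((seasonal_means - clim_mean) ** 2 * ndays[:, None]).sum(axis=0) / ndays.sum()
clim_std = clim_var ** 0.5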