def merge(self, dest, src):
    """
    Merge partial results from ``src`` into the aggregation buffer ``dest``.

    Both buffers carry a sum of variances (``varsum``), a sum of frames
    (``sum``) and the number of frames that contributed (``num_frames``).
    After merging, ``dest`` describes the joint statistics over all frames
    seen by either buffer.

    Parameters
    ----------
    dest
        Aggregation buffer that contains sum of variances, sum of frames,
        and the number of frames
    src
        Partial results that contain sum of variances, sum of frames, and
        the number of frames of a partition to be merged into the
        aggregation buffers
    """
    # The module-level merge() updates the flattened sum/varsum buffers
    # in place and returns the combined frame count.
    combined_count = merge(
        dest_n=dest['num_frames'][0],
        dest_sum=reshaped_view(dest['sum'], (-1, )),
        dest_varsum=reshaped_view(dest['varsum'], (-1, )),
        src_n=src['num_frames'][0],
        src_sum=reshaped_view(src['sum'], (-1, )),
        src_varsum=reshaped_view(src['varsum'], (-1, )),
    )
    dest['num_frames'][:] = combined_count
def process_tile(self, tile):
    """
    Calculate a sum and variance minibatch for the tile and update the
    partition buffers with it.

    Parameters
    ----------
    tile
        Tile of the data
    """
    key = tile.scheme_idx
    frames_so_far = self.task_data.num_frames[key]
    frames_in_tile = tile.shape[0]
    if frames_so_far == 0:
        # First tile for this scheme index: seed the buffers directly
        # from the tile instead of merging into existing statistics.
        self.results.sum[:] = tile.sum(axis=0)
        # ddof changes the divisor of the variance. Setting it to
        # frames_in_tile - 1 makes np.var return the *sum* of variances
        # directly, avoiding a multiplication by frames_in_tile.
        # See https://docs.scipy.org/doc/numpy/reference/generated/numpy.var.html
        self.results.varsum[:] = np.var(tile, axis=0, ddof=frames_in_tile - 1)
        self.task_data.num_frames[key] = frames_in_tile
    else:
        # Fold the tile into the existing partition statistics via the
        # module-level process_tile(), which returns the new frame count.
        self.task_data.num_frames[key] = process_tile(
            tile=reshaped_view(tile, (frames_in_tile, -1)),
            n_0=frames_so_far,
            sum_inout=reshaped_view(self.results.sum, (-1, )),
            varsum_inout=reshaped_view(self.results.varsum, (-1, )),
        )
def test_reshaped_view():
    """reshaped_view rejects non-contiguous input and aliases contiguous input."""
    base = np.zeros((2, 5))
    noncontig = base[:, :3]
    # A sliced (non-contiguous) array cannot be reshaped as a view
    with pytest.raises(AttributeError):
        reshaped_view(noncontig, (-1, ))
    flat = reshaped_view(base, (-1, ))
    flat[0] = 1
    # A write through the view must hit exactly one element of the base array
    assert base[0, 0] == 1
    assert np.all(base[0, 1:] == 0)
    assert np.all(base[1:] == 0)
def test_reshape_nav(default_frms6, lt_ctx):
    """Loading frms6 with reshaped nav shapes yields the same frames as the default."""
    udf = PickUDF()
    roi = np.zeros(default_frms6.shape.nav, dtype=bool)
    reshaped_view(roi, -1)[:8] = True
    ref = lt_ctx.run_udf(dataset=default_frms6, udf=udf, roi=roi)
    # Both a flat and a 3D nav shape must reproduce the reference frames
    for nav_shape in ((8, ), (2, 2, 2)):
        ds = lt_ctx.load(
            "frms6", path=FRMS6_TESTDATA_PATH, nav_shape=nav_shape,
            enable_offset_correction=True,
        )
        picked = lt_ctx.run_udf(dataset=ds, udf=udf)
        summed = lt_ctx.run_udf(dataset=ds, udf=SumSigUDF())
        assert summed['intensity'].data.shape == nav_shape
        assert np.allclose(picked['intensity'].raw_data, ref['intensity'].raw_data)
def test_reshape_nav(default_k2is, lt_ctx):
    """Loading k2is with reshaped nav shapes yields the same frames as the default."""
    udf = PickUDF()
    roi = np.zeros(default_k2is.shape.nav, dtype=bool)
    reshaped_view(roi, -1)[:8] = True
    ref = lt_ctx.run_udf(dataset=default_k2is, udf=udf, roi=roi)
    # Both a flat and a 3D nav shape must reproduce the reference frames
    for nav_shape in ((8, ), (2, 2, 2)):
        ds = lt_ctx.load(
            "k2is",
            path=K2IS_TESTDATA_PATH,
            nav_shape=nav_shape,
        )
        picked = lt_ctx.run_udf(dataset=ds, udf=udf)
        summed = lt_ctx.run_udf(dataset=ds, udf=SumSigUDF())
        assert summed['intensity'].data.shape == nav_shape
        assert np.allclose(picked['intensity'].raw_data, ref['intensity'].raw_data)
def test_comparison(default_k2is, default_k2is_raw, lt_ctx_fast):
    """The k2is dataset must decode to exactly the raw reference data."""
    # Flatten the nav dimensions of the raw reference so it matches the
    # dataset's flat frame order.
    sig_shape = tuple(default_k2is.shape.sig)
    flat_reference = reshaped_view(default_k2is_raw, (-1, ) + sig_shape)
    udf = ValidationUDF(
        reference=flat_reference,
        validation_function=lambda a, b: np.all(a == b),
    )
    lt_ctx_fast.run_udf(udf=udf, dataset=default_k2is)
def test_negative_sync_offset(default_k2is, lt_ctx):
    """With a negative sync_offset, later frame positions map to the start of the data."""
    udf = PickUDF()
    # native_sync_offset is 250
    sync_offset = -2
    ref_roi = np.zeros(default_k2is.shape.nav, dtype=bool)
    reshaped_view(ref_roi, -1)[:8] = True
    ref = lt_ctx.run_udf(dataset=default_k2is, udf=udf, roi=ref_roi)
    # With sync_offset=-2 the same frames appear 252 positions later
    shifted_roi = np.zeros(default_k2is.shape.nav, dtype=bool)
    reshaped_view(shifted_roi, -1)[252:260] = True
    ds = lt_ctx.load("k2is", path=K2IS_TESTDATA_PATH, sync_offset=sync_offset)
    result = lt_ctx.run_udf(dataset=ds, udf=udf, roi=shifted_roi)
    assert np.allclose(result['intensity'].raw_data, ref['intensity'].raw_data)
def test_positive_sync_offset(default_frms6, lt_ctx):
    """With a positive sync_offset, the dataset starts that many frames later."""
    udf = PickUDF()
    sync_offset = 2
    reference_roi = np.zeros(default_frms6.shape.nav, dtype=bool)
    # Frames 2..9 of the default dataset correspond to frames 0..7 of
    # a dataset loaded with sync_offset=2
    reshaped_view(reference_roi, -1)[2:10] = True
    ref = lt_ctx.run_udf(dataset=default_frms6, udf=udf, roi=reference_roi)
    ds = lt_ctx.load(
        "frms6", path=FRMS6_TESTDATA_PATH, nav_shape=(4, 2),
        sync_offset=sync_offset, enable_offset_correction=True,
    )
    result = lt_ctx.run_udf(dataset=ds, udf=udf)
    assert np.allclose(result['intensity'].raw_data, ref['intensity'].raw_data)
def test_positive_sync_offset_1(default_k2is, lt_ctx):
    """A positive sync_offset beyond the native one skips the extra frames."""
    udf = PickUDF()
    # native_sync_offset is 250
    sync_offset = 252
    reference_roi = np.zeros(default_k2is.shape.nav, dtype=bool)
    # Frames 2..9 of the default dataset correspond to frames 0..7 of
    # a dataset loaded with sync_offset 252 (= native 250 + 2)
    reshaped_view(reference_roi, -1)[2:10] = True
    ref = lt_ctx.run_udf(dataset=default_k2is, udf=udf, roi=reference_roi)
    ds = lt_ctx.load(
        "k2is",
        path=K2IS_TESTDATA_PATH,
        nav_shape=(4, 2),
        sync_offset=sync_offset,
    )
    result = lt_ctx.run_udf(dataset=ds, udf=udf)
    assert np.allclose(result['intensity'].raw_data, ref['intensity'].raw_data)
def default_frms6_raw(tmpdir_factory):
    """
    Build a raw uint16 reference copy of the frms6 test data.

    Uses a memory-mapped file so this also works on machines that can't
    hold the full dataset in memory.
    """
    out_path = tmpdir_factory.mktemp("data").join("frms6.raw")
    data = np.memmap(
        str(out_path), mode='w+', shape=(256, 256, 264, 264), dtype='uint16',
    )
    flat = reshaped_view(data, (256*256, 264, 264))
    root, ext = os.path.splitext(FRMS6_TESTDATA_PATH)
    part_files = sorted(glob.glob(root + '*.frms6'))
    blocksize = 15
    written = 0
    # Skip the first file, it contains a zero reference
    for part in part_files[1:]:
        raw_shape = stemtool.util.pnccd.Frms6Reader.getDataShape(part)
        frame_count = raw_shape[-1]
        # Go blockwise to reduce memory consumption
        for begin in range(0, frame_count, blocksize):
            end = min(begin + blocksize, frame_count)
            block = _read_block(part, raw_shape, begin, end)
            flat[written + begin:written + end] = _unfold_block(block)
        written += frame_count
    return data
def test_negative_sync_offset(default_mrc, lt_ctx):
    """
    With a negative sync_offset the first ``-sync_offset`` result frames
    are padding, so the reference frames appear shifted into positions
    2 onward of the result.
    """
    # nav shape 4
    udf = PickUDF()
    sync_offset = -2
    roi = np.zeros(default_mrc.shape.nav, dtype=bool)
    flat_roi = reshaped_view(roi, -1)
    flat_roi[:2] = True
    ref = lt_ctx.run_udf(dataset=default_mrc, udf=udf, roi=roi)
    ds_with_offset = lt_ctx.load(
        "mrc", path=MRC_TESTDATA_PATH, nav_shape=(2, 2), sync_offset=sync_offset
    )
    result_with_offset = lt_ctx.run_udf(dataset=ds_with_offset, udf=udf)
    shape = lt_ctx.run_udf(dataset=ds_with_offset, udf=SumSigUDF())
    # Removed leftover debug print() of the result shape; the shape is
    # asserted explicitly below.
    assert shape['intensity'].data.shape == (2, 2)
    assert np.allclose(
        result_with_offset['intensity'].raw_data[2:],
        ref['intensity'].raw_data
    )
def test_comparison(default_seq, default_seq_raw, lt_ctx_fast):
    """The seq dataset must decode to exactly the raw reference data."""
    corrset = CorrectionSet()
    # Flatten the nav dimensions of the raw reference so it matches the
    # dataset's flat frame order.
    flat_reference = reshaped_view(
        default_seq_raw, (-1, ) + tuple(default_seq.shape.sig)
    )
    udf = ValidationUDF(reference=flat_reference)
    lt_ctx_fast.run_udf(udf=udf, dataset=default_seq, corrections=corrset)
def test_comparison(default_blo, default_blo_raw, lt_ctx_fast):
    """The blo dataset must decode to exactly the raw reference data."""
    # Flatten the nav dimensions of the raw reference so it matches the
    # dataset's flat frame order.
    flat_reference = reshaped_view(
        default_blo_raw, (-1, ) + tuple(default_blo.shape.sig)
    )
    udf = ValidationUDF(reference=flat_reference)
    lt_ctx_fast.run_udf(udf=udf, dataset=default_blo)
def process_tile(self, tile):
    """Flatten-view the tile and mark the whole sig buffer."""
    # The returned view is intentionally discarded — presumably this
    # only exercises that reshaped_view accepts the tile (e.g. it would
    # raise on non-contiguous input); TODO confirm against reshaped_view.
    reshaped_view(tile, (tile.shape[0], -1))
    # Set every element of the sig result buffer through a flat view
    reshaped_view(self.results.sigbuf, (-1, ))[:] = 1