def test_upload_zarr_to_nonzarr_path(
    new_dandiset: SampleDandiset, tmp_path: Path
) -> None:
    d = new_dandiset.dandiset
    dspath = new_dandiset.dspath
    (dspath / "sample.zarr").write_text("This is not a Zarr.\n")
    new_dandiset.upload(allow_any_path=True)
    (asset,) = d.get_assets()
    assert isinstance(asset, RemoteBlobAsset)
    assert asset.asset_type is AssetType.BLOB
    assert asset.path == "sample.zarr"
    assert asset.get_raw_metadata()["encodingFormat"] == "application/octet-stream"
    (dspath / "sample.zarr").unlink()
    zarr.save(dspath / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1))
    new_dandiset.upload(allow_any_path=True)
    (asset,) = d.get_assets()
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.asset_type is AssetType.ZARR
    assert asset.path == "sample.zarr"
    assert asset.get_raw_metadata()["encodingFormat"] == ZARR_MIME_TYPE
    download(d.version_api_url, tmp_path)
    assert_dirtrees_eq(dspath / "sample.zarr", tmp_path / d.identifier / "sample.zarr")
def test_zarr_properties(tmp_path: Path) -> None:
    # This test assumes that the Zarr serialization format never changes
    filepath = tmp_path / "example.zarr"
    dt = np.dtype("<i8")
    zarr.save(filepath, np.arange(1000, dtype=dt), np.arange(1000, 0, -1, dtype=dt))
    zf = dandi_file(filepath)
    assert isinstance(zf, ZarrAsset)
    assert zf.filetree.size == 1516
    assert zf.filetree.get_digest().value == "4313ab36412db2981c3ed391b38604d6-5--1516"
    entries = sorted(zf.iterfiles(include_dirs=True), key=attrgetter("parts"))
    assert [(str(e), e.size, e.get_digest().value) for e in entries] == [
        (".zgroup", 24, "e20297935e73dd0154104d4ea53040ab"),
        ("arr_0", 746, "51c74ec257069ce3a555bdddeb50230a-2--746"),
        ("arr_0/.zarray", 315, "9e30a0a1a465e24220d4132fdd544634"),
        ("arr_0/0", 431, "ed4e934a474f1d2096846c6248f18c00"),
        ("arr_1", 746, "7b99a0ad9bd8bb3331657e54755b1a31-2--746"),
        ("arr_1/.zarray", 315, "9e30a0a1a465e24220d4132fdd544634"),
        ("arr_1/0", 431, "fba4dee03a51bde314e9713b00284a93"),
    ]
    assert zf.get_digest().value == "4313ab36412db2981c3ed391b38604d6-5--1516"
    stat = zf.stat()
    assert stat.size == 1516
    assert stat.digest.value == "4313ab36412db2981c3ed391b38604d6-5--1516"
    assert sorted(stat.files, key=attrgetter("parts")) == [
        e for e in entries if e.is_file()
    ]
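# The trailing "-5--1516" in the directory digests above appears to encode
# "<number of files>--<total bytes>" (an inference from the values in this
# test, not a spec quote).  A quick sanity check against the listed entries:
file_sizes = {".zgroup": 24, "arr_0/.zarray": 315, "arr_0/0": 431,
              "arr_1/.zarray": 315, "arr_1/0": 431}
assert len(file_sizes) == 5 and sum(file_sizes.values()) == 1516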
def test_upload_zarr(new_dandiset: SampleDandiset) -> None:
    zarr.save(
        new_dandiset.dspath / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1)
    )
    new_dandiset.upload()
    (asset,) = new_dandiset.dandiset.get_assets()
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.asset_type is AssetType.ZARR
    assert asset.path == "sample.zarr"
def test_upload_sync_zarr(mocker, zarr_dandiset):
    rmtree(zarr_dandiset.dspath / "sample.zarr")
    zarr.save(zarr_dandiset.dspath / "identity.zarr", np.eye(5))
    confirm_mock = mocker.patch("click.confirm", return_value=True)
    zarr_dandiset.upload(sync=True)
    confirm_mock.assert_called_with("Delete 1 asset on server?")
    zarr_dandiset.dandiset.get_asset_by_path("identity.zarr")
    with pytest.raises(NotFoundError):
        zarr_dandiset.dandiset.get_asset_by_path("sample.zarr")
def merge_zarrs(zarr_paths, output_path):
    s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(region_name='us-east-1'))
    arrays_to_merge = []
    for zarr_path in zarr_paths:
        # Strip the "s3://" scheme; S3Map expects a "bucket/key" root
        store = s3fs.S3Map(root=zarr_path[len("s3://"):], s3=s3, check=False)
        arr = zarr.Array(store)
        arrays_to_merge.append(arr)
    merged_array = numpy.concatenate(arrays_to_merge, axis=1)
    zarr.save(output_path, merged_array)
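# Hypothetical usage of the S3-backed merge_zarrs() above; the bucket and key
# names are made up for illustration.  Each input must be a publicly readable
# Zarr array on S3, and the arrays must agree on every axis except axis=1 for
# the concatenation to succeed.
merge_zarrs(
    ["s3://example-bucket/part0.zarr", "s3://example-bucket/part1.zarr"],
    "merged.zarr",
)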
def test_digest_zarr():
    # This test assumes that the Zarr serialization format never changes
    runner = CliRunner()
    with runner.isolated_filesystem():
        dt = np.dtype("<i8")
        zarr.save(
            "sample.zarr", np.arange(1000, dtype=dt), np.arange(1000, 0, -1, dtype=dt)
        )
        r = runner.invoke(digest, ["--digest", "zarr-checksum", "sample.zarr"])
        assert r.exit_code == 0
        assert r.output == "sample.zarr: 4313ab36412db2981c3ed391b38604d6-5--1516\n"
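# Judging from the arguments handed to the `digest` command through CliRunner,
# the equivalent shell invocation would be roughly (assumed, not verified
# against a particular CLI version):
#
#     dandi digest --digest zarr-checksum sample.zarr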
def test_upload_zarr_with_empty_dir(new_dandiset: SampleDandiset) -> None:
    zarr.save(
        new_dandiset.dspath / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1)
    )
    (new_dandiset.dspath / "sample.zarr" / "empty").mkdir()
    new_dandiset.upload()
    (asset,) = new_dandiset.dandiset.get_assets()
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.asset_type is AssetType.ZARR
    assert asset.path == "sample.zarr"
    assert not (asset.filetree / "empty").exists()
def test_download_different_zarr(tmp_path: Path, zarr_dandiset: SampleDandiset) -> None:
    dd = tmp_path / zarr_dandiset.dandiset_id
    dd.mkdir()
    zarr.save(dd / "sample.zarr", np.eye(5))
    download(
        zarr_dandiset.dandiset.version_api_url, tmp_path, existing="overwrite-different"
    )
    assert_dirtrees_eq(
        zarr_dandiset.dspath / "sample.zarr",
        tmp_path / zarr_dandiset.dandiset_id / "sample.zarr",
    )
def test_download_nonzarr_to_zarr_path(
    new_dandiset: SampleDandiset, tmp_path: Path
) -> None:
    d = new_dandiset.dandiset
    (new_dandiset.dspath / "sample.zarr").write_text("This is not a Zarr.\n")
    new_dandiset.upload(allow_any_path=True)
    dd = tmp_path / d.identifier
    dd.mkdir(parents=True, exist_ok=True)
    zarr.save(dd / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1))
    download(d.version_api_url, tmp_path, existing="overwrite-different")
    assert (dd / "sample.zarr").is_file()
    assert (dd / "sample.zarr").read_text() == "This is not a Zarr.\n"
def main():
    # another_test()
    args = get_args()
    arr = zarr_array(args)
    print(arr.chunks)
    run_test(args, arr, 'Zarr')
    p = os.path.join(args.path, '_zarr')
    zarr.save(p, arr)
    arr = zarr.open(p, 'r')
    print(arr.chunks)
    arr = hub_array(args)
    run_test(args, arr, 'Hub')
def test_download_different_zarr_delete_dir(
    new_dandiset: SampleDandiset, tmp_path: Path
) -> None:
    d = new_dandiset.dandiset
    dspath = new_dandiset.dspath
    zarr.save(dspath / "sample.zarr", np.eye(5))
    assert not any(p.is_dir() for p in (dspath / "sample.zarr").iterdir())
    new_dandiset.upload()
    dd = tmp_path / d.identifier
    dd.mkdir(parents=True, exist_ok=True)
    zarr.save(dd / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1))
    assert any(p.is_dir() for p in (dd / "sample.zarr").iterdir())
    download(d.version_api_url, tmp_path, existing="overwrite-different")
    assert_dirtrees_eq(dspath / "sample.zarr", dd / "sample.zarr")
def test_upload_zarr(new_dandiset, tmp_path):
    filepath = tmp_path / "example.zarr"
    zarr.save(filepath, np.arange(1000), np.arange(1000, 0, -1))
    zf = dandi_file(filepath)
    assert isinstance(zf, ZarrAsset)
    asset = zf.upload(new_dandiset.dandiset, {"description": "A test Zarr"})
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.asset_type is AssetType.ZARR
    assert asset.path == "example.zarr"
    md = asset.get_raw_metadata()
    assert md["encodingFormat"] == ZARR_MIME_TYPE
    assert md["description"] == "A test Zarr"
    md["description"] = "A modified Zarr"
    asset.set_raw_metadata(md)
    md = asset.get_raw_metadata()
    assert md["description"] == "A modified Zarr"
    for file_src in [zf, asset]:
        lgr.debug("Traversing %s", type(file_src).__name__)
        entries = sorted(file_src.iterfiles(include_dirs=True), key=attrgetter("parts"))
        assert [str(e) for e in entries] == [
            ".zgroup",
            "arr_0",
            "arr_0/.zarray",
            "arr_0/0",
            "arr_1",
            "arr_1/.zarray",
            "arr_1/0",
        ]
        assert (file_src.filetree / ".zgroup").exists()
        assert (file_src.filetree / ".zgroup").is_file()
        assert not (file_src.filetree / ".zgroup").is_dir()
        assert (file_src.filetree / "arr_0").exists()
        assert not (file_src.filetree / "arr_0").is_file()
        assert (file_src.filetree / "arr_0").is_dir()
        assert not (file_src.filetree / "0").exists()
        assert not (file_src.filetree / "0").is_file()
        assert not (file_src.filetree / "0").is_dir()
        assert not (file_src.filetree / "arr_0" / ".zgroup").exists()
        assert not (file_src.filetree / "arr_0" / ".zgroup").is_file()
        assert not (file_src.filetree / "arr_0" / ".zgroup").is_dir()
        assert not (file_src.filetree / ".zgroup" / "0").exists()
        assert not (file_src.filetree / ".zgroup" / "0").is_file()
        assert not (file_src.filetree / ".zgroup" / "0").is_dir()
        assert not (file_src.filetree / "arr_2" / "0").exists()
        assert not (file_src.filetree / "arr_2" / "0").is_file()
        assert not (file_src.filetree / "arr_2" / "0").is_dir()
def test(directory_path):
    base = zarr.open(directory_path, mode='r+')
    downsize_dimensions = np.asarray(base.shape) / np.asarray(base.chunks)
    if np.unique(downsize_dimensions).size != 1:
        print("not all dimensions reduce equally; should never happen?")
    downsize_factor = int(downsize_dimensions[0] ** (1 / 2))
    downsize_factor -= 1  # already have first level of pyramid
    small_image = base[:]  # currently must fit into RAM, needs to scale
    levels = []
    while downsize_factor >= 0:
        small_image = countless(small_image)
        newLevel = zarr.array(small_image, chunks=base.chunks)
        zarr.save(os.path.join(directory_path, str(downsize_factor)), newLevel)
        downsize_factor -= 1
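# The loop above depends on an external countless() 2x downsampler that is not
# defined in this snippet.  A minimal stand-in, assuming 2D input with even
# dimensions, that keeps the top-left sample of each 2x2 block -- enough to run
# the pyramid loop for experimentation, but not the real COUNTLESS
# (majority-vote) algorithm:
def countless_stub(img):
    # Simple decimation: take every other sample along both axes
    return img[::2, ::2]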
def test_upload_different_zarr(tmp_path: Path, zarr_dandiset: SampleDandiset) -> None:
    asset = zarr_dandiset.dandiset.get_asset_by_path("sample.zarr")
    assert isinstance(asset, RemoteZarrAsset)
    zarr_id = asset.zarr
    rmtree(zarr_dandiset.dspath / "sample.zarr")
    zarr.save(zarr_dandiset.dspath / "sample.zarr", np.eye(5))
    zarr_dandiset.upload()
    asset = zarr_dandiset.dandiset.get_asset_by_path("sample.zarr")
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.zarr == zarr_id
    download(zarr_dandiset.dandiset.version_api_url, tmp_path)
    assert_dirtrees_eq(
        zarr_dandiset.dspath / "sample.zarr",
        tmp_path / zarr_dandiset.dandiset_id / "sample.zarr",
    )
def save(self, prefix, save_dir):
    # save the array
    save_path = os.path.join(save_dir, prefix + '.zarr')
    zarr.save(save_path, self.hypercubes)
    # save the tracks data
    save_path = os.path.join(save_dir, prefix + '.npy')
    arr = []
    for t in self.tracks_list:
        arr.append([t])
    arr = np.concatenate(arr)
    np.save(save_path, arr, allow_pickle=False)
    # save tracks info
    save_path = os.path.join(save_dir, prefix + '.csv')
    self.tracks_info.to_csv(save_path)
    m0 = f'Hypercube array, tracks data, and tracks info for {prefix} '
    m1 = f'saved at {save_dir}'
    print(m0 + m1)
def merge_zarrs(zarr_paths, output_path):
    arrays_to_merge = []
    for zarr_path in zarr_paths:
        arrays_to_merge.append(zarr.open(zarr_path))
    print("Opened", len(arrays_to_merge), "arrays")
    print(arrays_to_merge[0])
    print(arrays_to_merge[0].shape)
    merged_array = numpy.concatenate(arrays_to_merge, axis=1)
    print(merged_array.shape)
    # zarr.save() returns None, so there is nothing useful to assign here
    zarr.save(output_path, merged_array)
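# Hypothetical round trip for the local merge_zarrs() above; the file names
# are made up for illustration.  Both inputs share axis 0, so they concatenate
# cleanly along axis=1.
import numpy
import zarr

zarr.save("a.zarr", numpy.zeros((4, 2)))
zarr.save("b.zarr", numpy.ones((4, 3)))
merge_zarrs(["a.zarr", "b.zarr"], "ab.zarr")
assert zarr.load("ab.zarr").shape == (4, 5)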
# Note: windowSize and path are assumed to be module-level globals here.
def block2row(array, row, folder, block_id=None):
    if array.shape[0] == windowSize:
        # Parameters
        name_string = str(block_id[0] + 1)
        m, n = array.shape
        u = m + 1 - windowSize
        v = n + 1 - windowSize
        # Get starting block indices
        start_idx = np.arange(u)[:, None] * n + np.arange(v)
        # Get offsetted indices across the height and width of input array
        offset_idx = np.arange(windowSize)[:, None] * n + np.arange(windowSize)
        # Get all actual indices & index into input array for final output
        flat_array = np.take(array, start_idx.ravel()[:, None] + offset_idx.ravel())
        # Save to (dask) array in .zarr format
        file_name = path + folder + name_string + 'r' + row + '.zarr'
        zarr.save(file_name, flat_array)
    return array
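# A small demo of the indexing trick in block2row(): start_idx anchors the
# top-left corner of every windowSize x windowSize patch, offset_idx covers
# the patch interior, and their broadcast sum yields one flattened patch per
# row.  (Standalone illustration with windowSize = 3 on a 3x4 block.)
a = np.arange(12).reshape(3, 4)
start = np.arange(1)[:, None] * 4 + np.arange(2)    # u = 1, v = 2
offset = np.arange(3)[:, None] * 4 + np.arange(3)
patches = np.take(a, start.ravel()[:, None] + offset.ravel())
assert patches.shape == (2, 9)                      # two 3x3 patches, flattened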
def test_zarr(selenium):
    import numpy as np
    import zarr
    from numcodecs import Blosc

    # basic test
    z = zarr.zeros((1000, 1000), chunks=(100, 100), dtype="i4")
    assert z.shape == (1000, 1000)

    # test assignment
    z[0, :] = np.arange(1000)
    assert z[0, 1] == 1

    # test saving and loading
    a1 = np.arange(10)
    zarr.save("/tmp/example.zarr", a1)
    a2 = zarr.load("/tmp/example.zarr")
    np.testing.assert_equal(a1, a2)

    # test compressor
    compressor = Blosc(cname="zstd", clevel=3, shuffle=Blosc.BITSHUFFLE)
    data = np.arange(10000, dtype="i4").reshape(100, 100)
    z = zarr.array(data, chunks=(10, 10), compressor=compressor)
    assert z.compressor == compressor
def checkpointer(iterate: Iterate):
    zarr.save('data/jacobi.zarr', iterate['x'])
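# A matching restart sketch for the checkpointer above (assumed usage, not
# from the original code): zarr.load() reads the array back, so a solver can
# resume the Jacobi iteration from the last checkpointed x.
def load_checkpoint():
    return zarr.load('data/jacobi.zarr')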
def save(self, path, data):
    return zarr.save(path, data)
def create_dummy_data_source(size=10000000, chunksize=10000):
    a = np.random.choice(a=[0, 1], size=(size, 1024))
    b = np.array(a, dtype=np.uint16)
    z = zarr.array(b, chunks=(chunksize, 1024))
    zarr.save("/home/dask/zarr_uint16_test.zarr", z)
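# Quick read-back check for the dummy source above (an assumed follow-up, not
# in the original).  zarr.open() maps the store lazily rather than loading the
# roughly 20 GB (10**7 x 1024 uint16) into memory; note that zarr.save() may
# re-derive chunking for the saved copy rather than preserving (chunksize, 1024).
z = zarr.open("/home/dask/zarr_uint16_test.zarr", mode="r")
print(z.shape, z.dtype, z.chunks)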
def write_zarr():
    # `arr` is assumed to be a module-level array defined elsewhere
    zarr.save('test.zarr', arr)
def _4(obj: np.ndarray) -> ConstraintDirectoryFormat:
    # for C in generate data, or generate constraint
    ff = ConstraintDirectoryFormat()
    filename = str(ff.path / 'cmatrix.zip')
    zarr.save(filename, obj)
    return ff
def main():
    args = parser.parse_args()
    output_dir = args.output_dir
    os.makedirs(output_dir, exist_ok=True)
    root = 'data/kitti'
    roi_align = ROIAlign((224, 224), 1.0, 0)
    if args.splits == 'trainval':
        splits = ['train', 'val']
    else:
        splits = [args.splits]
    masker = Masker(args.masker_thresh)
    for split in splits:
        prediction_pth = args.prediction_template % split
        predictions = torch.load(prediction_pth)
        left_predictions, right_predictions = predictions['left'], predictions['right']
        os.makedirs(os.path.join(output_dir, split, 'image', 'left'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, split, 'image', 'right'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, split, 'label'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, split, 'disparity'), exist_ok=True)
        if args.cls == 'car':
            ds = KITTIObjectDatasetCar(root, split, filter_empty=False,
                                       shape_prior_base=args.shape_prior_base)
        elif args.cls == 'pedestrian':
            ds = KITTIObjectDatasetPedestrian(root, split, filter_empty=False,
                                              shape_prior_base=args.shape_prior_base)
        else:  # cyclist
            ds = KITTIObjectDatasetCyclist(root, split, filter_empty=False,
                                           shape_prior_base='notused')
        wrote = 0
        assert len(left_predictions) == len(ds)
        for i, (images, targets, _) in enumerate(tqdm(ds)):
            leftimg, rightimg = images['left'], images['right']
            leftanno, rightanno = targets['left'], targets['right']
            left_prediction_per_img = left_predictions[i].resize(leftimg.size)
            right_prediction_per_img = right_predictions[i].resize(leftimg.size)
            calib = leftanno.get_field('calib')
            if len(leftanno) == 0 or len(left_prediction_per_img) == 0:
                continue
            imgid: int = leftanno.get_field('imgid')[0, 0].item()
            # os.makedirs(osp.join(output_dir, split, 'imgid_org_left', str(imgid)), exist_ok=True)
            masks_per_img = masker([left_prediction_per_img.get_field('mask')],
                                   [left_prediction_per_img])[0].squeeze(1)
            disparity_per_img = leftanno.get_map('disparity')
            assert len(left_prediction_per_img.bbox) == len(
                right_prediction_per_img.bbox) == len(masks_per_img)
            rois_for_image_crop_left = []
            rois_for_image_crop_right = []
            fxus, x1s, x1ps, x2s, x2ps, y1s, y2s = [], [], [], [], [], [], []
            roi_masks = []
            roi_disps = []
            for j, (left_bbox, right_bbox, mask) in enumerate(
                    zip(left_prediction_per_img.bbox,
                        right_prediction_per_img.bbox, masks_per_img)):
                x1, y1, x2, y2 = expand_box_to_integer(left_bbox.tolist())
                x1p, _, x2p, _ = expand_box_to_integer(right_bbox.tolist())
                max_width = max(x2 - x1, x2p - x1p)
                max_width = min(max_width, leftimg.width - x1)
                allow_extend_width = min(left_prediction_per_img.width - x1,
                                         left_prediction_per_img.width - x1p)
                max_width = min(max_width, allow_extend_width)
                rois_for_image_crop_left.append([0, x1, y1, x1 + max_width, y2])
                rois_for_image_crop_right.append([0, x1p, y1, x1p + max_width, y2])
                x1s.append(x1)
                x1ps.append(x1p)
                x2s.append(x1 + max_width)
                x2ps.append(x1p + max_width)
                y1s.append(y1)
                y2s.append(y2)
                roi_mask = mask[y1:y2, x1:x1 + max_width]
                roi_mask = SegmentationMask(
                    roi_mask, (roi_mask.shape[1], roi_mask.shape[0]), mode='mask')
                roi_mask = roi_mask.resize((224, 224))
                # roi_masks.append(roi_mask)
                roi_disparity = disparity_per_img.crop((x1, y1, x1 + max_width, y2)).data
                dispfg_mask = SegmentationMask(
                    roi_disparity != 0,
                    (roi_disparity.shape[1], roi_disparity.shape[0]),
                    mode='mask').resize((224, 224)).get_mask_tensor()
                roi_disparity = roi_disparity - (x1 - x1p)
                roi_disparity = DisparityMap(roi_disparity).resize((224, 224)).data
                # pdb.set_trace()
                roi_masks.append(roi_mask)
                roi_disps.append(roi_disparity)
            # crop and resize image
            leftimg = F.to_tensor(leftimg).unsqueeze(0)
            rightimg = F.to_tensor(rightimg).unsqueeze(0)
            rois_for_image_crop_left = torch.as_tensor(rois_for_image_crop_left).float()
            rois_for_image_crop_right = torch.as_tensor(rois_for_image_crop_right).float()
            roi_left_imgs = roi_align(leftimg, rois_for_image_crop_left)
            roi_right_imgs = roi_align(rightimg, rois_for_image_crop_right)
            for j in range(len(roi_left_imgs)):
                zarr.save(
                    osp.join(output_dir, split, 'image/left', str(wrote) + '.zarr'),
                    roi_left_imgs[j].numpy())
                zarr.save(
                    osp.join(output_dir, split, 'image/right', str(wrote) + '.zarr'),
                    roi_right_imgs[j].numpy())
                zarr.save(
                    osp.join(output_dir, split, 'disparity', str(wrote) + '.zarr'),
                    roi_disps[j].numpy())
                out_path = os.path.join(output_dir, split, 'label', str(wrote) + '.pkl')
                pickle.dump(
                    {
                        'mask': roi_masks[j],
                        'x1': x1s[j],
                        'y1': y1s[j],
                        'x2': x2s[j],
                        'y2': y2s[j],
                        'x1p': x1ps[j],
                        'x2p': x2ps[j],
                        'fuxb': calib.stereo_fuxbaseline,
                        'imgid': imgid
                    },
                    open(out_path, 'wb'))
                wrote += 1
        print(f'made {wrote} pairs for {split}.')
# Used as a pytest fixture by the tests above that take a `zarr_dandiset` argument
def zarr_dandiset(new_dandiset: SampleDandiset) -> SampleDandiset:
    zarr.save(
        new_dandiset.dspath / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1)
    )
    new_dandiset.upload()
    return new_dandiset