Example #1
def test_upload_zarr_to_nonzarr_path(new_dandiset: SampleDandiset,
                                     tmp_path: Path) -> None:
    d = new_dandiset.dandiset
    dspath = new_dandiset.dspath
    (dspath / "sample.zarr").write_text("This is not a Zarr.\n")
    new_dandiset.upload(allow_any_path=True)

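    # The non-Zarr text file should still upload, but as a plain blob asset.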
    (asset, ) = d.get_assets()
    assert isinstance(asset, RemoteBlobAsset)
    assert asset.asset_type is AssetType.BLOB
    assert asset.path == "sample.zarr"
    assert (asset.get_raw_metadata()["encodingFormat"]
            == "application/octet-stream")

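    # Replace the placeholder file with a real Zarr and upload again.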
    (dspath / "sample.zarr").unlink()
    zarr.save(dspath / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1))
    new_dandiset.upload(allow_any_path=True)

    (asset, ) = d.get_assets()
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.asset_type is AssetType.ZARR
    assert asset.path == "sample.zarr"
    assert asset.get_raw_metadata()["encodingFormat"] == ZARR_MIME_TYPE

    download(d.version_api_url, tmp_path)
    assert_dirtrees_eq(dspath / "sample.zarr",
                       tmp_path / d.identifier / "sample.zarr")
Example #2
def test_zarr_properties(tmp_path: Path) -> None:
    # This test assumes that the Zarr serialization format never changes
    filepath = tmp_path / "example.zarr"
    dt = np.dtype("<i8")
    zarr.save(filepath, np.arange(1000, dtype=dt),
              np.arange(1000, 0, -1, dtype=dt))
    zf = dandi_file(filepath)
    assert isinstance(zf, ZarrAsset)
    assert zf.filetree.size == 1516
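    # Zarr digests use the zarr_checksum format:
    # <aggregated md5>-<file count>--<total size in bytes>.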
    assert (zf.filetree.get_digest().value
            == "4313ab36412db2981c3ed391b38604d6-5--1516")
    entries = sorted(zf.iterfiles(include_dirs=True), key=attrgetter("parts"))
    assert [(str(e), e.size, e.get_digest().value) for e in entries] == [
        (".zgroup", 24, "e20297935e73dd0154104d4ea53040ab"),
        ("arr_0", 746, "51c74ec257069ce3a555bdddeb50230a-2--746"),
        ("arr_0/.zarray", 315, "9e30a0a1a465e24220d4132fdd544634"),
        ("arr_0/0", 431, "ed4e934a474f1d2096846c6248f18c00"),
        ("arr_1", 746, "7b99a0ad9bd8bb3331657e54755b1a31-2--746"),
        ("arr_1/.zarray", 315, "9e30a0a1a465e24220d4132fdd544634"),
        ("arr_1/0", 431, "fba4dee03a51bde314e9713b00284a93"),
    ]
    assert zf.get_digest().value == "4313ab36412db2981c3ed391b38604d6-5--1516"
    stat = zf.stat()
    assert stat.size == 1516
    assert stat.digest.value == "4313ab36412db2981c3ed391b38604d6-5--1516"
    assert sorted(stat.files, key=attrgetter("parts")) == [
        e for e in entries if e.is_file()
    ]
Example #3
def test_upload_zarr(new_dandiset: SampleDandiset) -> None:
    zarr.save(new_dandiset.dspath / "sample.zarr", np.arange(1000),
              np.arange(1000, 0, -1))
    new_dandiset.upload()
    (asset, ) = new_dandiset.dandiset.get_assets()
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.asset_type is AssetType.ZARR
    assert asset.path == "sample.zarr"
Example #4
def test_upload_sync_zarr(mocker, zarr_dandiset):
    rmtree(zarr_dandiset.dspath / "sample.zarr")
    zarr.save(zarr_dandiset.dspath / "identity.zarr", np.eye(5))
    confirm_mock = mocker.patch("click.confirm", return_value=True)
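    # With sync=True, upload should offer to delete remote assets that no
    # longer exist locally.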
    zarr_dandiset.upload(sync=True)
    confirm_mock.assert_called_with("Delete 1 asset on server?")
    zarr_dandiset.dandiset.get_asset_by_path("identity.zarr")
    with pytest.raises(NotFoundError):
        zarr_dandiset.dandiset.get_asset_by_path("sample.zarr")
Example #5
import numpy
import s3fs
import zarr


def merge_zarrs(zarr_paths, output_path):
    # Open each source array anonymously from S3, concatenate column-wise
    # in memory, and write the merged result with zarr.save().
    s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(region_name='us-east-1'))
    arrays_to_merge = []
    for zarr_path in zarr_paths:
        store = s3fs.S3Map(root=zarr_path[len("s3://"):], s3=s3, check=False)
        arr = zarr.Array(store)
        arrays_to_merge.append(arr)
    merged_array = numpy.concatenate(arrays_to_merge, axis=1)
    zarr.save(output_path, merged_array)
Example #6
def test_digest_zarr():
    # This test assumes that the Zarr serialization format never changes
    runner = CliRunner()
    with runner.isolated_filesystem():
        dt = np.dtype("<i8")
        zarr.save("sample.zarr", np.arange(1000, dtype=dt),
                  np.arange(1000, 0, -1, dtype=dt))
        r = runner.invoke(digest, ["--digest", "zarr-checksum", "sample.zarr"])
        assert r.exit_code == 0
        assert r.output == "sample.zarr: 4313ab36412db2981c3ed391b38604d6-5--1516\n"
Example #7
def test_upload_zarr_with_empty_dir(new_dandiset: SampleDandiset) -> None:
    zarr.save(new_dandiset.dspath / "sample.zarr", np.arange(1000),
              np.arange(1000, 0, -1))
    (new_dandiset.dspath / "sample.zarr" / "empty").mkdir()
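    # The empty directory should be omitted from the uploaded Zarr.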
    new_dandiset.upload()
    (asset, ) = new_dandiset.dandiset.get_assets()
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.asset_type is AssetType.ZARR
    assert asset.path == "sample.zarr"
    assert not (asset.filetree / "empty").exists()
Example #8
def test_download_different_zarr(tmp_path: Path, zarr_dandiset: SampleDandiset) -> None:
    dd = tmp_path / zarr_dandiset.dandiset_id
    dd.mkdir()
    zarr.save(dd / "sample.zarr", np.eye(5))
    download(
        zarr_dandiset.dandiset.version_api_url, tmp_path, existing="overwrite-different"
    )
    assert_dirtrees_eq(
        zarr_dandiset.dspath / "sample.zarr",
        tmp_path / zarr_dandiset.dandiset_id / "sample.zarr",
    )
Example #9
def test_download_nonzarr_to_zarr_path(new_dandiset: SampleDandiset,
                                       tmp_path: Path) -> None:
    d = new_dandiset.dandiset
    (new_dandiset.dspath / "sample.zarr").write_text("This is not a Zarr.\n")
    new_dandiset.upload(allow_any_path=True)
    dd = tmp_path / d.identifier
    dd.mkdir(parents=True, exist_ok=True)
    zarr.save(dd / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1))
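    # The remote asset is a blob, so "overwrite-different" should replace
    # the local Zarr directory with the downloaded file.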
    download(d.version_api_url, tmp_path, existing="overwrite-different")
    assert (dd / "sample.zarr").is_file()
    assert (dd / "sample.zarr").read_text() == "This is not a Zarr.\n"
Example #10
def main():
    # another_test()
    args = get_args()
    arr = zarr_array(args)
    print(arr.chunks)
    run_test(args, arr, 'Zarr')
    p = os.path.join(args.path, '_zarr')
    zarr.save(p, arr)
    arr = zarr.open(p, 'r')
    print(arr.chunks)
    arr = hub_array(args)
    run_test(args, arr, 'Hub')
Example #11
def test_download_different_zarr_delete_dir(new_dandiset: SampleDandiset,
                                            tmp_path: Path) -> None:
    d = new_dandiset.dandiset
    dspath = new_dandiset.dspath
    zarr.save(dspath / "sample.zarr", np.eye(5))
    assert not any(p.is_dir() for p in (dspath / "sample.zarr").iterdir())
    new_dandiset.upload()
    dd = tmp_path / d.identifier
    dd.mkdir(parents=True, exist_ok=True)
    zarr.save(dd / "sample.zarr", np.arange(1000), np.arange(1000, 0, -1))
    assert any(p.is_dir() for p in (dd / "sample.zarr").iterdir())
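    # Downloading should prune local subdirectories that do not exist in
    # the remote Zarr.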
    download(d.version_api_url, tmp_path, existing="overwrite-different")
    assert_dirtrees_eq(dspath / "sample.zarr", dd / "sample.zarr")
Example #12
def test_upload_zarr(new_dandiset, tmp_path):
    filepath = tmp_path / "example.zarr"
    zarr.save(filepath, np.arange(1000), np.arange(1000, 0, -1))
    zf = dandi_file(filepath)
    assert isinstance(zf, ZarrAsset)
    asset = zf.upload(new_dandiset.dandiset, {"description": "A test Zarr"})
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.asset_type is AssetType.ZARR
    assert asset.path == "example.zarr"
    md = asset.get_raw_metadata()
    assert md["encodingFormat"] == ZARR_MIME_TYPE
    assert md["description"] == "A test Zarr"
    md["description"] = "A modified Zarr"
    asset.set_raw_metadata(md)
    md = asset.get_raw_metadata()
    assert md["description"] == "A modified Zarr"

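    # The local ZarrAsset and the uploaded RemoteZarrAsset should expose
    # identical file trees.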
    for file_src in [zf, asset]:
        lgr.debug("Traversing %s", type(file_src).__name__)
        entries = sorted(file_src.iterfiles(include_dirs=True),
                         key=attrgetter("parts"))
        assert [str(e) for e in entries] == [
            ".zgroup",
            "arr_0",
            "arr_0/.zarray",
            "arr_0/0",
            "arr_1",
            "arr_1/.zarray",
            "arr_1/0",
        ]
        assert (file_src.filetree / ".zgroup").exists()
        assert (file_src.filetree / ".zgroup").is_file()
        assert not (file_src.filetree / ".zgroup").is_dir()
        assert (file_src.filetree / "arr_0").exists()
        assert not (file_src.filetree / "arr_0").is_file()
        assert (file_src.filetree / "arr_0").is_dir()
        assert not (file_src.filetree / "0").exists()
        assert not (file_src.filetree / "0").is_file()
        assert not (file_src.filetree / "0").is_dir()
        assert not (file_src.filetree / "arr_0" / ".zgroup").exists()
        assert not (file_src.filetree / "arr_0" / ".zgroup").is_file()
        assert not (file_src.filetree / "arr_0" / ".zgroup").is_dir()
        assert not (file_src.filetree / ".zgroup" / "0").exists()
        assert not (file_src.filetree / ".zgroup" / "0").is_file()
        assert not (file_src.filetree / ".zgroup" / "0").is_dir()
        assert not (file_src.filetree / "arr_2" / "0").exists()
        assert not (file_src.filetree / "arr_2" / "0").is_file()
        assert not (file_src.filetree / "arr_2" / "0").is_dir()
Example #13
def test(directory_path):
    # Assumes zarr, numpy (np), and os are imported and that countless()
    # performs one level of 2x downsampling.
    base = zarr.open(directory_path, mode='r+')
    downsize_dimensions = np.asarray(base.shape) / np.asarray(base.chunks)
    if np.unique(downsize_dimensions).size != 1:
        print("not all dimensions reduce equally; should never happen?")
    downsize_factor = int(downsize_dimensions[0] ** (1 / 2))

    downsize_factor -= 1  # already have first level of pyramid
    small_image = base[:]  # currently must fit into RAM; needs to scale

    # Downsample repeatedly, saving each pyramid level under the base
    # directory; lower level numbers are more heavily downsampled.
    while downsize_factor >= 0:
        small_image = countless(small_image)
        new_level = zarr.array(small_image, chunks=base.chunks)
        zarr.save(os.path.join(directory_path, str(downsize_factor)), new_level)
        downsize_factor -= 1
Example #14
def test_upload_different_zarr(tmp_path: Path,
                               zarr_dandiset: SampleDandiset) -> None:
    asset = zarr_dandiset.dandiset.get_asset_by_path("sample.zarr")
    assert isinstance(asset, RemoteZarrAsset)
    zarr_id = asset.zarr
    rmtree(zarr_dandiset.dspath / "sample.zarr")
    zarr.save(zarr_dandiset.dspath / "sample.zarr", np.eye(5))
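    # Re-uploading changed contents should reuse the existing Zarr ID
    # rather than mint a new one.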
    zarr_dandiset.upload()
    asset = zarr_dandiset.dandiset.get_asset_by_path("sample.zarr")
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.zarr == zarr_id
    download(zarr_dandiset.dandiset.version_api_url, tmp_path)
    assert_dirtrees_eq(
        zarr_dandiset.dspath / "sample.zarr",
        tmp_path / zarr_dandiset.dandiset_id / "sample.zarr",
    )
Example #15
def save(self, prefix, save_dir):
    # save the array
    save_path = os.path.join(save_dir, prefix + '.zarr')
    zarr.save(save_path, self.hypercubes)
    # save the tracks data
    save_path = os.path.join(save_dir, prefix + '.npy')
    arr = []
    for t in self.tracks_list:
        arr.append([t])
    arr = np.concatenate(arr)
    np.save(save_path, arr, allow_pickle=False)
    # save tracks info
    save_path = os.path.join(save_dir, prefix + '.csv')
    self.tracks_info.to_csv(save_path)
    m0 = f'Hypercube array, tracks data, and tracks info for {prefix} '
    m1 = f'saved at {save_dir}'
    print(m0 + m1)
Example #16
import numpy
import zarr


def merge_zarrs(zarr_paths, output_path):
    # Open each source Zarr and concatenate the arrays column-wise in memory.
    arrays_to_merge = []
    for zarr_path in zarr_paths:
        arrays_to_merge.append(zarr.open(zarr_path))
    print("Opened", len(arrays_to_merge), "arrays")
    print(arrays_to_merge[0])
    print(arrays_to_merge[0].shape)
    merged_array = numpy.concatenate(arrays_to_merge, axis=1)
    print(merged_array.shape)
    # zarr.save() returns None, so its result is not assigned.
    zarr.save(output_path, merged_array)
Example #17
def block2row(array, row, folder, block_id=None):
    # Assumes module-level globals windowSize and path, plus numpy (np)
    # and zarr imports.
    if array.shape[0] == windowSize:
        # Parameters
        name_string = str(block_id[0] + 1)
        m, n = array.shape
        u = m + 1 - windowSize
        v = n + 1 - windowSize

        # Get starting block indices
        start_idx = np.arange(u)[:, None] * n + np.arange(v)

        # Get offsetted indices across the height and width of input array
        offset_idx = np.arange(windowSize)[:, None] * n + np.arange(windowSize)

        # Get all actual indices & index into input array for final output
        flat_array = np.take(array, start_idx.ravel()[:, None] + offset_idx.ravel())

        # Save to (dask) array in .zarr format
        file_name = path + folder + name_string + 'r' + row + '.zarr'
        zarr.save(file_name, flat_array)

    return array
Example #18
def test_zarr(selenium):
    import numpy as np
    import zarr
    from numcodecs import Blosc

    # basic test
    z = zarr.zeros((1000, 1000), chunks=(100, 100), dtype="i4")
    assert z.shape == (1000, 1000)

    # test assignment
    z[0, :] = np.arange(1000)
    assert z[0, 1] == 1

    # test saving and loading
    a1 = np.arange(10)
    zarr.save("/tmp/example.zarr", a1)
    a2 = zarr.load("/tmp/example.zarr")
    np.testing.assert_equal(a1, a2)

    # test compressor
    compressor = Blosc(cname="zstd", clevel=3, shuffle=Blosc.BITSHUFFLE)
    data = np.arange(10000, dtype="i4").reshape(100, 100)
    z = zarr.array(data, chunks=(10, 10), compressor=compressor)
    assert z.compressor == compressor
Example #19
def checkpointer(iterate: Iterate):
    zarr.save('data/jacobi.zarr', iterate['x'])
Example #20
def save(self, path, data):
    return zarr.save(path, data)
Example #21
def create_dummy_data_source(size=10000000, chunksize=10000):
    a = np.random.choice(a=[0, 1], size=(size, 1024))
    b = np.array(a, dtype=np.uint16)
    z = zarr.array(b, chunks=(chunksize, 1024))
    zarr.save("/home/dask/zarr_uint16_test.zarr", z)
Example #22
def write_zarr():
    zarr.save('test.zarr', arr)
Example #23
def _4(obj: np.ndarray) -> ConstraintDirectoryFormat:
    # for C in generate data, or generate constraint
    ff = ConstraintDirectoryFormat()
    filename = str(ff.path / 'cmatrix.zip')
    zarr.save(filename, obj)
    return ff
Example #24
def main():
    args = parser.parse_args()
    output_dir = args.output_dir
    os.makedirs(output_dir, exist_ok=True)
    root = 'data/kitti'
    roi_align = ROIAlign((224, 224), 1.0, 0)
    if args.splits == 'trainval':
        splits = ['train', 'val']
    else:
        splits = [args.splits]
    masker = Masker(args.masker_thresh)
    for split in splits:
        prediction_pth = args.prediction_template % split
        predictions = torch.load(prediction_pth)
        left_predictions = predictions['left']
        right_predictions = predictions['right']
        os.makedirs(os.path.join(output_dir, split, 'image', 'left'),
                    exist_ok=True)
        os.makedirs(os.path.join(output_dir, split, 'image', 'right'),
                    exist_ok=True)
        os.makedirs(os.path.join(output_dir, split, 'label'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, split, 'disparity'),
                    exist_ok=True)
        if args.cls == 'car':
            ds = KITTIObjectDatasetCar(root,
                                       split,
                                       filter_empty=False,
                                       shape_prior_base=args.shape_prior_base)
        elif args.cls == 'pedestrian':
            ds = KITTIObjectDatasetPedestrian(
                root,
                split,
                filter_empty=False,
                shape_prior_base=args.shape_prior_base)
        else:  # cyclist
            ds = KITTIObjectDatasetCyclist(root,
                                           split,
                                           filter_empty=False,
                                           shape_prior_base='notused')

        wrote = 0
        assert len(left_predictions) == len(ds)
        for i, (images, targets, _) in enumerate(tqdm(ds)):
            leftimg, rightimg = images['left'], images['right']
            leftanno, rightanno = targets['left'], targets['right']
            left_prediction_per_img = left_predictions[i].resize(leftimg.size)
            right_prediction_per_img = right_predictions[i].resize(
                leftimg.size)

            calib = leftanno.get_field('calib')
            if len(leftanno) == 0 or len(left_prediction_per_img) == 0:
                continue
            imgid: int = leftanno.get_field('imgid')[0, 0].item()
            # os.makedirs(osp.join(output_dir, split, 'imgid_org_left', str(imgid)), exist_ok=True)
            masks_per_img = masker([left_prediction_per_img.get_field('mask')],
                                   [left_prediction_per_img])[0].squeeze(1)
            disparity_per_img = leftanno.get_map('disparity')
            assert len(left_prediction_per_img.bbox) == len(
                right_prediction_per_img.bbox) == len(masks_per_img)
            rois_for_image_crop_left = []
            rois_for_image_crop_right = []
            fxus, x1s, x1ps, x2s, x2ps, y1s, y2s = [], [], [], [], [], [], []
            roi_masks = []
            roi_disps = []
            for j, (left_bbox, right_bbox, mask) in enumerate(
                    zip(left_prediction_per_img.bbox,
                        right_prediction_per_img.bbox, masks_per_img)):
                x1, y1, x2, y2 = expand_box_to_integer(left_bbox.tolist())
                x1p, _, x2p, _ = expand_box_to_integer(right_bbox.tolist())
                max_width = max(x2 - x1, x2p - x1p)
                max_width = min(max_width, leftimg.width - x1)
                allow_extend_width = min(left_prediction_per_img.width - x1,
                                         left_prediction_per_img.width - x1p)
                max_width = min(max_width, allow_extend_width)
                rois_for_image_crop_left.append(
                    [0, x1, y1, x1 + max_width, y2])
                rois_for_image_crop_right.append(
                    [0, x1p, y1, x1p + max_width, y2])
                x1s.append(x1)
                x1ps.append(x1p)
                x2s.append(x1 + max_width)
                x2ps.append(x1p + max_width)
                y1s.append(y1)
                y2s.append(y2)

                roi_mask = mask[y1:y2, x1:x1 + max_width]
                roi_mask = SegmentationMask(
                    roi_mask, (roi_mask.shape[1], roi_mask.shape[0]),
                    mode='mask')
                roi_mask = roi_mask.resize((224, 224))
                # roi_masks.append(roi_mask)
                roi_disparity = disparity_per_img.crop(
                    (x1, y1, x1 + max_width, y2)).data
                dispfg_mask = SegmentationMask(
                    roi_disparity != 0,
                    (roi_disparity.shape[1], roi_disparity.shape[0]),
                    mode='mask').resize((224, 224)).get_mask_tensor()

                roi_disparity = roi_disparity - (x1 - x1p)
                roi_disparity = DisparityMap(roi_disparity).resize(
                    (224, 224)).data
                # pdb.set_trace()
                roi_masks.append(roi_mask)
                roi_disps.append(roi_disparity)
            # crop and resize image
            leftimg = F.to_tensor(leftimg).unsqueeze(0)
            rightimg = F.to_tensor(rightimg).unsqueeze(0)
            rois_for_image_crop_left = torch.as_tensor(
                rois_for_image_crop_left).float()
            rois_for_image_crop_right = torch.as_tensor(
                rois_for_image_crop_right).float()
            roi_left_imgs = roi_align(leftimg, rois_for_image_crop_left)
            roi_right_imgs = roi_align(rightimg, rois_for_image_crop_right)
            for j in range(len(roi_left_imgs)):
                zarr.save(
                    osp.join(output_dir, split, 'image/left',
                             str(wrote) + '.zarr'), roi_left_imgs[j].numpy())
                zarr.save(
                    osp.join(output_dir, split, 'image/right',
                             str(wrote) + '.zarr'), roi_right_imgs[j].numpy())
                zarr.save(
                    osp.join(output_dir, split, 'disparity',
                             str(wrote) + '.zarr'), roi_disps[j].numpy())
                out_path = os.path.join(output_dir, split, 'label',
                                        str(wrote) + '.pkl')
                # Use a context manager so each pickle file is closed promptly.
                with open(out_path, 'wb') as f:
                    pickle.dump(
                        {
                            'mask': roi_masks[j],
                            'x1': x1s[j],
                            'y1': y1s[j],
                            'x2': x2s[j],
                            'y2': y2s[j],
                            'x1p': x1ps[j],
                            'x2p': x2ps[j],
                            'fuxb': calib.stereo_fuxbaseline,
                            'imgid': imgid
                        }, f)
                wrote += 1
        print(f'made {wrote} pairs for {split}.')
Example #25
def zarr_dandiset(new_dandiset: SampleDandiset) -> SampleDandiset:
    zarr.save(new_dandiset.dspath / "sample.zarr", np.arange(1000),
              np.arange(1000, 0, -1))
    new_dandiset.upload()
    return new_dandiset
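
For orientation, here is a minimal, self-contained sketch of the zarr.save() round trip that the examples above rely on. It assumes zarr v2 semantics; the paths and array contents are arbitrary.

import numpy as np
import zarr

# With a single positional array, zarr.save() writes an array store; with
# several, it writes a group whose members are named arr_0, arr_1, ...
zarr.save('single.zarr', np.arange(10))
zarr.save('group.zarr', np.arange(10), np.arange(10, 0, -1))

# zarr.load() returns the array directly for an array store; for a group
# it returns a lazy mapping keyed by member name.
a = zarr.load('single.zarr')
g = zarr.load('group.zarr')
np.testing.assert_array_equal(a, np.arange(10))
np.testing.assert_array_equal(g['arr_1'], np.arange(10, 0, -1))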