Example #1
def check_stored_cv_files(dataset_name="basil"):
    """ Tests if all files were downloaded

    :param dataset_name: str
    """
    if "basil" == dataset_name:
        cv_url = "gs://nkem/basil_4k_oldnet/region_graph/"
    elif "pinky40" == dataset_name:
        cv_url = "gs://nkem/pinky40_v11/mst_trimmed_sem_remap/region_graph/"
    elif "pinky100" == dataset_name:
        cv_url = "gs://nkem/pinky100_v0/region_graph/"
    else:
        raise Exception("Could not identify region graph resource")

    with storage.SimpleStorage(cv_url) as cv_st:
        dir_path = creator_utils.dir_from_layer_name(
            creator_utils.layer_name_from_cv_url(cv_st.layer_path))

        file_paths = list(cv_st.list_files())

    c = 0
    n_file_paths = len(file_paths)
    time_start = time.time()
    for i_fp, fp in enumerate(file_paths):
        if i_fp % 1000 == 1:
            dt = time.time() - time_start
            eta = dt / i_fp * n_file_paths - dt
            print("%d / %d - dt: %.3fs - eta: %.3fs" %
                  (i_fp, n_file_paths, dt, eta))

        if not os.path.exists(dir_path + fp[:-4] + ".h5"):
            print(dir_path + fp[:-4] + ".h5")
            c += 1

    print("%d files were missing" % c)
Example #2
def create_skeleton_layer(s3_bucket, skel_res, img_dims, num_res=7):
    """Creates segmentation layer for skeletons

    Arguments:
        s3_bucket {str} -- path to precomputed skeleton destination
        skel_res {list} -- x,y,z dimensions of highest res voxel size (nm)
        img_dims {list} -- x,y,z voxel dimensions of tiff images

    Keyword Arguments:
        num_res {int} -- number of image resolutions to be downsampled

    Returns:
        vol {cloudvolume.CloudVolume} -- CloudVolume to upload skeletons to
    """
    # create cloudvolume info
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type="segmentation",
        data_type="uint64",  # Channel images might be 'uint8'
        encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        # Voxel scaling, units are in nanometers
        resolution=skel_res,
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        # Pick a convenient size for your underlying chunk representation
        # Powers of two are recommended, doesn't need to cover image exactly
        chunk_size=[int(i / 4) for i in img_dims],
        # chunk_size=[128, 128, 64],  # units are voxels
        volume_size=[i * 2 ** (num_res - 1) for i in img_dims],  # units are voxels
        skeletons="skeletons",
    )
    skel_info = {
        "@type": "neuroglancer_skeletons",
        "transform": [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
        "vertex_attributes": [
            {"id": "radius", "data_type": "float32", "num_components": 1},
            {"id": "vertex_types", "data_type": "float32", "num_components": 1},
            {"id": "vertex_color", "data_type": "float32", "num_components": 4},
        ],
    }
    # get cloudvolume info
    vol = CloudVolume(s3_bucket, info=info, parallel=True)
    # Add one downsampling scale per resolution level
    for i in range(num_res):
        vol.add_scale((2 ** i, 2 ** i, 2 ** i))
    vol.commit_info()

    # upload skeleton info to /skeletons/ dir
    with storage.SimpleStorage(vol.cloudpath) as stor:
        stor.put_json(str(Path("skeletons") / "info"), skel_info)

    return vol
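A minimal call sketch, assuming a hypothetical destination bucket, 100 nm isotropic voxels, and 1024x1024x512-voxel tiff tiles; CloudVolume credentials for the bucket must already be configured.

# Hypothetical bucket path and tile geometry; adjust to the actual dataset.
vol = create_skeleton_layer(
    s3_bucket="s3://my-bucket/my-dataset/skeletons",
    skel_res=[100, 100, 100],    # nm per voxel at the highest resolution
    img_dims=[1024, 1024, 512],  # voxel dimensions of one tiff image
    num_res=7,
)
# Skeleton files can then be uploaded under the layer's skeletons/ directory.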
Example #3
def _download_and_store_cv_files_thread(args):
    """ Helper thread to download files from google cloud """
    chunk_id, cv_url, file_paths, olduint32 = args

    # Reset connection pool to make cloud-volume compatible with parallelizing
    storage.reset_connection_pools()

    n_file_paths = len(file_paths)
    time_start = time.time()
    with storage.SimpleStorage(cv_url) as cv_st:
        for i_fp, fp in enumerate(file_paths):
            if i_fp % 100 == 1:
                dt = time.time() - time_start
                eta = dt / i_fp * n_file_paths - dt
                print("%d: %d / %d - dt: %.3fs - eta: %.3fs" %
                      (chunk_id, i_fp, n_file_paths, dt, eta))

            creator_utils.download_and_store_edge_file(cv_st, fp)
Example #4
def download_and_store_cv_files(dataset_name="basil",
                                n_threads=10,
                                olduint32=False):
    """ Downloads files from google cloud using cloud-volume

    :param dataset_name: str
    :param n_threads: int
    :param olduint32: bool
    """
    if "basil" == dataset_name:
        cv_url = "gs://nkem/basil_4k_oldnet/region_graph/"
    elif "pinky40" == dataset_name:
        cv_url = "gs://nkem/pinky40_v11/mst_trimmed_sem_remap/region_graph/"
    elif "pinky100" == dataset_name:
        cv_url = "gs://nkem/pinky100_v0/region_graph/"
    else:
        raise Exception("Could not identify region graph resource")

    with storage.SimpleStorage(cv_url) as cv_st:
        dir_path = creator_utils.dir_from_layer_name(
            creator_utils.layer_name_from_cv_url(cv_st.layer_path))

        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

        file_paths = list(cv_st.list_files())

    file_chunks = np.array_split(file_paths, n_threads * 3)
    multi_args = []
    for i_file_chunk, file_chunk in enumerate(file_chunks):
        multi_args.append([i_file_chunk, cv_url, file_chunk, olduint32])

    # Run downloads in parallel (single-threaded path runs in-process for debugging)
    if n_threads == 1:
        mu.multiprocess_func(_download_and_store_cv_files_thread,
                             multi_args,
                             n_threads=n_threads,
                             verbose=True,
                             debug=n_threads == 1)
    else:
        mu.multisubprocess_func(_download_and_store_cv_files_thread,
                                multi_args,
                                n_threads=n_threads)
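A short usage sketch combining this with Example #1: download the pinky100 region graph with ten subprocesses, then confirm that every edge file arrived on disk. Read access to the gs://nkem bucket is assumed.

download_and_store_cv_files(dataset_name="pinky100", n_threads=10)
check_stored_cv_files(dataset_name="pinky100")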
Example #5
def create_cloud_volume(
    precomputed_path: str,
    img_size: Sequence[int],
    voxel_size: Sequence[Union[int, float]],
    num_resolutions: int,
    chunk_size: Optional[Sequence[int]] = None,
    parallel: Optional[bool] = False,
    layer_type: Optional[str] = "image",
    dtype: Optional[str] = None,
    commit_info: Optional[bool] = True,
) -> CloudVolumePrecomputed:
    """Create CloudVolume object and info file.

    Handles both image volumes and segmentation volumes from octree structure.

    Arguments:
        precomputed_path: cloudvolume path
        img_size: x, y, z voxel dimensions of tiff images.
        voxel_size: x, y, z dimensions of highest res voxel size (nm).
        num_resolutions: The number of resolutions to upload.
        chunk_size: The size of chunks to use for upload. If None, uses img_size/4.
        parallel: Whether to upload chunks in parallel.
        layer_type: The type of cloudvolume object to create.
        dtype: The data type of the volume. If None, uses default for layer type.
        commit_info: Whether to create an info file at the path, defaults to True.
    Returns:
        vols: List of CloudVolume objects to upload to, ordered from lowest to
            highest resolution (a single-element list for segmentation layers).
    """
    # defaults
    if chunk_size is None:
        chunk_size = [int(i / 4) for i in img_size]  # /2 took 42 hrs
    if dtype is None:
        if layer_type == "image":
            dtype = "uint16"
        elif layer_type == "segmentation" or layer_type == "annotation":
            dtype = "uint64"
        else:
            raise ValueError(
                f"layer_type is {layer_type}, but it should be "
                "'image', 'segmentation', or 'annotation'")

    # check inputs
    check_precomputed(precomputed_path)
    check_size(img_size, allow_float=False)
    check_size(voxel_size)
    check_type(num_resolutions, (int, np.integer))
    if num_resolutions < 1:
        raise ValueError(
            f"Number of resolutions should be > 0, not {num_resolutions}")
    check_size(chunk_size)
    check_type(parallel, bool)
    check_type(layer_type, str)
    if layer_type not in ["image", "segmentation", "annotation"]:
        raise ValueError(
            f"{layer_type} should be 'image', 'segmentation', or 'annotation'")
    check_type(dtype, str)
    if dtype not in ["uint16", "uint64"]:
        raise ValueError(f"{dtype} should be 'uint16' or 'uint64'")
    check_type(commit_info, bool)

    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=layer_type,
        data_type=dtype,  # Channel images might be 'uint8'
        encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution=voxel_size,  # Voxel scaling, units are in nanometers
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        chunk_size=chunk_size,  # units are voxels
        volume_size=[i * 2**(num_resolutions - 1) for i in img_size],
    )
    vol = CloudVolume(precomputed_path, info=info, parallel=parallel)
    # Add one downsampling scale per resolution level
    for i in range(num_resolutions):
        vol.add_scale((2**i, 2**i, 2**i), chunk_size=chunk_size)
    if commit_info:
        vol.commit_info()
    if layer_type == "image" or layer_type == "annotation":
        vols = [
            CloudVolume(precomputed_path, mip=i, parallel=parallel)
            for i in range(num_resolutions - 1, -1, -1)
        ]
    elif layer_type == "segmentation":
        info.update(skeletons="skeletons")

        skel_info = {
            "@type":
            "neuroglancer_skeletons",
            "transform": [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
            "vertex_attributes": [
                {
                    "id": "radius",
                    "data_type": "float32",
                    "num_components": 1
                },
                {
                    "id": "vertex_types",
                    "data_type": "float32",
                    "num_components": 1
                },
                {
                    "id": "vertex_color",
                    "data_type": "float32",
                    "num_components": 4
                },
            ],
        }
        with storage.SimpleStorage(vol.cloudpath) as stor:
            stor.put_json(str(Path("skeletons") / "info"), skel_info)
        vols = [vol]
    return vols
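A minimal usage sketch for the image case, assuming a hypothetical bucket path with write access. The returned list is ordered from lowest to highest resolution, so the last element is the mip-0 volume that receives the raw data.

import numpy as np

vols = create_cloud_volume(
    precomputed_path="s3://my-bucket/my-dataset/image",  # hypothetical path
    img_size=[1024, 1024, 512],
    voxel_size=[100, 100, 100],
    num_resolutions=2,
)
# Write one chunk-aligned tile of zeros into the highest-resolution (mip 0) volume.
tile = np.zeros((256, 256, 128), dtype="uint16")
vols[-1][0:256, 0:256, 0:128] = tile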
Example #6
def create_cloud_volume(
    precomputed_path,
    img_size,
    voxel_size,
    num_resolutions=2,
    chunk_size=None,
    parallel=False,
    layer_type="image",
    dtype=None,
):
    """Create CloudVolume volume object and info file.

    Arguments:
        precomputed_path {str} -- cloudvolume path
        img_size {list} -- x,y,z voxel dimensions of tiff images
        voxel_size {list} -- x,y,z dimensions of highest res voxel size (nm)
        
    Keyword Arguments:
        num_resolutions {int} -- the number of resolutions to upload
        chunk_size {list} -- size of chunks to upload. If None, uses img_size/2.
        parallel {bool} -- whether to upload chunks in parallel
        layer_type {str} -- one of "image" or "segmentation"
        dtype {str} -- one of "uint16" or "uint64". If None, uses default for layer type.
    Returns:
        vols {list} -- list of CloudVolume objects to upload to, ordered from
            lowest to highest resolution (single-element for segmentation)
    """
    if chunk_size is None:
        chunk_size = [int(i / 2) for i in img_size]
    if dtype is None:
        if layer_type == "image":
            dtype = "uint16"
        elif layer_type == "segmentation":
            dtype = "uint64"
        else:
            raise ValueError(
                f"layer_type is {layer_type}, but it should be "
                "'image' or 'segmentation'")

    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=layer_type,
        data_type=dtype,  # Channel images might be 'uint8'
        encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution=voxel_size,  # Voxel scaling, units are in nanometers
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        chunk_size=chunk_size,  # units are voxels
        volume_size=[i * 2**(num_resolutions - 1) for i in img_size],
        # volume_size=img_size,  # e.g. a cubic millimeter dataset
        skeletons="skeletons",
    )
    vol = CloudVolume(precomputed_path, info=info, parallel=parallel)
    # Add one downsampling scale per resolution level
    for i in range(num_resolutions):
        vol.add_scale((2**i, 2**i, 2**i), chunk_size=chunk_size)
    vol.commit_info()
    if layer_type == "image":
        vols = [
            CloudVolume(precomputed_path, mip=i, parallel=parallel)
            for i in range(num_resolutions - 1, -1, -1)
        ]
    elif layer_type == "segmentation":
        skel_info = {
            "@type":
            "neuroglancer_skeletons",
            "transform": [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
            "vertex_attributes": [
                {
                    "id": "radius",
                    "data_type": "float32",
                    "num_components": 1
                },
                {
                    "id": "vertex_types",
                    "data_type": "float32",
                    "num_components": 1
                },
                {
                    "id": "vertex_color",
                    "data_type": "float32",
                    "num_components": 4
                },
            ],
        }
        with storage.SimpleStorage(vol.cloudpath) as stor:
            stor.put_json(str(Path("skeletons") / "info"), skel_info)
        vols = [vol]
    return vols