def post_swcs_to_dvid(config, items):
    """
    Send the given SWC files as key/value pairs to DVID.
    
    Args:
        config: The CreateSkeletons workflow config data
    
        items: list-of-tuples (body_id, swc_text, error_text)
               If swc_text is None or error_text is NOT None, then nothing is posted.
               (We could have filtered out such items upstream, but it's convenient to just handle it here.)
    """
    # Re-use session for connection pooling.
    session = default_dvid_session()

    # Re-use resource manager client connections, too.
    # (If resource-server is empty, this will return a "dummy client")    
    resource_client = ResourceManagerClient( config["options"]["resource-server"],
                                             config["options"]["resource-port"] )

    dvid_server = config["dvid-info"]["dvid"]["server"]
    uuid = config["dvid-info"]["dvid"]["uuid"]
    instance = config["dvid-info"]["dvid"]["skeletons-destination"]

    for (body_id, swc_contents, err) in items:
        if swc_contents is None or err is not None:
            continue

        swc_contents = swc_contents.encode('utf-8')

        @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
        def write_swc():
            with resource_client.access_context(dvid_server, False, 1, len(swc_contents)):
                session.post(f'{dvid_server}/api/node/{uuid}/{instance}/key/{body_id}_swc', swc_contents)
        write_swc()
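
# A minimal usage sketch for post_swcs_to_dvid(), assuming a config dict with the
# keys the function reads above. The server, uuid, and instance values are hypothetical.
example_config = {
    "options": {"resource-server": "", "resource-port": 0},
    "dvid-info": {
        "dvid": {
            "server": "http://localhost:8000",
            "uuid": "abc123",
            "skeletons-destination": "segmentation_skeletons",
        }
    },
}
example_items = [
    (1001, "# SWC file contents here...", None),   # valid: will be posted
    (1002, None, "skeletonization failed"),        # skipped: no SWC text
]
post_swcs_to_dvid(example_config, example_items)
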
    def get_legacy_sparsevol(cls, server, uuid, instance_name, body_id, scale=0):
        """
        Returns the coordinates (Z,Y,X) of all voxels in the given body_id at the given scale.
        
        Note: For large bodies, this will be a LOT of coordinates at scale 0.
        
        Note: The returned coordinates are native to the requested scale.
              For instance, if the first Z-coordinate at scale 0 is 128,
              then at scale 1 it is 64, etc.
        
        Note: This function requests the data from DVID in the legacy 'rles' format,
              which is much less efficient than the newer 'blocks' format
              (but it's easy enough to parse that we can do it in Python).

        Return an array of coordinates of the form:
    
            [[Z,Y,X],
             [Z,Y,X],
             [Z,Y,X],
             ...
            ]
        """
        if not server.startswith('http://'):
            server = 'http://' + server
        session = default_dvid_session()
        r = session.get(f'{server}/api/node/{uuid}/{instance_name}/sparsevol/{body_id}?format=rles&scale={scale}')
        r.raise_for_status()
        
        return parse_rle_response( r.content )
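
# Since the returned coordinates are native to the requested scale (see the note
# above), converting between scales is just a power-of-two shift. A small sketch,
# independent of DVID:
import numpy as np

def rescale_coords_zyx(coords_zyx, from_scale, to_scale):
    """
    Convert (Z,Y,X) sparsevol coordinates from one scale to another.
    Downscaling (to_scale > from_scale) truncates, and may yield duplicate rows.
    """
    coords_zyx = np.asarray(coords_zyx)
    if to_scale >= from_scale:
        return coords_zyx >> (to_scale - from_scale)
    return coords_zyx << (from_scale - to_scale)

# e.g. a scale-0 Z-coordinate of 128 becomes 64 at scale 1:
# rescale_coords_zyx([[128, 0, 0]], 0, 1)  ->  array([[64, 0, 0]])
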
def extend_list_value(dvid_server, uuid, kv_instance, key, new_list):
    """
    For the list stored at the given keyvalue instance and key, extend it with the given new_list.
    If the keyvalue instance and/or key are missing from the server, create them.
    """
    assert isinstance(new_list, list)
    old_list = []
    session = default_dvid_session()

    r = session.get('{dvid_server}/api/node/{uuid}/{kv_instance}/keys'.format(**locals()))
    if r.status_code not in (200,400):
        r.raise_for_status()
    
    if r.status_code == 400:
        # Create the keyvalue instance first
        r_post = session.post('{dvid_server}/api/repo/{uuid}/instance'.format(**locals()),
                               json={ "typename": "keyvalue", 
                                      "dataname": kv_instance } )
        r_post.raise_for_status()

    elif key in r.json():
        # Fetch original value
        r = session.get('{dvid_server}/api/node/{uuid}/{kv_instance}/key/{key}'.format(**locals()))
        r.raise_for_status()
        old_list = r.json()
        assert isinstance(old_list, list)

    new_list = list(set(old_list + new_list))
    if set(new_list) != set(old_list):
        logger.debug("Updating '{}/{}' list from: {} to: {}".format( kv_instance, key, old_list, new_list ))
        r = session.post('{dvid_server}/api/node/{uuid}/{kv_instance}/key/{key}'.format(**locals()),
                          json=new_list)
        r.raise_for_status()
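
# Example call, mirroring how the ingestion workflows below register instances in
# the '.meta' keyvalue (the server and uuid here are hypothetical):
extend_list_value('http://localhost:8000', 'abc123', '.meta', 'neuroglancer', ['grayscale'])
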
def post_meshes_to_dvid(config, instance_name, partition_items):
    """
    Send the given meshes (either .obj or .drc) as key/value pairs to DVID.
    
    Args:
        config: The CreateMeshes workflow config data
        
        instance_name: key-value instance to post to
            
        partition_items: list of tuples: [(group_id, [(segment_id, mesh_data), ...]), ...]
    """
    # Re-use session for connection pooling.
    session = default_dvid_session()

    # Re-use resource manager client connections, too.
    # (If resource-server is empty, this will return a "dummy client")    
    resource_client = ResourceManagerClient( config["options"]["resource-server"],
                                             config["options"]["resource-port"] )

    dvid_server = config["dvid-info"]["dvid"]["server"]
    uuid = config["dvid-info"]["dvid"]["uuid"]
    
    grouping_scheme = config["mesh-config"]["storage"]["grouping-scheme"]
    mesh_format = config["mesh-config"]["storage"]["format"]

    if grouping_scheme == "no-groups":
        for group_id, segment_ids_and_meshes in partition_items:
            for (segment_id, mesh_data) in segment_ids_and_meshes:

                @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
                def write_mesh():
                    with resource_client.access_context(dvid_server, False, 2, len(mesh_data)):
                        session.post(f'{dvid_server}/api/node/{uuid}/{instance_name}/key/{segment_id}', mesh_data)
                        session.post(f'{dvid_server}/api/node/{uuid}/{instance_name}/key/{segment_id}_info', json={ 'format': mesh_format })
                
                write_mesh()
    else:
        # All other grouping schemes, including 'singletons' write tarballs.
        # (In the 'singletons' case, there is just one tarball per body.)
        for group_id, segment_ids_and_meshes in partition_items:
            tar_name = _get_group_name(config, group_id)
            tar_stream = BytesIO()
            with closing(tarfile.open(tar_name, 'w', tar_stream)) as tf:
                for (segment_id, mesh_data) in segment_ids_and_meshes:
                    mesh_name = _get_mesh_name(config, segment_id)
                    f_info = tarfile.TarInfo(mesh_name)
                    f_info.size = len(mesh_data)
                    tf.addfile(f_info, BytesIO(mesh_data))
    
            tar_bytes = tar_stream.getbuffer()

            @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
            def write_tar():
                with resource_client.access_context(dvid_server, False, 1, len(tar_bytes)):
                    session.post(f'{dvid_server}/api/node/{uuid}/{instance_name}/key/{tar_name}', tar_bytes)
            
            write_tar()
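
# _get_group_name() and _get_mesh_name() are referenced above but not defined in
# this excerpt. A plausible minimal sketch, assuming keys are derived from the
# group/segment ids and the configured mesh format (the exact naming scheme is an
# assumption, not the workflow's verbatim code):
def _get_group_name(config, group_id):
    # One tarball per group (e.g. per body).
    return f"{group_id}.tar"

def _get_mesh_name(config, segment_id):
    # Mesh files inside the tarball are named by segment id plus format extension.
    mesh_format = config["mesh-config"]["storage"]["format"]  # e.g. 'obj' or 'drc'
    return f"{segment_id}.{mesh_format}"
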
def update_extents(dvid_server, uuid, name, minimal_extents_zyx):
    """
    Ensure that the given data instance has at least the given extents.
    
    Args:
        dvid_server (str): location of dvid server
        uuid (str): version id
        name (str): data instance name
        minimal_extents_zyx: 3D bounding box [min_zyx, max_zyx] = [(z0,y0,x0), (z1,y1,x1)].
                         If provided, data extents will be at least this large (possibly larger).
                         (The max extent should use python conventions, i.e. the MaxPoint + 1)
    """
    session = default_dvid_session()
    minimal_extents_zyx = np.array(minimal_extents_zyx, dtype=int)
    assert minimal_extents_zyx.shape == (2,3), \
        "Minimal extents must be provided as a 3D bounding box: [(z0,y0,x0), (z1,y1,x1)]"
    logger.info("Updating extents for {uuid}/{name}".format(**locals()))

    minimal_extents_xyz = minimal_extents_zyx[:, ::-1].copy()

    # Fetch original extents.
    r = session.get(
        '{dvid_server}/api/node/{uuid}/{name}/info'.format(**locals()))
    r.raise_for_status()

    info = r.json()
    logger.debug("Read extents: " + json.dumps(info))

    orig_extents_xyz = np.array([(1e9, 1e9, 1e9), (-1e9, -1e9, -1e9)],
                                dtype=int)
    if info["Extended"]["MinPoint"] is not None:
        orig_extents_xyz[0] = info["Extended"]["MinPoint"]

    if info["Extended"]["MaxPoint"] is not None:
        orig_extents_xyz[1] = info["Extended"]["MaxPoint"]
        orig_extents_xyz[1] += 1

    minimal_extents_xyz[0] = np.minimum(minimal_extents_xyz[0],
                                        orig_extents_xyz[0])
    minimal_extents_xyz[1] = np.maximum(minimal_extents_xyz[1],
                                        orig_extents_xyz[1])

    if (minimal_extents_xyz != orig_extents_xyz).any():
        min_point_xyz = minimal_extents_xyz[0]
        max_point_xyz = minimal_extents_xyz[1] - 1
        extents_json = {
            "MinPoint": min_point_xyz.tolist(),
            "MaxPoint": max_point_xyz.tolist()
        }

        url = '{dvid_server}/api/node/{uuid}/{name}/extents'.format(**locals())
        logger.debug("Posting new extents: {}".format(
            json.dumps(extents_json)))
        r = session.post(url, json=extents_json)
        r.raise_for_status()
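
# Example call (hypothetical server/uuid). Per the docstring, the max coordinate is
# exclusive, so this guarantees the extents cover at least z in [0,512),
# y in [0,1024), x in [0,2048):
update_extents('http://localhost:8000', 'abc123', 'grayscale',
               [(0, 0, 0), (512, 1024, 2048)])
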
    def _execute_mappings(self, mapping_df):
        config = self.config_data
        if mapping_df is None:
            raise RuntimeError(
                "Can't load mappings: No agglomeration mapping provided.")

        # Just do this from a single machine (the driver), with a big batch size
        # The writes are serialized on the DVID side, anyway.
        with Timer("Sending mapping", logger):
            ingest_mapping(config["dvid"]["server"],
                           config["dvid"]["uuid"],
                           config["dvid"]["segmentation-name"],
                           config["options"]["mutation-id"],
                           mapping_df,
                           batch_size=100_000,
                           show_progress_bar=False,
                           session=default_dvid_session())
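
# A sketch of a plausible mapping input for the call above, assuming
# ingest_mapping() consumes a DataFrame of supervoxel -> body pairs
# (the column names here are an assumption, not ingest_mapping()'s documented API):
import pandas as pd
example_mapping_df = pd.DataFrame({'sv':   [1,  2,  3,  4],
                                   'body': [10, 10, 20, 20]})
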
        def timed_fetch_blocks_from_box(box):
            """
            Fetch the blocks for a given box and return the time it took to fetch them.
            Do not bother decompressing the blocks or combining them into a single volume.
            """
            assert not (box % block_shape).any(), "For this test, all requests must be block-aligned"
            block_boxes = list( boxes_from_grid(box, Grid(block_shape)) )
            block_coords_xyz = np.array(block_boxes)[:,0,::-1] // block_shape
            block_coords_str = ','.join(map(str, block_coords_xyz.flat))

            voxel_count = np.prod(box[1] - box[0])

            session = default_dvid_session()
            url = f'{server}/api/node/{uuid}/{instance}/specificblocks?blocks={block_coords_str}'
            
            with resource_mgr_client.access_context(server, True, 1, voxel_count):
                timestamp = datetime.now()
                with Timer() as timer:
                    r = session.get(url)
            
            r.raise_for_status()
            return timestamp, voxel_count, len(r.content), timer.seconds
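
        # Example of consuming the returned tuple to report effective throughput,
        # assuming timed_fetch_blocks_from_box() and its free variables (server,
        # uuid, instance, block_shape, resource_mgr_client) are in scope:
        box = np.array([(0, 0, 0), (256, 256, 256)])  # must be block-aligned
        timestamp, voxel_count, byte_count, seconds = timed_fetch_blocks_from_box(box)
        print(f"[{timestamp}] {byte_count / 1e6:.1f} MB for {voxel_count / 1e6:.1f} Mvox "
              f"in {seconds:.3f}s ({byte_count / 1e6 / seconds:.1f} MB/s compressed)")
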
    def get_roi(self, roi):
        """
        An alternate implementation of libdvid.DVIDNodeService.get_roi(),
        since DVID sometimes returns strange 503 errors and DVIDNodeService.get_roi()
        doesn't know how to handle them.
        """
        session = default_dvid_session()

        # grab roi blocks (should use libdvid, but there are problems handling the 206 status)
        addr = self.dvid_server + "/api/node/" + str(self.uuid) + "/" + str(roi) + "/roi"
        if not self.dvid_server.startswith("http://"):
            addr = "http://" + addr
        data = session.get(addr)
        roi_blockruns = data.json()
        
        roi_blocks = []
        for (z,y,x_first, x_last) in roi_blockruns:
            for x in range(x_first, x_last+1):
                roi_blocks.append((z,y,x))
        
        return roi_blocks
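
# The run-length expansion in get_roi() above can also be done with one numpy op
# per run instead of one append per block; a standalone sketch that returns the
# same blocks as an (N, 3) array of (z, y, x) rows:
import numpy as np

def expand_blockruns(roi_blockruns):
    runs = []
    for z, y, x_first, x_last in roi_blockruns:
        xs = np.arange(x_first, x_last + 1)
        run = np.empty((len(xs), 3), dtype=int)
        run[:, 0] = z
        run[:, 1] = y
        run[:, 2] = xs
        runs.append(run)
    return np.concatenate(runs)
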
    def get_coarse_sparsevol(cls, server, uuid, instance_name, body_id, supervoxels=False):
        """
        Return the 'coarse sparsevol' representation of a given body.
        This is similar to the sparsevol representation at scale=6,
        EXCEPT that it is generated from the label index, so no blocks
        are lost from downsampling.

        Return an array of coordinates of the form:
    
            [[Z,Y,X],
             [Z,Y,X],
             [Z,Y,X],
             ...
            ]
        """
        
        supervoxels = str(bool(supervoxels)).lower()
        if not server.startswith('http://'):
            server = 'http://' + server
        session = default_dvid_session()
        r = session.get(f'{server}/api/node/{uuid}/{instance_name}/sparsevol-coarse/{body_id}?supervoxels={supervoxels}')
        r.raise_for_status()
        
        return parse_rle_response( r.content )
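
# The coordinates returned for 'sparsevol-coarse' are block coordinates (one row
# per block, analogous to scale-6 voxels as noted above). A sketch for recovering
# each block's scale-0 corner, assuming DVID's default 64px block size; 'Cls'
# stands in for the enclosing class, which is not shown in this excerpt:
coarse_coords_zyx = Cls.get_coarse_sparsevol(server, uuid, instance_name, body_id)
block_corners_zyx = coarse_coords_zyx * 64
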
def is_node_locked(dvid_server, uuid):
    # Verify that the node is open for writing!
    session = default_dvid_session()
    r = session.get(f'{dvid_server}/api/node/{uuid}/commit')
    r.raise_for_status()
    return r.json()["Locked"]
        def writeimagepyramid(vol3d):
            blknum, vol = vol3d

            from PIL import Image
            from scipy import ndimage
            import io
            import numpy
            s = default_dvid_session()

            # create a thread pool for parallel tile posts
            from multiprocessing.dummy import Pool as ThreadPool
            NUM_THREADS = 4
            ACTIVE_REQS = 16

            #pool = ThreadPool(NUM_THREADS)

            # actually perform tile load
            def loadTile(reqpair):
                urlreq, reqbuff = reqpair
                s.post(urlreq, data=reqbuff)

            work_queue = []
            # iterate slice by slice
            for slicenum in range(0, BLKSIZE):
                imslice = vol[slicenum, :, :]
                imlevels = []
                imlevels.append(imslice)
                # use generic downsample algorithm
                for level in range(1, maxlevel + 1):
                    # go to max level regardless of actual image size,
                    # even if the image is already smaller than TILESIZE
                    imlevels.append(ndimage.zoom(imlevels[level - 1], 0.5))

                # write pyramid for each slice using custom request
                for levelnum in range(0, len(imlevels)):
                    levelslice = imlevels[levelnum]
                    dim1, dim2 = levelslice.shape

                    num1tiles = (dim1 - 1) // TILESIZE + 1
                    num2tiles = (dim2 - 1) // TILESIZE + 1

                    for iter1 in range(0, num1tiles):
                        for iter2 in range(0, num2tiles):
                            # extract tile
                            tileholder = numpy.zeros((TILESIZE, TILESIZE),
                                                     numpy.uint8)
                            min1 = iter1 * TILESIZE
                            min2 = iter2 * TILESIZE
                            tileslice = levelslice[min1:min1 + TILESIZE,
                                                   min2:min2 + TILESIZE]
                            t1, t2 = tileslice.shape
                            tileholder[0:t1, 0:t2] = tileslice

                            # write tileholder to dvid
                            buf = BytesIO()
                            img = Image.frombuffer('L', (TILESIZE, TILESIZE),
                                                   tileholder.tobytes(),
                                                   'raw', 'L', 0, 1)
                            imformatpil = imformat
                            if imformat == "jpg":
                                imformatpil = "jpeg"
                            img.save(buf, format=imformatpil)

                            if axis == "xy":
                                work_queue.append(
                                    (server + "/api/node/" + uuid + "/" +
                                     tilename + "/tile/" + axis + "/" +
                                     str(levelnum) + "/" + str(iter2) + "_" +
                                     str(iter1) + "_" +
                                     str(slicenum + blknum * BLKSIZE),
                                     buf.getvalue()))
                            elif axis == "xz":
                                work_queue.append(
                                    (server + "/api/node/" + uuid + "/" +
                                     tilename + "/tile/" + axis + "/" +
                                     str(levelnum) + "/" + str(iter2) + "_" +
                                     str(slicenum + blknum * BLKSIZE) + "_" +
                                     str(iter1), buf.getvalue()))
                            else:
                                work_queue.append(
                                    (server + "/api/node/" + uuid + "/" +
                                     tilename + "/tile/" + axis + "/" +
                                     str(levelnum) + "/" +
                                     str(slicenum + blknum * BLKSIZE) + "_" +
                                     str(iter2) + "_" + str(iter1),
                                     buf.getvalue()))
                            buf.close()

                            # flush a batch of requests once ACTIVE_REQS accumulate
                            if len(work_queue) == ACTIVE_REQS:
                                pool = ThreadPool(NUM_THREADS)
                                pool.map(loadTile, work_queue)

                                # close the pool to further requests
                                pool.close()
                                # wait for any remaining threads
                                pool.join()
                                work_queue = []

                # submit last jobs if any remain
                if len(work_queue) > 0:
                    pool = ThreadPool(NUM_THREADS)
                    pool.map(loadTile, work_queue)

                    # close the pool to further requests
                    pool.close()
                    # wait for any remaining threads
                    pool.join()
    def session(self):
        if self._session is None:
            self._session = default_dvid_session('ingest_label_indexes')
        return self._session
    def execute(self):
        from PIL import Image
        import numpy
        import os
       
        iterslices = self.BLKSIZE * self.config_data["options"]["numblocklayers"]

        minslice = self.config_data["minslice"]
        # map file to numpy array
        basename = self.config_data["basename"]
        
        # format should be gs://<bucket>/path
        gbucketname = ""
        gpath = ""
        if basename.startswith('gs://'):
            # parse google bucket names
            tempgs = basename.split('//')
            bucketpath = tempgs[1].split('/')
            gbucketname = bucketpath[0]
            gpath = '/'.join(bucketpath[1:])


        server = None
     
        xoffset = yoffset = zoffset = 0

        if "offset" in self.config_data["options"]:
            xoffset = self.config_data["options"]["offset"][0] 
            yoffset = self.config_data["options"]["offset"][1] 
            zoffset = self.config_data["options"]["offset"][2] 

            if xoffset % self.BLKSIZE != 0 or yoffset % self.BLKSIZE != 0 or zoffset % self.BLKSIZE != 0:
                raise Exception("offset not block aligned")

            # floor division: keep the offsets as integer block indices
            # (true division would produce floats in Python 3)
            xoffset //= self.BLKSIZE
            yoffset //= self.BLKSIZE
            zoffset //= self.BLKSIZE

        # this will start the Z block writing at the specified offset
        # (changes default behavior when loading a nonzero starting image slice)
        zoffset -= (minslice // self.BLKSIZE)


        # create metadata before workers start if using DVID
        if "output-dir" not in self.config_data or self.config_data["output-dir"] == "":
            # write to dvid
            server = self.config_data["dvid-info"]["dvid-server"]
            uuid = self.config_data["dvid-info"]["uuid"]
            grayname = self.config_data["dvid-info"]["grayname"]
            resource_server = str(self.resource_server)
            resource_port = self.resource_port

            # create grayscale type
            node_service = retrieve_node_service(server, uuid, resource_server, resource_port, self.APPNAME)
            node_service.create_grayscale8(str(grayname), self.BLKSIZE)

        for slice in range(self.config_data["minslice"], self.config_data["maxslice"]+1, iterslices):
            # parallelize images across many machines
            imgs = self.sc.parallelize(list(range(slice, slice+iterslices)), iterslices)

            def img2npy(slicenum):
                try:
                    img = None
                    if gbucketname == "":
                        img = Image.open(basename % slicenum)
                    else:
                        from gcloud import storage
                        from io import BytesIO
                        client = storage.Client()
                        gbucket = client.get_bucket(gbucketname)
                        gblob = gbucket.get_blob(gpath % slicenum)
                        
                        # write to bytes which implements file interface
                        gblobfile = BytesIO()
                        gblob.download_to_file(gblobfile)
                        gblobfile.seek(0)
                        img = Image.open(gblobfile)
                    return slicenum, numpy.array(img)
                except Exception as e:
                    # just return a blank slice -- will be handled downstream
                    return slicenum, numpy.zeros((0,0), numpy.uint8)

            npy_images = imgs.map(img2npy) 
          
            # map numpy array into y lines of block height
            blocksize = self.BLKSIZE
            blocklimit = self.BLOCKLIMIT 
            def npy2lines(arrpair):
                z, arr = arrpair
                ysize, xsize = arr.shape
                npylines = []
               
                for itery in range(0, ysize, blocksize):
                    line = numpy.zeros((blocksize, ((xsize-1) // blocksize + 1)*blocksize), numpy.uint8)
                    uppery = blocksize
                    if (itery + blocksize) > ysize:
                        uppery = ysize - itery

                    line[0:uppery, 0:xsize] = arr[itery:itery+blocksize, 0:xsize]

                    npylines.append((itery // blocksize, (z, line)))

                return npylines

            npy_lines = npy_images.flatMap(npy2lines)

            # reduce y lines into DVID blocks
            groupedlines = npy_lines.groupByKey()         

            # map y lines => (y, blocks)
            def lines2blocks(linespair):
                y, linesp = linespair

                xsize = None
                blockdata = None
                for z, line in linesp:
                    if xsize is None:
                        _, xsize = line.shape
                        blockdata = numpy.zeros((iterslices, blocksize, xsize), numpy.uint8)

                    blockdata[(z - minslice)%iterslices, :, :] = line
                return y, blockdata
            
            yblocks = groupedlines.map(lines2blocks)
       
            # map multilayer of blocks to an array of single layer blocks
            def multi2single(yblocks):
                ybindex, blocks = yblocks
                blockarr = []
                num_layers = iterslices // blocksize
                for layer in range(0,num_layers):
                    blockarr.append(((ybindex, layer), blocks[layer*blocksize:(layer*blocksize+blocksize),:,:]))

                return blockarr

            yblockssplit = yblocks.flatMap(multi2single)


            if "output-dir" in self.config_data and self.config_data["output-dir"] != "":
                # write blocks to disk for separate post-processing -- write directly to DVID eventually?
                output_dir = self.config_data["output-dir"]
                def write2disk(yblocks):
                    zbindex = slice // blocksize 
                    (ybindex, layer), blocks = yblocks
                    zbindex += layer

                    zsize,ysize,xsize = blocks.shape
                    
                    outdir = output_dir 
                    outdir += "/" + ("%05d" % zbindex) + ".z/"
                    filename = outdir + ("%05d" % ybindex) + "-" + str(xsize // blocksize) + ".blocks"

                    os.makedirs(outdir, exist_ok=True)

                    # extract blocks from buffer and write to disk
                    with open(filename, 'wb') as fout:
                        for iterx in range(0, xsize, blocksize):
                            block = blocks[:,:,iterx:iterx+blocksize].copy()
                            fout.write(block)

                yblockssplit.foreach(write2disk) 
            else:
                # write to dvid
                server = self.config_data["dvid-info"]["dvid-server"]
                uuid = self.config_data["dvid-info"]["uuid"]
                grayname = self.config_data["dvid-info"]["grayname"]
                appname = self.APPNAME
                delimiter = self.config_data["options"]["blankdelimiter"]
                
                def write2dvid(yblocks):
                    from libdvid import ConnectionMethod
                    import numpy
                    node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname) 
                    
                    # get block coordinates
                    zbindex = slice // blocksize 
                    (ybindex, layer), blocks = yblocks
                    zbindex += layer
                    zsize, ysize, xsize = blocks.shape

                    # accumulate contiguous non-blank blocks into a byte buffer
                    # (must be bytes, not str, in Python 3)
                    blockbuffer = b""
                    startblock = False
                    xrun = 0
                    xbindex = 0  # assume x starts at 0!!

                    for iterx in range(0, xsize, blocksize):
                        block = blocks[:,:,iterx:iterx+blocksize].copy()
                        vals = numpy.unique(block)
                        if len(vals) == 1 and vals[0] == delimiter:
                            # the block is blank
                            if startblock:
                                # if the previous block has data, push blocks in current queue
                                node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)), blockbuffer, ConnectionMethod.POST) 
                                startblock = False
                                xrun = 0
                                blockbuffer = ""

                        else:
                            if startblock == False:
                                xbindex = iterx // blocksize
                            
                            startblock = True
                            blockbuffer += block.tobytes()
                            xrun += 1

                            if blocklimit > 0 and xrun >= blocklimit:
                                # if the previous block has data, push blocks in current queue
                                node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)), blockbuffer, ConnectionMethod.POST) 
                                startblock = False
                                xrun = 0
                                blockbuffer = ""

                    # write-out leftover blocks
                    if xrun > 0:
                        node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)), blockbuffer, ConnectionMethod.POST) 


                yblockssplit.foreach(write2dvid)
        
            self.workflow_entry_exit_printer.write_data("Ingested %d slices" % iterslices)
        
        # just fetch one image at driver to get dims
        width = height = 1
        try:
            img = None
            if gbucketname == "":
                img = Image.open(basename % minslice) 
                width, height = img.width, img.height
            else:
                from gcloud import storage
                from io import BytesIO
                client = storage.Client()
                gbucket = client.get_bucket(gbucketname)
                gblob = gbucket.get_blob(gpath % minslice)
                
                # write to bytes which implements file interface
                gblobfile = BytesIO()
                gblob.download_to_file(gblobfile)
                gblobfile.seek(0)
                img = Image.open(gblobfile)
                width, height = img.width, img.height
        except Exception as e:
            # just set size to 1 
            pass

        if "output-dir" not in self.config_data or self.config_data["output-dir"] == "":
            # update metadata
            grayext = {}
            grayext["MinPoint"] = [xoffset*self.BLKSIZE,yoffset*self.BLKSIZE,zoffset*self.BLKSIZE+minslice]
            grayext["MaxPoint"] = [xoffset*self.BLKSIZE + width-1, yoffset*self.BLKSIZE + height-1, zoffset*self.BLKSIZE+minslice + self.config_data["maxslice"]]
            if not server.startswith("http://"):
                server = "http://" + server
            session = default_dvid_session()
            session.post(server + "/api/node/" + uuid + "/" + grayname + "/extents", json=grayext)
        def writeimagepyramid(part_data):
            logger = logging.getLogger(__name__)
            part, vol = part_data
            offset = part.get_offset()
            zslice = offset.z
            from PIL import Image
            from scipy import ndimage
            import io
            s = default_dvid_session()

            # pad data with delimiter if needed
            timslice = vol[0, :, :]
            shiftx = offset.x % tilesize
            shifty = offset.y % tilesize
            tysize, txsize = timslice.shape
            ysize = tysize + shifty
            xsize = txsize + shiftx
            imslice = np.zeros((ysize, xsize))
            imslice[:, :] = delimiter
            imslice[shifty:ysize, shiftx:xsize] = timslice
            curry = (offset.y - shifty) // 2
            currx = (offset.x - shiftx) // 2

            imlevels = []
            tileoffsetyx = []
            imlevels.append(imslice)
            tileoffsetyx.append((offset.y // tilesize, offset.x // tilesize))

            with Timer() as downsample_timer:
                # use generic downsample algorithm
                for level in range(1, maxlevel + 1):

                    tysize, txsize = imlevels[level - 1].shape

                    shiftx = currx % tilesize
                    shifty = curry % tilesize

                    ysize = tysize + shifty
                    xsize = txsize + shiftx
                    imslice = np.zeros((ysize, xsize))
                    imslice[:, :] = delimiter
                    timslice = ndimage.zoom(imlevels[level - 1], 0.5)
                    imslice[shifty:ysize, shiftx:xsize] = timslice
                    imlevels.append(imslice)
                    # (y, x) order, matching the (starty, startx) unpacking below
                    tileoffsetyx.append((curry // tilesize, currx // tilesize))

                    curry = (curry - shifty) // 2
                    currx = (currx - shiftx) // 2

            logger.info("Downsampled {} levels in {:.3f} seconds".format(
                maxlevel, downsample_timer.seconds))

            # write tile pyramid using custom requests
            for levelnum in range(0, len(imlevels)):
                levelslice = imlevels[levelnum]
                dim1, dim2 = levelslice.shape

                num1tiles = (dim1 - 1) // tilesize + 1
                num2tiles = (dim2 - 1) // tilesize + 1

                with Timer() as post_timer:
                    for iter1 in range(0, num1tiles):
                        for iter2 in range(0, num2tiles):
                            # extract tile
                            tileholder = np.zeros((tilesize, tilesize),
                                                  np.uint8)
                            tileholder[:, :] = delimiter
                            min1 = iter1 * tilesize
                            min2 = iter2 * tilesize
                            tileslice = levelslice[min1:min1 + tilesize,
                                                   min2:min2 + tilesize]
                            t1, t2 = tileslice.shape
                            tileholder[0:t1, 0:t2] = tileslice

                            starty, startx = tileoffsetyx[levelnum]
                            starty += iter1
                            startx += iter2
                            if createtiles:
                                buf = BytesIO()
                                img = Image.frombuffer('L',
                                                       (tilesize, tilesize),
                                                       tileholder.tobytes(),
                                                       'raw', 'L', 0, 1)
                                img.save(buf, format="png")

                                urlreq = server + "/api/node/" + uuid + "/" + tilename + "/tile/xy/" + str(
                                    levelnum) + "/" + str(startx) + "_" + str(
                                        starty) + "_" + str(zslice)
                                s.post(urlreq, data=buf.getvalue())
                                buf.close()

                            if createtilesjpeg:
                                buf = BytesIO()
                                img = Image.frombuffer('L',
                                                       (tilesize, tilesize),
                                                       tileholder.tobytes(),
                                                       'raw', 'L', 0, 1)
                                img.save(buf, format="jpeg")

                                urlreq = server + "/api/node/" + uuid + "/" + tilenamejpeg + "/tile/xy/" + str(
                                    levelnum) + "/" + str(startx) + "_" + str(
                                        starty) + "_" + str(zslice)
                                s.post(urlreq, data=buf.getvalue())
                                buf.close()
                logger.info("Posted {} tiles (level={}) in {} seconds".format(
                    num1tiles * num2tiles, levelnum, post_timer.seconds))
def reload_server_metadata(dvid_server):
    session = default_dvid_session()
    r = session.post("{}/api/server/reload-metadata".format(dvid_server))
    r.raise_for_status()
    def execute(self):
        """
        Execute spark workflow.
        """
        self._sanitize_config()
        session = default_dvid_session()

        dvid_info = self.config_data["dvid-info"]
        options = self.config_data["options"]
        block_shape = 3 * (options["blocksize"], )
        self.partition_size = options["blockwritelimit"] * options["blocksize"]
        # ?? num parallel requests might be really small at high levels of pyramids

        # xdim is unbounded or very large
        partition_dims = PartitionDims(options["blocksize"],
                                       options["blocksize"],
                                       self.partition_size)
        partition_schema = partitionSchema(
            partition_dims,
            blank_delimiter=options["blankdelimiter"],
            padding=options["blocksize"],
            enablemask=options["has-dvidmask"])

        offset_zyx = np.array(options["offset"][::-1])
        offset_zyx[0] += options["minslice"]
        imgreader = imagefileSrc(partition_schema, options["basename"],
                                 (options["minslice"], options["maxslice"]),
                                 VolumeOffset(*offset_zyx), self.sc)

        # !! hack: override iteration size that is set to partition size, TODO: add option
        # this just makes the downstream processing a little more convenient, and reduces
        # unnecessary DVID patching if that is enabled.
        # (must be a multiple of block size)
        imgreader.iteration_size = options["num-tasks"]

        # get dims from image (hackage)
        from PIL import Image
        import requests
        if '%' in options["basename"]:
            minslice_name = options["basename"] % options["minslice"]
        elif '{' in options["basename"]:
            minslice_name = options["basename"].format(options["minslice"])
        else:
            raise RuntimeError(
                f"Unrecognized format string for image basename: {options['basename']}"
            )

        img = Image.open(minslice_name)
        volume_shape = (1 + options["maxslice"] - options["minslice"],
                        img.height, img.width)
        del img

        global_box_zyx = np.zeros((2, 3), dtype=int)
        global_box_zyx[0] = options["offset"]
        global_box_zyx[0] += (options["minslice"], 0, 0)

        global_box_zyx[1] = global_box_zyx[0] + volume_shape

        if options["create-pyramid"]:
            if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"],
                               dvid_info["dataname"]):
                logger.info(
                    "'{dataname}' already exists, skipping creation".format(
                        **dvid_info))
            else:
                # create data instance and disable dvidmask
                # !! assume that if the data instance exists and the mask is set, then the
                # !! pyramid also exists, meaning the mask should be used.
                options["has-dvidmask"] = False
                if options["disable-original"]:
                    logger.info(
                        "Not creating '{dataname}' due to 'disable-original' config setting"
                        .format(**dvid_info))
                elif 0 in options["skipped-pyramid-levels"]:
                    logger.info(
                        "Not creating '{dataname}' due to 'skipped-pyramid-levels' config setting"
                        .format(**dvid_info))
                else:
                    if options["is-rawarray"]:
                        create_rawarray8(dvid_info["dvid-server"],
                                         dvid_info["uuid"],
                                         dvid_info["dataname"], block_shape)
                    else:
                        create_label_instance(dvid_info["dvid-server"],
                                              dvid_info["uuid"],
                                              dvid_info["dataname"], 0,
                                              block_shape)

            if not options["disable-original"] and 0 not in options[
                    "skipped-pyramid-levels"]:
                update_extents(dvid_info["dvid-server"], dvid_info["uuid"],
                               dvid_info["dataname"], global_box_zyx)

                # Bottom level of pyramid is listed as neuroglancer-compatible
                extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"],
                                  '.meta', 'neuroglancer',
                                  [dvid_info["dataname"]])

        # determine number of pyramid levels if not specified
        if options["create-pyramid"] or options["create-pyramid-jpeg"]:
            if options["pyramid-depth"] == -1:
                options["pyramid-depth"] = 0
                zsize = options["maxslice"] - options["minslice"] + 1
                while zsize > 512:
                    options["pyramid-depth"] += 1
                    zsize /= 2

                # NeuTu doesn't work well if there aren't at least a few pyramid levels.
                # Even for small volumes, use at least a few pyramid levels,
                # unless the depth was explicit in the config.
                options["pyramid-depth"] = max(options["pyramid-depth"], 4)

        # create pyramid data instances
        if options["create-pyramid-jpeg"]:
            dataname_jpeg = dvid_info["dataname"] + self.JPEGPYRAMID_NAME
            if 0 in options["skipped-pyramid-levels"]:
                logger.info(
                    "Not creating '{}' due to 'skipped-pyramid-levels' config setting"
                    .format(dataname_jpeg))
            else:
                if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"],
                                   dataname_jpeg):
                    logger.info(
                        "'{}' already exists, skipping creation".format(
                            dataname_jpeg))
                else:
                    create_rawarray8(dvid_info["dvid-server"],
                                     dvid_info["uuid"], dataname_jpeg,
                                     block_shape, Compression.JPEG)

                update_extents(dvid_info["dvid-server"], dvid_info["uuid"],
                               dataname_jpeg, global_box_zyx)

                # Bottom level of pyramid is listed as neuroglancer-compatible
                extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"],
                                  '.meta', 'neuroglancer', [dataname_jpeg])

        if options["create-pyramid"]:
            for level in range(1, 1 + options["pyramid-depth"]):
                downsampled_box_zyx = global_box_zyx // (2**level)
                downname = dvid_info["dataname"] + "_%d" % level

                if level in options["skipped-pyramid-levels"]:
                    logger.info(
                        "Not creating '{}' due to 'skipped-pyramid-levels' config setting"
                        .format(downname))
                    continue

                if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"],
                                   downname):
                    logger.info(
                        "'{}' already exists, skipping creation".format(
                            downname))
                else:
                    if options["is-rawarray"]:
                        create_rawarray8(dvid_info["dvid-server"],
                                         dvid_info["uuid"], downname,
                                         block_shape)
                    else:
                        create_label_instance(dvid_info["dvid-server"],
                                              dvid_info["uuid"], downname, 0,
                                              block_shape)

                update_extents(dvid_info["dvid-server"], dvid_info["uuid"],
                               downname, downsampled_box_zyx)

                # Higher-levels of the pyramid should not appear in the DVID-lite console.
                extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"],
                                  '.meta', 'restrictions', [downname])

        if options["create-pyramid-jpeg"]:
            for level in range(1, 1 + options["pyramid-depth"]):
                downsampled_box_zyx = global_box_zyx // (2**level)
                downname = dvid_info["dataname"] + self.JPEGPYRAMID_NAME + "_%d" % level

                if level in options["skipped-pyramid-levels"]:
                    logger.info(
                        "Not creating '{}' due to 'skipped-pyramid-levels' config setting"
                        .format(downname))
                    continue

                if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"],
                                   downname):
                    logger.info(
                        "'{}' already exists, skipping creation".format(
                            downname))
                else:
                    create_rawarray8(dvid_info["dvid-server"],
                                     dvid_info["uuid"], downname, block_shape,
                                     Compression.JPEG)

                update_extents(dvid_info["dvid-server"], dvid_info["uuid"],
                               downname, downsampled_box_zyx)

                # Higher-levels of the pyramid should not appear in the DVID-lite console.
                extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"],
                                  '.meta', 'restrictions', [downname])

        # create tiles
        if options["create-tiles"] or options["create-tiles-jpeg"]:
            MinTileCoord = global_box_zyx[0][::-1] // options["tilesize"]
            MaxTileCoord = global_box_zyx[1][::-1] // options["tilesize"]

            # get max level by just finding max tile coord
            maxval = max(MaxTileCoord) - min(MinTileCoord) + 1
            import math
            self.maxlevel = int(math.log(maxval) / math.log(2))

            tilemeta = {}
            tilemeta["MinTileCoord"] = MinTileCoord.tolist()
            tilemeta["MaxTileCoord"] = MaxTileCoord.tolist()
            tilemeta["Levels"] = {}

            currres = 8.0  # just use as placeholder for now
            for level in range(0, self.maxlevel + 1):
                tilemeta["Levels"][str(level)] = {
                    "Resolution": 3 * [currres],
                    "TileSize": 3 * [options["tilesize"]]
                }
                currres *= 2

            if options["create-tiles"]:
                session.post("{dvid-server}/api/repo/{uuid}/instance".format(
                    **dvid_info),
                             json={
                                 "typename": "imagetile",
                                 "dataname":
                                 dvid_info["dataname"] + self.TILENAME,
                                 "source": dvid_info["dataname"],
                                 "format": "png"
                             })
                session.post(
                    "{dvid-server}/api/repo/{uuid}/{dataname}{tilename}/metadata"
                    .format(tilename=self.TILENAME, **dvid_info),
                    json=tilemeta)

            if options["create-tiles-jpeg"]:
                session.post("{dvid-server}/api/repo/{uuid}/instance".format(
                    **dvid_info),
                             json={
                                 "typename": "imagetile",
                                 "dataname":
                                 dvid_info["dataname"] + self.JPEGTILENAME,
                                 "source": dvid_info["dataname"],
                                 "format": "jpg"
                             })
                session.post(
                    "{dvid-server}/api/repo/{uuid}/{dataname_jpeg_tile}/metadata"
                    .format(dataname_jpeg_tile=dvid_info["dataname"] +
                            self.JPEGTILENAME,
                            **dvid_info),
                    json=tilemeta)

        if dvid_info["dvid-server"].startswith("http://127.0.0.1"):

            def reload_meta():
                reload_server_metadata(dvid_info["dvid-server"])

            self.run_on_each_worker(reload_meta)

        # TODO Validation: should verify syncs exist, should verify pyramid depth

        # TODO: set syncs for pyramids, tiles if base datatype exists
        # syncs should be removed before ingestion and added afterward

        levels_cache = {}

        # iterate through each partition
        for arraypartition in imgreader:
            # DVID pad if necessary
            if options["has-dvidmask"]:
                dvidsrc = dvidSrc(dvid_info["dvid-server"],
                                  dvid_info["uuid"],
                                  dvid_info["dataname"],
                                  arraypartition,
                                  resource_server=self.resource_server,
                                  resource_port=self.resource_port)

                arraypartition = dvidsrc.extract_volume()

            # potentially need for future iterations
            arraypartition.persist()

            # check for final layer
            finallayer = imgreader.curr_slice > imgreader.end_slice

            if not options["disable-original"]:
                # Write level-0 of the raw data, even if we aren't writing the rest of the pyramid.
                dataname = datanamelossy = None
                if options["create-pyramid"]:
                    dataname = dvid_info["dataname"]
                if options["create-pyramid-jpeg"]:
                    datanamelossy = dvid_info["dataname"] + self.JPEGPYRAMID_NAME

                if (dataname or datanamelossy) and 0 not in options["skipped-pyramid-levels"]:
                    self._write_blocks(arraypartition, dataname, datanamelossy)

            if options["create-tiles"] or options["create-tiles-jpeg"]:
                # repartition into tiles
                schema = partitionSchema(PartitionDims(1, 0, 0))
                tilepartition = schema.partition_data(arraypartition)

                # write unpadded tilesize (will pad with delimiter if needed)
                self._writeimagepyramid(tilepartition)

            if options["create-pyramid"] or options["create-pyramid-jpeg"]:
                if 0 not in levels_cache:
                    levels_cache[0] = []
                levels_cache[0].append(arraypartition)
                curr_level = 1
                downsample_factor = 2

                # curr_slice should be a multiple of the Z blocksize (or this is the final fetch)
                assert imgreader.curr_slice % options["blocksize"] == 0
                while ((((imgreader.curr_slice // options["blocksize"]) %
                         downsample_factor) == 0) or
                       finallayer) and curr_level <= options["pyramid-depth"]:
                    partlist = levels_cache[curr_level - 1]
                    part = partlist[0]
                    # union all RDDs from the same level
                    for iter1 in range(1, len(partlist)):
                        part = part.union(partlist[iter1])

                    # downsample map
                    israw = options["is-rawarray"]

                    def downsample(part_vol):
                        part, vol = part_vol
                        if not israw:
                            vol = downsample_3Dlabels(vol)[0]
                        else:
                            vol = downsample_raw(vol)[0]
                        return (part, vol)

                    downsampled_array = part.map(downsample)

                    # repart (vol and offset will always be power of two because of padding)
                    def repartition_down(part_volume):
                        part, volume = part_volume
                        downsampled_offset = np.array(part.get_offset()) // 2
                        downsampled_reloffset = np.array(
                            part.get_reloffset()) // 2
                        offsetnew = VolumeOffset(*downsampled_offset)
                        reloffsetnew = VolumeOffset(*downsampled_reloffset)
                        partnew = volumePartition(
                            (offsetnew.z, offsetnew.y, offsetnew.x),
                            offsetnew,
                            reloffset=reloffsetnew)
                        return partnew, volume

                    downsampled_array = downsampled_array.map(repartition_down)

                    # repartition downsample data
                    partition_dims = PartitionDims(options["blocksize"],
                                                   options["blocksize"],
                                                   self.partition_size)
                    schema = partitionSchema(
                        partition_dims,
                        blank_delimiter=options["blankdelimiter"],
                        padding=options["blocksize"],
                        enablemask=options["has-dvidmask"])
                    downsampled_array = schema.partition_data(
                        downsampled_array)

                    # persist before padding if there are more levels
                    if curr_level < options["pyramid-depth"]:
                        downsampled_array.persist()
                        if curr_level not in levels_cache:
                            levels_cache[curr_level] = []
                        levels_cache[curr_level].append(downsampled_array)

                    # pad from DVID (moving this before the persist would allow
                    # multi-ingest, but would lead to slightly suboptimal
                    # downsampling boundary effects when only lossy compression
                    # is used)
                    if options["has-dvidmask"]:
                        padname = dvid_info["dataname"]
                        # !! should pad with the original (lossless) data when
                        # computing both pyramids
                        if options["create-pyramid-jpeg"]:
                            # pad with jpeg
                            padname += self.JPEGPYRAMID_NAME
                        padname += "_%d" % curr_level
                        dvidsrc = dvidSrc(dvid_info["dvid-server"],
                                          dvid_info["uuid"],
                                          padname,
                                          downsampled_array,
                                          resource_server=self.resource_server,
                                          resource_port=self.resource_port)

                        downsampled_array = dvidsrc.extract_volume()

                    # write result
                    downname = None
                    downnamelossy = None
                    if options["create-pyramid"]:
                        downname = dvid_info["dataname"] + "_%d" % curr_level
                    if options["create-pyramid-jpeg"]:
                        downnamelossy = dvid_info[
                            "dataname"] + self.JPEGPYRAMID_NAME + "_%d" % curr_level

                    if curr_level not in options["skipped-pyramid-levels"]:
                        self._write_blocks(downsampled_array, downname,
                                           downnamelossy)

                    # remove previous level
                    del levels_cache[curr_level - 1]
                    curr_level += 1
                    downsample_factor *= 2
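
The modulus test above controls the pyramid cadence: level N can only be emitted once every 2**N-th slab of slices has arrived (or on the final slab, which forces all remaining levels). A minimal standalone sketch, with toy slab indices rather than workflow data, that mirrors the gating arithmetic:

def levels_to_write(slab_index, max_depth):
    # Mirrors the `(curr_slice // blocksize) % downsample_factor` test:
    # each level doubles the required slab alignment.
    levels, factor = [], 2
    for level in range(1, max_depth + 1):
        if slab_index % factor != 0:
            break
        levels.append(level)
        factor *= 2
    return levels

for slab in range(1, 9):
    print(slab, levels_to_write(slab, max_depth=3))
# odd slabs -> [], slabs 2 and 6 -> [1], slab 4 -> [1, 2], slab 8 -> [1, 2, 3]
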
        def writeimagepyramid(vol3d):
            blknum, vol = vol3d
            
            from PIL import Image
            from scipy import ndimage
            from io import BytesIO
            import numpy
            s = default_dvid_session()

            # create thread pool for parallel tile posts
            from multiprocessing.dummy import Pool as ThreadPool
            NUM_THREADS = 4
            ACTIVE_REQS = 16
            #pool = ThreadPool(NUM_THREADS)
             
            # actually perform tile load
            def loadTile(reqpair):
                urlreq, reqbuff = reqpair 
                s.post(urlreq , data=reqbuff)

            work_queue = []
            # iterate slice by slice
            for slicenum in range(0, BLKSIZE):
                imslice = vol[slicenum, :, :]
                imlevels = []
                imlevels.append(imslice)
                # use generic downsample algorithm
                for level in range(1, maxlevel+1):
                    dim1, dim2 = imlevels[level-1].shape
                    # go to max level regardless of actual image size
                    #if dim1 < TILESIZE and dim2 < TILESIZE:
                        # image size is already smaller even though not at max level
                        #print "Not at max level"
                    #    break
                    imlevels.append(ndimage.zoom(imlevels[level-1], 0.5))

                # write pyramid for each slice using custom request
                for levelnum in range(0, len(imlevels)):
                    levelslice = imlevels[levelnum]
                    dim1, dim2 = levelslice.shape

                    num1tiles = (dim1-1) // TILESIZE + 1
                    num2tiles = (dim2-1) // TILESIZE + 1

                    for iter1 in range(0, num1tiles):
                        for iter2 in range(0, num2tiles):
                            # extract tile
                            tileholder = numpy.zeros((TILESIZE, TILESIZE), numpy.uint8)
                            min1 = iter1*TILESIZE
                            min2 = iter2*TILESIZE
                            tileslice = levelslice[min1:min1+TILESIZE, min2:min2+TILESIZE]
                            t1, t2 = tileslice.shape
                            tileholder[0:t1, 0:t2] = tileslice

                            # write tileholder to dvid
                            buf = BytesIO() 
                            img = Image.frombuffer('L', (TILESIZE, TILESIZE), tileholder.tobytes(), 'raw', 'L', 0, 1)
                            imformatpil = imformat
                            if imformat == "jpg":
                                imformatpil = "jpeg"
                            img.save(buf, format=imformatpil)

                            if axis == "xy":
                                work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(iter2) + "_" + str(iter1) + "_" + str(slicenum+blknum*BLKSIZE), buf.getvalue()))
                            elif axis == "xz":
                                work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(iter2) + "_" + str(slicenum+blknum*BLKSIZE) + "_" + str(iter1), buf.getvalue()))
                            else:
                                work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(slicenum+blknum*BLKSIZE) + "_" + str(iter2) + "_" + str(iter1), buf.getvalue()))
                            buf.close()

                            # submit a batch of requests once enough accumulate
                            if len(work_queue) == ACTIVE_REQS:
                                pool = ThreadPool(NUM_THREADS)
                                pool.map(loadTile, work_queue)
                    
                                # close the pool to further requests
                                pool.close()
                                # wait for any remaining threads
                                pool.join()
                                work_queue = []

                # submit last jobs if any remain
                if len(work_queue) > 0:
                    pool = ThreadPool(NUM_THREADS)
                    pool.map(loadTile, work_queue)
    
                    # close the pool to further requests
                    pool.close()
                    # wait for any remaining threads
                    pool.join()
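
Constructing a fresh ThreadPool for every 16-request batch, as above, works but churns threads. A minimal sketch, assuming the same loadTile callable, that reuses one pool across all batches:

from multiprocessing.dummy import Pool as ThreadPool

def post_in_batches(work_items, load_tile, num_threads=4, batch_size=16):
    # One pool for the whole queue; map() blocks until each batch completes.
    with ThreadPool(num_threads) as pool:
        for start in range(0, len(work_items), batch_size):
            pool.map(load_tile, work_items[start:start + batch_size])
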
Example 23
    def execute(self):
        # tile size default
        TILESIZE = 512
        
        server = str(self.config_data["dvid-info"]["dvid-server"])
        uuid = str(self.config_data["dvid-info"]["uuid"])
        grayname = str(self.config_data["dvid-info"]["grayname"])
        tilename = str(self.config_data["dvid-info"]["tilename"])
        
        # determine grayscale block extents
        if not server.startswith("http://"):
            server = "http://" + server

        
        xmin, ymin, zmin = 0, 0, 0 
        
        minslice = self.config_data["minslice"]
        maxslice = self.config_data["maxslice"]
        # map file to numpy array
        basename = self.config_data["basename"]
        
        # open image
        from PIL import Image
        import numpy
        
        session = default_dvid_session()
        
        img = Image.open(basename % minslice) 
        xmax, ymax, zmax = img.width, img.height, maxslice

        # create tiles type and meta
        imformat = str(self.config_data["options"]["format"])
        session.post(server + "/api/repo/" + uuid + "/instance", json={"typename": "imagetile", "dataname": tilename, "source": grayname, "format": imformat})

        MinTileCoord = [xmin // TILESIZE, ymin // TILESIZE, zmin // TILESIZE]
        MaxTileCoord = [xmax // TILESIZE, ymax // TILESIZE, zmax // TILESIZE]
        
        # get max level by just finding max tile coord
        maxval = max(MaxTileCoord) + 1
        import math
        maxlevel = int(math.log(maxval) / math.log(2))

        tilemeta = {}
        tilemeta["MinTileCoord"] = MinTileCoord
        tilemeta["MaxTileCoord"] = MaxTileCoord
        tilemeta["Levels"] = {}
        currres = 10.0 # just use as placeholder for now
        for level in range(0, maxlevel+1):
            tilemeta["Levels"][str(level)] = { "Resolution" : [currres, currres, currres], "TileSize": [TILESIZE, TILESIZE, TILESIZE]}
            currres *= 2
        
        session.post(server + "/api/node/" + uuid + "/" + tilename + "/metadata", json=tilemeta)
       
        # make each image a separate task
        imgs = self.sparkdvid_context.sc.parallelize(list(range(minslice, maxslice+1)), maxslice-minslice+1)

        def img2npy(slicenum):
            try:
                img = Image.open(basename % slicenum)
                return slicenum, numpy.array(img)
            except Exception as e:
                # could give empty image, but for now just fail
                raise
        npy_images = imgs.map(img2npy) 
    
        appname = self.APPNAME
        resource_server = self.resource_server
        resource_port = self.resource_port

        def writeimagepyramid(image):
            slicenum, imnpy = image 
            
            from PIL import Image
            from scipy import ndimage
            from io import BytesIO
            
            from libdvid import ConnectionMethod
            node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname) 

            # actually perform tile load
            def loadTile(reqpair):
                urlreq, reqbuff = reqpair 
                node_service.custom_request(urlreq, reqbuff, ConnectionMethod.POST) 
                #session.post(urlreq , data=reqbuff)
                

            imlevels = []
            imlevels.append(imnpy)
            # use generic downsample algorithm
            for level in range(1, maxlevel+1):
                dim1, dim2 = imlevels[level-1].shape
                imlevels.append(ndimage.zoom(imlevels[level-1], 0.5))

            # write pyramid for each slice using custom request
            for levelnum in range(0, len(imlevels)):
                levelslice = imlevels[levelnum]
                dim1, dim2 = levelslice.shape

                num1tiles = (dim1-1) // TILESIZE + 1
                num2tiles = (dim2-1) // TILESIZE + 1

                for iter1 in range(0, num1tiles):
                    for iter2 in range(0, num2tiles):
                        # extract tile
                        tileholder = numpy.zeros((TILESIZE, TILESIZE), numpy.uint8)
                        min1 = iter1*TILESIZE
                        min2 = iter2*TILESIZE
                        tileslice = levelslice[min1:min1+TILESIZE, min2:min2+TILESIZE]
                        t1, t2 = tileslice.shape
                        tileholder[0:t1, 0:t2] = tileslice

                        # write tileholder to dvid
                        buf = BytesIO() 
                        img = Image.frombuffer('L', (TILESIZE, TILESIZE), tileholder.tobytes(), 'raw', 'L', 0, 1)
                        imformatpil = imformat
                        if imformat == "jpg":
                            imformatpil = "jpeg"
                        img.save(buf, format=imformatpil)

                        loadTile((tilename + "/tile/xy/" + str(levelnum) + "/" + str(iter2) + "_" + str(iter1) + "_" + str(slicenum), buf.getvalue()))
                        buf.close()

        npy_images.foreach(writeimagepyramid)
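
The maxlevel arithmetic above is just the base-2 logarithm of the largest tile coordinate. A small illustrative helper (compute_max_level is not part of the workflow) that captures the same formula:

import math

def compute_max_level(width, height, tilesize=512):
    # The largest tile index (plus one) determines how many halvings are
    # needed before the whole slice fits in a single tile.
    maxval = max(width // tilesize, height // tilesize) + 1
    return int(math.log(maxval) / math.log(2))

print(compute_max_level(8192, 4096))  # -> 4
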
Example 24
def reload_server_metadata(dvid_server):
    session = default_dvid_session()
    r = session.post("{}/api/server/reload-metadata".format(dvid_server))
    r.raise_for_status()
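
A hypothetical usage sketch: after creating new data instances, each worker's cached DVID metadata can be refreshed so subsequent posts resolve them (run_on_each_worker is the per-worker hook the workflows above already use):

def refresh_workers(workflow, dvid_server):
    # Run the reload on every worker, not just the driver.
    def reload_meta():
        reload_server_metadata(dvid_server)
    workflow.run_on_each_worker(reload_meta)
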
        def writeimagepyramid(part_data):
            logger = logging.getLogger(__name__)
            part, vol = part_data
            offset = part.get_offset() 
            zslice = offset.z
            from PIL import Image
            from scipy import ndimage
            from io import BytesIO
            s = default_dvid_session()
    
            # pad data with delimiter if needed
            timslice = vol[0, :, :]
            shiftx = offset.x % tilesize
            shifty = offset.y % tilesize
            tysize, txsize = timslice.shape
            ysize = tysize + shifty
            xsize = txsize + shiftx
            imslice = np.zeros((ysize, xsize))
            imslice[:,:] = delimiter
            imslice[shifty:ysize, shiftx:xsize] = timslice
            curry = (offset.y - shifty) // 2 
            currx = (offset.x - shiftx) // 2

            imlevels = []
            tileoffsetyx = []
            imlevels.append(imslice)
            tileoffsetyx.append((offset.y // tilesize, offset.x // tilesize))  

            with Timer() as downsample_timer:
                # use generic downsample algorithm
                for level in range(1, maxlevel+1):
                    
                    tysize, txsize = imlevels[level-1].shape
    
                    shiftx = currx % tilesize
                    shifty = curry % tilesize
                    
                    ysize = tysize + shifty
                    xsize = txsize + shiftx
                    imslice = np.zeros((ysize, xsize))
                    imslice[:,:] = delimiter
                    timslice = ndimage.zoom(imlevels[level-1], 0.5)
                    imslice[shifty:ysize, shiftx:xsize] = timslice
                    imlevels.append(imslice) 
                    tileoffsetyx.append((curry // tilesize, currx // tilesize))  # (y, x) order, matching level 0
                    
                    curry = (curry - shifty) // 2 
                    currx = (currx - shiftx) // 2

            logger.info("Downsampled {} levels in {:.3f} seconds".format(maxlevel, downsample_timer.seconds))

            # write tile pyramid using custom requests
            for levelnum in range(0, len(imlevels)):
                levelslice = imlevels[levelnum]
                dim1, dim2 = levelslice.shape

                num1tiles = (dim1-1) // tilesize + 1
                num2tiles = (dim2-1) // tilesize + 1

                with Timer() as post_timer:
                    for iter1 in range(0, num1tiles):
                        for iter2 in range(0, num2tiles):
                            # extract tile
                            tileholder = np.zeros((tilesize, tilesize), np.uint8)
                            tileholder[:,:] = delimiter
                            min1 = iter1*tilesize
                            min2 = iter2*tilesize
                            tileslice = levelslice[min1:min1+tilesize, min2:min2+tilesize]
                            t1, t2 = tileslice.shape
                            tileholder[0:t1, 0:t2] = tileslice
    
                            starty, startx = tileoffsetyx[levelnum]
                            starty += iter1
                            startx += iter2
                            if createtiles:
                                buf = BytesIO() 
                                img = Image.frombuffer('L', (tilesize, tilesize), tileholder.tobytes(), 'raw', 'L', 0, 1)
                                img.save(buf, format="png")
    
                                urlreq = server + "/api/node/" + uuid + "/" + tilename + "/tile/xy/" + str(levelnum) + "/" + str(startx) + "_" + str(starty) + "_" + str(zslice)
                                s.post(urlreq , data=buf.getvalue())
                                buf.close()
                            
                            if createtilesjpeg:
                                buf = BytesIO() 
                                img = Image.frombuffer('L', (tilesize, tilesize), tileholder.tobytes(), 'raw', 'L', 0, 1)
                                img.save(buf, format="jpeg")
    
                                urlreq = server + "/api/node/" + uuid + "/" + tilenamejpeg + "/tile/xy/" + str(levelnum) + "/" + str(startx) + "_" + str(starty) + "_" + str(zslice)
                                s.post(urlreq , data=buf.getvalue())
                                buf.close()
                logger.info("Posted {} tiles (level={}) in {} seconds".format( num1tiles*num2tiles, levelnum, post_timer.seconds ) )
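
The shift/offset bookkeeping above keeps each level's slice aligned to the tile grid. A toy trace, with assumed offset and tile size, of how the left padding and tile column evolve per level:

offset_x, tilesize = 1536, 512   # assumed values
currx = offset_x                 # level-0 voxel start
for level in range(0, 3):
    shiftx = currx % tilesize    # left padding needed to tile-align
    print("level", level, "shift", shiftx, "tile col", currx // tilesize)
    currx = (currx - shiftx) // 2  # voxel start at the next level
# level 0: shift 0, col 3; level 1: shift 256, col 1; level 2: shift 256, col 0
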
Example 26
    def execute(self):
        server = str(self.config_data["dvid-info"]["dvid-server"])
        uuid = str(self.config_data["dvid-info"]["uuid"])
        source = str(self.config_data["dvid-info"]["source"])

        session = default_dvid_session()
        # determine grayscale block extents
        if not server.startswith("http://"):
            server = "http://" + server

        req = session.get(server + "/api/node/" + uuid + "/" + source +
                          "/info")
        sourcemeta = req.json()

        # xmin, ymin, zmin not being used explicitly yet
        #xmin, ymin, zmin = sourcemeta["Extended"]["MinIndex"]
        xmin, ymin, zmin = 0, 0, 0
        xmax, ymax, zmax = sourcemeta["Extended"]["MaxIndex"]

        islabelblk = False
        datatype = sourcemeta["Extended"]["Values"][0]["Label"]
        if str(datatype) == "labelblk":
            islabelblk = True

        # !! always assume isotropic block
        BLKSIZE = int(sourcemeta["Extended"]["BlockSize"][0])

        maxdim = max(xmax, ymax, zmax)
        # build pyramid until BLKSIZE * 4
        import math
        maxlevel = int(math.log(maxdim + 1) / math.log(2)) - 2

        # assume 0,0,0 start for now
        xspan, yspan, zspan = xmax + 1, ymax + 1, zmax + 1

        xrunlimit = self.config_data["options"]["xrunlimit"]
        xrunlimit = xrunlimit + (xrunlimit % 2)  # should be even

        currsource = source

        # create source pyramid and append _level to name
        for level in range(1, maxlevel + 1):
            node_service = retrieve_node_service(server, uuid,
                                                 self.resource_server,
                                                 self.resource_port,
                                                 self.APPNAME)
            # !! limit to grayscale now
            prevsource = currsource
            currsource = source + ("_%d" % level)

            # TODO: set voxel resolution to base dataset (not too important in current workflows)
            if islabelblk:
                node_service.create_labelblk(currsource, None, BLKSIZE)
            else:
                node_service.create_grayscale8(currsource, BLKSIZE)
                # set extents for new volume (only need to do for grayscale)
                newsourceext = {}
                newsourceext["MinPoint"] = [0, 0, 0]  # for now no offset
                newsourceext["MaxPoint"] = [
                    ((xspan - 1) // 2 + 1) * BLKSIZE - 1,
                    ((yspan - 1) // 2 + 1) * BLKSIZE - 1,
                    ((zspan - 1) // 2 + 1) * BLKSIZE - 1
                ]
                session.post(server + "/api/node/" + uuid + "/" + currsource +
                             "/extents",
                             json=newsourceext)

            # determine number of requests
            maxxrun = xspan
            if xrunlimit > 0 and xrunlimit < xspan:
                maxxrun = xrunlimit
            if maxxrun % 2:
                maxxrun += 1

            xsize = xspan // maxxrun
            if xspan % maxxrun:
                xsize += 1
            ysize = (yspan + 1) // 2
            zsize = (zspan + 1) // 2
            resource_server = self.resource_server
            resource_port = self.resource_port

            for ziter2 in range(0, zsize, 2):
                workqueue = []
                for yiter in range(0, ysize):
                    for xiter in range(0, xsize):
                        for miniz in range(ziter2, ziter2 + 2):
                            workqueue.append((xiter, yiter, miniz))

                # parallelize jobs
                pieces = self.sc.parallelize(workqueue, len(workqueue))

                # grab data corresponding to xrun
                def retrievedata(coord):
                    xiter, yiter, ziter = coord
                    node_service = retrieve_node_service(
                        server, uuid, resource_server, resource_port)

                    shape_zyx = (BLKSIZE * 2, BLKSIZE * 2, maxxrun * BLKSIZE)
                    offset_zyx = (ziter * BLKSIZE * 2, yiter * BLKSIZE * 2,
                                  xiter * BLKSIZE * maxxrun)
                    vol_zyx = None
                    if islabelblk:
                        vol_zyx = node_service.get_labels3D(str(prevsource),
                                                            shape_zyx,
                                                            offset_zyx,
                                                            throttle=False)
                    else:
                        vol_zyx = node_service.get_gray3D(str(prevsource),
                                                          shape_zyx,
                                                          offset_zyx,
                                                          throttle=False)

                    return (coord, vol_zyx)

                volumedata = pieces.map(retrievedata)

                # downsample gray data
                def downsamplegray(vdata):
                    coords, data = vdata
                    from scipy import ndimage
                    data = ndimage.zoom(data, 0.5)
                    return (coords, data)

                # downsample label data (TODO: make faster)
                def downsamplelabels(vdata):
                    coords, data = vdata
                    import numpy
                    zmax, ymax, xmax = data.shape
                    data2 = numpy.zeros(
                        (zmax // 2, ymax // 2, xmax // 2)).astype(numpy.uint64)

                    for ziter in range(0, zmax, 2):
                        for yiter in range(0, ymax, 2):
                            for xiter in range(0, xmax, 2):
                                v1 = data[ziter, yiter, xiter]
                                v2 = data[ziter, yiter, xiter + 1]
                                v3 = data[ziter, yiter + 1, xiter]
                                v4 = data[ziter, yiter + 1, xiter + 1]
                                v5 = data[ziter + 1, yiter, xiter]
                                v6 = data[ziter + 1, yiter, xiter + 1]
                                v7 = data[ziter + 1, yiter + 1, xiter]
                                v8 = data[ziter + 1, yiter + 1, xiter + 1]

                                # tally the eight corner labels and keep the
                                # most frequent one
                                freqs = {}
                                for v in (v1, v2, v3, v4, v5, v6, v7, v8):
                                    freqs[v] = freqs.get(v, 0) + 1
                                freqkey = max(freqs, key=freqs.get)

                                data2[ziter // 2, yiter // 2,
                                      xiter // 2] = freqkey

                    return (coords, data2)
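
                # A vectorized sketch of the same 2x2x2 mode reduction,
                # shown for illustration only (not used below): scipy.stats
                # is assumed available, and tie-breaking between equally
                # frequent labels may differ from the loop above.
                def downsamplelabels_fast(vdata):
                    coords, data = vdata
                    import numpy
                    from scipy import stats
                    zs, ys, xs = data.shape
                    # gather each 2x2x2 block's eight corner labels on one axis
                    corners = (data.reshape(zs // 2, 2, ys // 2, 2, xs // 2, 2)
                                   .transpose(0, 2, 4, 1, 3, 5)
                                   .reshape(zs // 2, ys // 2, xs // 2, 8))
                    modes = stats.mode(corners, axis=3)[0]
                    return (coords, modes.reshape(zs // 2, ys // 2, xs // 2).astype(numpy.uint64))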

                downsampleddata = None
                if islabelblk:
                    downsampleddata = volumedata.map(downsamplelabels)
                else:
                    downsampleddata = volumedata.map(downsamplegray)

                appname = self.APPNAME
                delimiter = self.config_data["options"]["blankdelimiter"]

                # write results to DVID
                def write2dvid(vdata):
                    from libdvid import ConnectionMethod
                    import numpy
                    node_service = retrieve_node_service(
                        server, uuid, resource_server, resource_port, appname)

                    coords, data = vdata
                    xiter, yiter, ziter = coords

                    # set block indices
                    zbindex = ziter
                    ybindex = yiter

                    zsize, ysize, xsize = data.shape
                    #xrun = xsize/BLKSIZE
                    xbindex = xiter * maxxrun // 2

                    # accumulate a run of blocks to POST
                    blockbuffer = b""

                    # skip blank blocks
                    startblock = False
                    xrun = 0

                    if islabelblk:
                        vals = numpy.unique(data)
                        # TODO: ignore blank blocks within an x line
                        if not (len(vals) == 1 and vals[0] == 0):
                            if resource_server != "":
                                node_service.put_labels3D(
                                    currsource,
                                    data,
                                    (zbindex * BLKSIZE, ybindex * BLKSIZE,
                                     xbindex * BLKSIZE),
                                    compress=True,
                                    throttle=False)
                            else:
                                node_service.put_labels3D(
                                    currsource,
                                    data,
                                    (zbindex * BLKSIZE, ybindex * BLKSIZE,
                                     xbindex * BLKSIZE),
                                    compress=True)
                    else:
                        for iterx in range(0, xsize, BLKSIZE):
                            block = data[:, :, iterx:iterx + BLKSIZE]
                            vals = numpy.unique(block)
                            if len(vals) == 1 and vals[0] == delimiter:
                                # check if the block is blank
                                if startblock:
                                    # if the previous block has data, push blocks in current queue
                                    node_service.custom_request(
                                        str((currsource +
                                             "/blocks/%d_%d_%d/%d") %
                                            (xbindex, ybindex, zbindex, xrun)),
                                        blockbuffer, ConnectionMethod.POST)
                                    startblock = False
                                    xrun = 0
                                    blockbuffer = b""

                            else:
                                if startblock == False:
                                    xbindex = xiter * maxxrun // 2 + iterx // BLKSIZE

                                startblock = True
                                blockbuffer += block.tobytes()
                                xrun += 1

                        # write-out leftover blocks
                        if xrun > 0:
                            node_service.custom_request(
                                str((currsource + "/blocks/%d_%d_%d/%d") %
                                    (xbindex, ybindex, zbindex, xrun)),
                                blockbuffer, ConnectionMethod.POST)

                downsampleddata.foreach(write2dvid)

            # adjust max coordinate for new level
            xspan = (xspan - 1) // 2
            yspan = (yspan - 1) // 2
            zspan = (zspan - 1) // 2
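
The write2dvid logic above coalesces consecutive non-blank blocks into a single POST. A standalone sketch of the same run-building bookkeeping (runs_of_nonblank is a hypothetical helper; a list of numpy blocks is assumed):

def runs_of_nonblank(blocks, delimiter):
    # Yield (start_index, joined_bytes, run_length) for each run of
    # consecutive non-blank blocks, mirroring startblock/xrun above.
    start, buf, run = 0, b"", 0
    for i, blk in enumerate(blocks):
        if (blk == delimiter).all():   # a blank block ends the current run
            if run:
                yield start, buf, run
                buf, run = b"", 0
        else:
            if run == 0:
                start = i
            buf += blk.tobytes()
            run += 1
    if run:                            # flush the trailing run
        yield start, buf, run
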
    def execute(self):
        # tile size default
        TILESIZE = 512
        
        server = str(self.config_data["dvid-info"]["dvid-server"])
        uuid = str(self.config_data["dvid-info"]["uuid"])
        grayname = str(self.config_data["dvid-info"]["grayname"])
        tilename = str(self.config_data["dvid-info"]["tilename"])
        resource_server = self.resource_server
        resource_port = self.resource_port

        # determine grayscale block extents
        if not server.startswith("http://"):
            server = "http://" + server

        session = default_dvid_session()
        req = session.get(server + "/api/node/" + uuid + "/" + grayname + "/info")
        graymeta = req.json()
        
        xmin, ymin, zmin = graymeta["Extended"]["MinIndex"] 
        xmax, ymax, zmax = graymeta["Extended"]["MaxIndex"] 
        
        # !! always assume isotropic block
        BLKSIZE = int(graymeta["Extended"]["BlockSize"][0])

        imformat = str(self.config_data["options"]["format"])
        # create tiles type and meta
        session.post(server + "/api/repo/" + uuid + "/instance", json={"typename": "imagetile", "dataname": tilename, "source": grayname, "format": imformat})

        MinTileCoord = [xmin*BLKSIZE//TILESIZE, ymin*BLKSIZE//TILESIZE, zmin*BLKSIZE//TILESIZE]
        MaxTileCoord = [xmax*BLKSIZE//TILESIZE, ymax*BLKSIZE//TILESIZE, zmax*BLKSIZE//TILESIZE]
        
        # get max level by just finding max tile coord
        maxval = max(MaxTileCoord)
        minval = abs(min(MinTileCoord))
        maxval = max(minval, maxval) + 1
        import math
        maxlevel = int(math.log(maxval) / math.log(2))

        tilemeta = {}
        tilemeta["MinTileCoord"] = MinTileCoord
        tilemeta["MaxTileCoord"] = MaxTileCoord
        tilemeta["Levels"] = {}
        currres = 10.0 # just use as placeholder for now
        for level in range(0, maxlevel+1):
            tilemeta["Levels"][str(level)] = { "Resolution" : [currres, currres, currres], "TileSize": [TILESIZE, TILESIZE, TILESIZE]}
            currres *= 2
        
        session.post(server + "/api/node/" + uuid + "/" + tilename + "/metadata", json=tilemeta)
        
        numiters = zmax+1
        axis = str(self.config_data["options"]["axis"])

        if axis == "xz":
            numiters = ymax+1 
        elif axis == "yz":
            numiters = xmax+1

        # retrieve a block's worth (BLKSIZE) of slices at a time and generate all tiles
        # TODO: only fetch 1 slice at a time if BLKSIZE slices cannot fit in memory
        blkiters = self.sparkdvid_context.sc.parallelize(list(range(0,numiters)), numiters) 
        
        def retrieveslices(blknum):
            # grab slice with 3d volume call
            node_service = retrieve_node_service(server, uuid, resource_server, resource_port)
            vol = None
          
            if resource_server != "": 
                # Note: libdvid uses zyx order for python functions
                if axis == "xy":
                    shape_zyx = ( BLKSIZE, (ymax+1)*BLKSIZE-ymin*BLKSIZE, (xmax+1)*BLKSIZE-xmin*BLKSIZE )
                    offset_zyx = (blknum*BLKSIZE, ymin*BLKSIZE, xmin*BLKSIZE)
                    vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx, throttle=False)
                    vol = vol_zyx
                elif axis == "xz":
                    shape_zyx = ( (zmax+1)*BLKSIZE-zmin*BLKSIZE, BLKSIZE, (xmax+1)*BLKSIZE-xmin*BLKSIZE )
                    offset_zyx = (zmin*BLKSIZE, blknum*BLKSIZE, xmin*BLKSIZE)
                    vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx, throttle=False )
                    vol_yzx = vol_zyx.transpose((1,0,2))
                    vol = vol_yzx
                else:
                    shape_zyx = ( (zmax+1)*BLKSIZE-zmin*BLKSIZE, (ymax+1)*BLKSIZE-ymin*BLKSIZE, BLKSIZE )
                    offset_zyx = ( zmin*BLKSIZE, ymin*BLKSIZE, blknum*BLKSIZE )
                    vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx, throttle=False )
                    vol = vol_zyx.transpose((2,0,1))
            else:
                if axis == "xy":
                    shape_zyx = ( BLKSIZE, (ymax+1)*BLKSIZE-ymin*BLKSIZE, (xmax+1)*BLKSIZE-xmin*BLKSIZE )
                    offset_zyx = (blknum*BLKSIZE, ymin*BLKSIZE, xmin*BLKSIZE)
                    vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx)
                    vol = vol_zyx
                elif axis == "xz":
                    shape_zyx = ( (zmax+1)*BLKSIZE-zmin*BLKSIZE, BLKSIZE, (xmax+1)*BLKSIZE-xmin*BLKSIZE )
                    offset_zyx = (zmin*BLKSIZE, blknum*BLKSIZE, xmin*BLKSIZE)
                    vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx )
                    vol_yzx = vol_zyx.transpose((1,0,2))
                    vol = vol_yzx
                else:
                    shape_zyx = ( (zmax+1)*BLKSIZE-zmin*BLKSIZE, (ymax+1)*BLKSIZE-ymin*BLKSIZE, BLKSIZE )
                    offset_zyx = ( zmin*BLKSIZE, ymin*BLKSIZE, blknum*BLKSIZE )
                    vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx )
                    vol = vol_zyx.transpose((2,0,1))

            return (blknum, vol)

        imagedata = blkiters.map(retrieveslices)

        # !! assume a (0, 0) starting coordinate for now, for debugging simplicity
        def writeimagepyramid(vol3d):
            blknum, vol = vol3d
            
            from PIL import Image
            from scipy import ndimage
            from io import BytesIO
            import numpy
            s = default_dvid_session()

            # create thread pool for parallel tile posts
            from multiprocessing.dummy import Pool as ThreadPool
            NUM_THREADS = 4
            ACTIVE_REQS = 16
            #pool = ThreadPool(NUM_THREADS)
             
            # actually perform tile load
            def loadTile(reqpair):
                urlreq, reqbuff = reqpair 
                s.post(urlreq , data=reqbuff)

            work_queue = []
            # iterate slice by slice
            for slicenum in range(0, BLKSIZE):
                imslice = vol[slicenum, :, :]
                imlevels = []
                imlevels.append(imslice)
                # use generic downsample algorithm
                for level in range(1, maxlevel+1):
                    dim1, dim2 = imlevels[level-1].shape
                    # go to max level regardless of actual image size
                    #if dim1 < TILESIZE and dim2 < TILESIZE:
                        # image size is already smaller even though not at max level
                        #print "Not at max level"
                    #    break
                    imlevels.append(ndimage.zoom(imlevels[level-1], 0.5))

                # write pyramid for each slice using custom request
                for levelnum in range(0, len(imlevels)):
                    levelslice = imlevels[levelnum]
                    dim1, dim2 = levelslice.shape

                    num1tiles = (dim1-1) // TILESIZE + 1
                    num2tiles = (dim2-1) // TILESIZE + 1

                    for iter1 in range(0, num1tiles):
                        for iter2 in range(0, num2tiles):
                            # extract tile
                            tileholder = numpy.zeros((TILESIZE, TILESIZE), numpy.uint8)
                            min1 = iter1*TILESIZE
                            min2 = iter2*TILESIZE
                            tileslice = levelslice[min1:min1+TILESIZE, min2:min2+TILESIZE]
                            t1, t2 = tileslice.shape
                            tileholder[0:t1, 0:t2] = tileslice

                            # write tileholder to dvid
                            buf = BytesIO() 
                            img = Image.frombuffer('L', (TILESIZE, TILESIZE), tileholder.tobytes(), 'raw', 'L', 0, 1)
                            imformatpil = imformat
                            if imformat == "jpg":
                                imformatpil = "jpeg"
                            img.save(buf, format=imformatpil)

                            if axis == "xy":
                                work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(iter2) + "_" + str(iter1) + "_" + str(slicenum+blknum*BLKSIZE), buf.getvalue()))
                            elif axis == "xz":
                                work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(iter2) + "_" + str(slicenum+blknum*BLKSIZE) + "_" + str(iter1), buf.getvalue()))
                            else:
                                work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(slicenum+blknum*BLKSIZE) + "_" + str(iter2) + "_" + str(iter1), buf.getvalue()))
                            buf.close()

                            # submit a batch of requests once enough accumulate
                            if len(work_queue) == ACTIVE_REQS:
                                pool = ThreadPool(NUM_THREADS)
                                pool.map(loadTile, work_queue)
                    
                                # close the pool to further requests
                                pool.close()
                                # wait for any remaining threads
                                pool.join()
                                work_queue = []

                # submit last jobs if any remain
                if len(work_queue) > 0:
                    pool = ThreadPool(NUM_THREADS)
                    pool.map(loadTile, work_queue)
    
                    # close the pool to further requests
                    pool.close()
                    # wait for any remaining threads
                    pool.join()
                

        imagedata.foreach(writeimagepyramid)
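
The three URL branches above encode a different tile coordinate order per slicing axis. A compact sketch (tile_coords is a hypothetical helper) of the same mapping:

def tile_coords(axis, col, row, slicepos):
    # Coordinate order used in the tile URLs above for each axis.
    if axis == "xy":
        return (col, row, slicepos)
    elif axis == "xz":
        return (col, slicepos, row)
    else:  # "yz"
        return (slicepos, col, row)

print(tile_coords("xz", 7, 3, 250))  # -> (7, 250, 3)
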
Example 28
    def execute(self):
        from PIL import Image
        import numpy
        import os

        iterslices = self.BLKSIZE * self.config_data["options"][
            "numblocklayers"]

        minslice = self.config_data["minslice"]
        # map file to numpy array
        basename = self.config_data["basename"]

        # format should be gs://<bucket>/path
        gbucketname = ""
        gpath = ""
        if basename.startswith('gs://'):
            # parse google bucket names
            tempgs = basename.split('//')
            bucketpath = tempgs[1].split('/')
            gbucketname = bucketpath[0]
            gpath = '/'.join(bucketpath[1:])

        server = None

        xoffset = yoffset = zoffset = 0

        if "offset" in self.config_data["options"]:
            xoffset = self.config_data["options"]["offset"][0]
            yoffset = self.config_data["options"]["offset"][1]
            zoffset = self.config_data["options"]["offset"][2]

            if xoffset % self.BLKSIZE != 0:
                raise Exception("offset not block aligned")
            if yoffset % self.BLKSIZE != 0:
                raise Exception("offset not block aligned")
            if zoffset % self.BLKSIZE != 0:
                raise Exception("offset not block aligned")

            xoffset //= self.BLKSIZE
            yoffset //= self.BLKSIZE
            zoffset //= self.BLKSIZE

        # this will start the Z block writing at the specified offset
        # (changes default behavior when loading a nonzero starting image slice)
        zoffset -= (minslice // self.BLKSIZE)

        # create metadata before workers start if using DVID
        if "output-dir" not in self.config_data or self.config_data[
                "output-dir"] == "":
            # write to dvid
            server = self.config_data["dvid-info"]["dvid-server"]
            uuid = self.config_data["dvid-info"]["uuid"]
            grayname = self.config_data["dvid-info"]["grayname"]
            resource_server = str(self.resource_server)
            resource_port = self.resource_port

            # create grayscale type
            node_service = retrieve_node_service(server, uuid, resource_server,
                                                 resource_port, self.APPNAME)
            node_service.create_grayscale8(str(grayname), self.BLKSIZE)

        for slice in range(self.config_data["minslice"],
                           self.config_data["maxslice"] + 1, iterslices):
            # parallelize images across many machines
            imgs = self.sc.parallelize(list(range(slice, slice + iterslices)),
                                       iterslices)

            def img2npy(slicenum):
                try:
                    img = None
                    if gbucketname == "":
                        img = Image.open(basename % slicenum)
                    else:
                        from gcloud import storage
                        from io import BytesIO
                        client = storage.Client()
                        gbucket = client.get_bucket(gbucketname)
                        gblob = gbucket.get_blob(gpath % slicenum)

                        # write to bytes which implements file interface
                        gblobfile = BytesIO()
                        gblob.download_to_file(gblobfile)
                        gblobfile.seek(0)
                        img = Image.open(gblobfile)
                    return slicenum, numpy.array(img)
                except Exception as e:
                    # just return a blank slice -- will be handled downstream
                    return slicenum, numpy.zeros((0, 0), numpy.uint8)

            npy_images = imgs.map(img2npy)

            # map numpy array into y lines of block height
            blocksize = self.BLKSIZE
            blocklimit = self.BLOCKLIMIT

            def npy2lines(arrpair):
                z, arr = arrpair
                ysize, xsize = arr.shape
                npylines = []

                for itery in range(0, ysize, blocksize):
                    line = numpy.zeros(
                        (blocksize,
                         ((xsize - 1) // blocksize + 1) * blocksize),
                        numpy.uint8)
                    uppery = blocksize
                    if (itery + blocksize) > ysize:
                        uppery = ysize - itery

                    line[0:uppery, 0:xsize] = arr[itery:itery + blocksize,
                                                  0:xsize]

                    npylines.append((itery // blocksize, (z, line)))

                return npylines

            npy_lines = npy_images.flatMap(npy2lines)

            # reduce y lines into DVID blocks
            groupedlines = npy_lines.groupByKey()

            # map y lines => (y, blocks)
            def lines2blocks(linespair):
                y, linesp = linespair

                xsize = None
                blockdata = None
                for z, line in linesp:
                    if xsize is None:
                        _, xsize = line.shape
                        blockdata = numpy.zeros((iterslices, blocksize, xsize),
                                                numpy.uint8)

                    blockdata[(z - minslice) % iterslices, :, :] = line
                return y, blockdata

            yblocks = groupedlines.map(lines2blocks)

            # map multilayer of blocks to an array of single layer blocks
            def multi2single(yblocks):
                ybindex, blocks = yblocks
                blockarr = []
                num_layers = iterslices // blocksize
                for layer in range(0, num_layers):
                    blockarr.append(
                        ((ybindex, layer),
                         blocks[layer * blocksize:(layer * blocksize +
                                                   blocksize), :, :]))

                return blockarr

            yblockssplit = yblocks.flatMap(multi2single)

            if "output-dir" in self.config_data and self.config_data[
                    "output-dir"] != "":
                # write blocks to disk for separate post-processing -- write directly to DVID eventually?
                output_dir = self.config_data["output-dir"]

                def write2disk(yblocks):
                    zbindex = slice // blocksize
                    (ybindex, layer), blocks = yblocks
                    zbindex += layer

                    zsize, ysize, xsize = blocks.shape

                    outdir = output_dir
                    outdir += "/" + ("%05d" % zbindex) + ".z/"
                    filename = outdir + ("%05d" % ybindex) + "-" + str(
                        xsize // blocksize) + ".blocks"

                    try:
                        os.makedirs(outdir)
                    except Exception as e:
                        pass

                    # extract blocks from buffer and write to disk
                    fout = open(filename, 'wb')
                    for iterx in range(0, xsize, blocksize):
                        block = blocks[:, :, iterx:iterx + blocksize].copy()
                        fout.write(block)
                    fout.close()

                yblockssplit.foreach(write2disk)
            else:
                # write to dvid
                server = self.config_data["dvid-info"]["dvid-server"]
                uuid = self.config_data["dvid-info"]["uuid"]
                grayname = self.config_data["dvid-info"]["grayname"]
                appname = self.APPNAME
                delimiter = self.config_data["options"]["blankdelimiter"]

                def write2dvid(yblocks):
                    from libdvid import ConnectionMethod
                    import numpy
                    node_service = retrieve_node_service(
                        server, uuid, resource_server, resource_port, appname)

                    # get block coordinates
                    zbindex = slice // blocksize
                    (ybindex, layer), blocks = yblocks
                    zbindex += layer
                    zsize, ysize, xsize = blocks.shape
                    xbindex = 0  # assume x starts at 0!!

                    # accumulate a run of non-blank blocks to POST
                    blockbuffer = b""
                    startblock = False
                    xrun = 0

                    for iterx in range(0, xsize, blocksize):
                        block = blocks[:, :, iterx:iterx + blocksize].copy()
                        vals = numpy.unique(block)
                        if len(vals) == 1 and vals[0] == delimiter:
                            # check if the block is blank
                            if startblock:
                                # if the previous block has data, push blocks in current queue
                                node_service.custom_request(
                                    str((grayname + "/blocks/%d_%d_%d/%d") %
                                        (xbindex + xoffset, ybindex + yoffset,
                                         zbindex + zoffset, xrun)),
                                    blockbuffer, ConnectionMethod.POST)
                                startblock = False
                                xrun = 0
                                blockbuffer = b""

                        else:
                            if startblock == False:
                                xbindex = iterx // blocksize

                            startblock = True
                            blockbuffer += block.tobytes()
                            xrun += 1

                            if blocklimit > 0 and xrun >= blocklimit:
                                # if the previous block has data, push blocks in current queue
                                node_service.custom_request(
                                    str((grayname + "/blocks/%d_%d_%d/%d") %
                                        (xbindex + xoffset, ybindex + yoffset,
                                         zbindex + zoffset, xrun)),
                                    blockbuffer, ConnectionMethod.POST)
                                startblock = False
                                xrun = 0
                                blockbuffer = b""

                    # write-out leftover blocks
                    if xrun > 0:
                        node_service.custom_request(
                            str((grayname + "/blocks/%d_%d_%d/%d") %
                                (xbindex + xoffset, ybindex + yoffset,
                                 zbindex + zoffset, xrun)), blockbuffer,
                            ConnectionMethod.POST)

                yblockssplit.foreach(write2dvid)

            self.workflow_entry_exit_printer.write_data("Ingested %d slices" %
                                                        iterslices)

        # just fetch one image at driver to get dims
        width = height = 1
        try:
            img = None
            if gbucketname == "":
                img = Image.open(basename % minslice)
                width, height = img.width, img.height
            else:
                from gcloud import storage
                from io import BytesIO
                client = storage.Client()
                gbucket = client.get_bucket(gbucketname)
                gblob = gbucket.get_blob(gpath % minslice)

                # write to bytes which implements file interface
                gblobfile = BytesIO()
                gblob.download_to_file(gblobfile)
                gblobfile.seek(0)
                img = Image.open(gblobfile)
                width, height = img.width, img.height
        except Exception as e:
            # just set size to 1
            pass

        if "output-dir" not in self.config_data or self.config_data[
                "output-dir"] == "":
            # update metadata
            grayext = {}
            grayext["MinPoint"] = [
                xoffset * self.BLKSIZE, yoffset * self.BLKSIZE,
                zoffset * self.BLKSIZE + minslice
            ]
            grayext["MaxPoint"] = [
                xoffset * self.BLKSIZE + width - 1,
                yoffset * self.BLKSIZE + height - 1, zoffset * self.BLKSIZE +
                minslice + self.config_data["maxslice"]
            ]
            if not server.startswith("http://"):
                server = "http://" + server
            session = default_dvid_session()
            session.post(server + "/api/node/" + uuid + "/" + grayname +
                         "/extents",
                         json=grayext)
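
The offset handling above insists on block alignment before converting voxel offsets to block indices. A small validation sketch (to_block_offset is a hypothetical helper) of that conversion:

def to_block_offset(offset_xyz, blksize):
    # Reject unaligned offsets, then convert voxel offsets to block indices.
    for c in offset_xyz:
        if c % blksize != 0:
            raise ValueError("offset not block aligned: %r" % (offset_xyz,))
    return tuple(c // blksize for c in offset_xyz)

print(to_block_offset((64, 128, 0), 32))  # -> (2, 4, 0)
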
Example 29
    def execute(self):
        # tile size default
        TILESIZE = 512

        server = str(self.config_data["dvid-info"]["dvid-server"])
        uuid = str(self.config_data["dvid-info"]["uuid"])
        grayname = str(self.config_data["dvid-info"]["grayname"])
        tilename = str(self.config_data["dvid-info"]["tilename"])
        resource_server = self.resource_server
        resource_port = self.resource_port

        # determine grayscale block extents
        if not server.startswith("http://"):
            server = "http://" + server

        session = default_dvid_session()
        req = session.get(server + "/api/node/" + uuid + "/" + grayname +
                          "/info")
        graymeta = req.json()

        xmin, ymin, zmin = graymeta["Extended"]["MinIndex"]
        xmax, ymax, zmax = graymeta["Extended"]["MaxIndex"]

        # !! always assume isotropic block
        BLKSIZE = int(graymeta["Extended"]["BlockSize"][0])

        imformat = str(self.config_data["options"]["format"])
        # create tiles type and meta
        session.post(server + "/api/repo/" + uuid + "/instance",
                     json={
                         "typename": "imagetile",
                         "dataname": tilename,
                         "source": grayname,
                         "format": imformat
                     })

        MinTileCoord = [
            xmin * BLKSIZE // TILESIZE, ymin * BLKSIZE // TILESIZE,
            zmin * BLKSIZE // TILESIZE
        ]
        MaxTileCoord = [
            xmax * BLKSIZE // TILESIZE, ymax * BLKSIZE // TILESIZE,
            zmax * BLKSIZE // TILESIZE
        ]

        # get max level by just finding max tile coord
        maxval = max(MaxTileCoord)
        minval = abs(min(MinTileCoord))
        maxval = max(minval, maxval) + 1
        import math
        maxlevel = int(math.log(maxval) / math.log(2))

        tilemeta = {}
        tilemeta["MinTileCoord"] = MinTileCoord
        tilemeta["MaxTileCoord"] = MaxTileCoord
        tilemeta["Levels"] = {}
        currres = 10.0  # just use as placeholder for now
        for level in range(0, maxlevel + 1):
            tilemeta["Levels"][str(level)] = {
                "Resolution": [currres, currres, currres],
                "TileSize": [TILESIZE, TILESIZE, TILESIZE]
            }
            currres *= 2

        session.post(server + "/api/node/" + uuid + "/" + tilename +
                     "/metadata",
                     json=tilemeta)

        numiters = zmax + 1
        axis = str(self.config_data["options"]["axis"])

        if axis == "xz":
            numiters = ymax + 1
        elif axis == "yz":
            numiters = xmax + 1

        # retrieve a block's worth (BLKSIZE) of slices at a time and generate all tiles
        # TODO: only fetch 1 slice at a time if BLKSIZE slices cannot fit in memory
        blkiters = self.sparkdvid_context.sc.parallelize(
            list(range(0, numiters)), numiters)

        def retrieveslices(blknum):
            # grab slice with 3d volume call
            node_service = retrieve_node_service(server, uuid, resource_server,
                                                 resource_port)

            # Note: libdvid uses zyx order for python functions
            if axis == "xy":
                shape_zyx = (BLKSIZE,
                             (ymax + 1) * BLKSIZE - ymin * BLKSIZE,
                             (xmax + 1) * BLKSIZE - xmin * BLKSIZE)
                offset_zyx = (blknum * BLKSIZE, ymin * BLKSIZE, xmin * BLKSIZE)
            elif axis == "xz":
                shape_zyx = ((zmax + 1) * BLKSIZE - zmin * BLKSIZE,
                             BLKSIZE,
                             (xmax + 1) * BLKSIZE - xmin * BLKSIZE)
                offset_zyx = (zmin * BLKSIZE, blknum * BLKSIZE, xmin * BLKSIZE)
            else:  # "yz"
                shape_zyx = ((zmax + 1) * BLKSIZE - zmin * BLKSIZE,
                             (ymax + 1) * BLKSIZE - ymin * BLKSIZE,
                             BLKSIZE)
                offset_zyx = (zmin * BLKSIZE, ymin * BLKSIZE, blknum * BLKSIZE)

            # when a resource manager coordinates requests,
            # DVID-side throttling is disabled
            if resource_server != "":
                vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx,
                                                  offset_zyx, throttle=False)
            else:
                vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx,
                                                  offset_zyx)

            # reorient so the requested axis becomes the leading (slice) axis
            if axis == "xz":
                vol = vol_zyx.transpose((1, 0, 2))
            elif axis == "yz":
                vol = vol_zyx.transpose((2, 0, 1))
            else:
                vol = vol_zyx

            return (blknum, vol)

        imagedata = blkiters.map(retrieveslices)

        # ?! assume a (0, 0) starting coordinate for now, for debugging simplicity
        def writeimagepyramid(vol3d):
            blknum, vol = vol3d

            from PIL import Image
            from scipy import ndimage
            from io import BytesIO
            import numpy
            s = default_dvid_session()

            # create a thread pool to post tiles in parallel
            from multiprocessing.dummy import Pool as ThreadPool
            NUM_THREADS = 4
            ACTIVE_REQS = 16

            # actually perform tile load
            def loadTile(reqpair):
                urlreq, reqbuff = reqpair
                s.post(urlreq, data=reqbuff)

            work_queue = []
            # iterate slice by slice
            for slicenum in range(0, BLKSIZE):
                imslice = vol[slicenum, :, :]
                imlevels = []
                imlevels.append(imslice)
                # use a generic downsampling algorithm and go to the max level
                # regardless of the actual image size
                for level in range(1, maxlevel + 1):
                    imlevels.append(
                        ndimage.interpolation.zoom(imlevels[level - 1], 0.5))

                # write pyramid for each slice using custom request
                for levelnum in range(0, len(imlevels)):
                    levelslice = imlevels[levelnum]
                    dim1, dim2 = levelslice.shape

                    num1tiles = (dim1 - 1) // TILESIZE + 1
                    num2tiles = (dim2 - 1) // TILESIZE + 1
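                    # ceil division: e.g. dim1 = 1000 with TILESIZE = 512 gives
                    # (999 // 512) + 1 = 2 tiles along that dimension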

                    for iter1 in range(0, num1tiles):
                        for iter2 in range(0, num2tiles):
                            # extract tile
                            tileholder = numpy.zeros((TILESIZE, TILESIZE),
                                                     numpy.uint8)
                            min1 = iter1 * TILESIZE
                            min2 = iter2 * TILESIZE
                            tileslice = levelslice[min1:min1 + TILESIZE,
                                                   min2:min2 + TILESIZE]
                            t1, t2 = tileslice.shape
                            tileholder[0:t1, 0:t2] = tileslice

                            # write tileholder to dvid
                            buf = BytesIO()
                            img = Image.frombuffer('L', (TILESIZE, TILESIZE),
                                                   tileholder.tobytes(),
                                                   'raw', 'L', 0, 1)
                            imformatpil = imformat
                            if imformat == "jpg":
                                imformatpil = "jpeg"
                            img.save(buf, format=imformatpil)

                            if axis == "xy":
                                work_queue.append(
                                    (server + "/api/node/" + uuid + "/" +
                                     tilename + "/tile/" + axis + "/" +
                                     str(levelnum) + "/" + str(iter2) + "_" +
                                     str(iter1) + "_" +
                                     str(slicenum + blknum * BLKSIZE),
                                     buf.getvalue()))
                            elif axis == "xz":
                                work_queue.append(
                                    (server + "/api/node/" + uuid + "/" +
                                     tilename + "/tile/" + axis + "/" +
                                     str(levelnum) + "/" + str(iter2) + "_" +
                                     str(slicenum + blknum * BLKSIZE) + "_" +
                                     str(iter1), buf.getvalue()))
                            else:
                                work_queue.append(
                                    (server + "/api/node/" + uuid + "/" +
                                     tilename + "/tile/" + axis + "/" +
                                     str(levelnum) + "/" +
                                     str(slicenum + blknum * BLKSIZE) + "_" +
                                     str(iter2) + "_" + str(iter1),
                                     buf.getvalue()))
                            buf.close()

                            # flush a batch of requests once enough have accumulated
                            if len(work_queue) == ACTIVE_REQS:
                                pool = ThreadPool(NUM_THREADS)
                                pool.map(loadTile, work_queue)

                                # close the pool to further requests
                                pool.close()
                                # wait for any remaining threads
                                pool.join()
                                work_queue = []

                # submit last jobs if any remain
                if len(work_queue) > 0:
                    pool = ThreadPool(NUM_THREADS)
                    pool.map(loadTile, work_queue)

                    # close the pool to further requests
                    pool.close()
                    # wait for any remaining threads
                    pool.join()

        imagedata.foreach(writeimagepyramid)
    def execute(self):
        server = str(self.config_data["dvid-info"]["dvid-server"])
        uuid = str(self.config_data["dvid-info"]["uuid"])
        source = str(self.config_data["dvid-info"]["source"])

        session = default_dvid_session()        
        # determine grayscale block extents
        if not server.startswith("http://"):
            server = "http://" + server

        req = session.get(server + "/api/node/" + uuid + "/" + source + "/info")
        sourcemeta = req.json()
       
        # xmin, ymin, zmin not being used explicitly yet
        #xmin, ymin, zmin = sourcemeta["Extended"]["MinIndex"] 
        xmin, ymin, zmin = 0, 0, 0 
        xmax, ymax, zmax = sourcemeta["Extended"]["MaxIndex"] 
       
        datatype = sourcemeta["Extended"]["Values"][0]["Label"]
        islabelblk = (str(datatype) == "labelblk")

        # !! always assume isotropic block
        BLKSIZE = int(sourcemeta["Extended"]["BlockSize"][0])

        maxdim = max(xmax,ymax,zmax)
        # build pyramid until BLKSIZE * 4
        import math
        maxlevel = int(math.log(maxdim+1) / math.log(2)) - 2

        # assume 0,0,0 start for now
        xspan, yspan, zspan = xmax+1, ymax+1, zmax+1
        
        xrunlimit = self.config_data["options"]["xrunlimit"]
        xrunlimit = xrunlimit + (xrunlimit % 2) # should be even
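        # e.g. an odd xrunlimit of 5 becomes 6; even values are unchanged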

        currsource = source

        # create source pyramid and append _level to name
        for level in range(1, maxlevel+1):
            node_service = retrieve_node_service(server, uuid, self.resource_server, self.resource_port, self.APPNAME)
            # !! limit to grayscale now
            prevsource = currsource
            currsource = source + ("_%d" % level)
            
            # TODO: set voxel resolution to base dataset (not too important in current workflows)
            if islabelblk:
                node_service.create_labelblk(currsource, None, BLKSIZE)
            else:
                node_service.create_grayscale8(currsource, BLKSIZE)
                # set extents for new volume (only need to do for grayscale)
                newsourceext = {}
                newsourceext["MinPoint"] = [0,0,0] # for now no offset
                newsourceext["MaxPoint"] = [((xspan-1) // 2+1)*BLKSIZE-1,((yspan-1) // 2+1)*BLKSIZE-1,((zspan-1) // 2+1)*BLKSIZE-1]
                session.post(server + "/api/node/" + uuid + "/" + currsource + "/extents", json=newsourceext)

            # determine number of requests
            maxxrun = xspan
            if xrunlimit > 0 and xrunlimit < xspan:
                maxxrun = xrunlimit
            if maxxrun % 2:
                maxxrun += 1

            xsize = xspan // maxxrun
            if xspan % maxxrun:
                xsize += 1
            ysize = (yspan+1) // 2
            zsize = (zspan+1) // 2
            resource_server = self.resource_server
            resource_port = self.resource_port

            for ziter2 in range(0, zsize, 2):
                workqueue = []
                for yiter in range(0, ysize):
                    for xiter in range(0, xsize):
                        for miniz in range(ziter2, ziter2+2):
                            workqueue.append((xiter,yiter,miniz))

                # parallelize jobs
                pieces = self.sc.parallelize(workqueue, len(workqueue))

                # grab data corresponding to xrun
                def retrievedata(coord):
                    xiter, yiter, ziter = coord
                    node_service = retrieve_node_service(server, uuid, resource_server, resource_port)

                    shape_zyx = ( BLKSIZE*2, BLKSIZE*2, maxxrun*BLKSIZE )
                    offset_zyx = (ziter*BLKSIZE*2, yiter*BLKSIZE*2, xiter*BLKSIZE*maxxrun)
                    vol_zyx = None
                    if islabelblk:
                        vol_zyx = node_service.get_labels3D( str(prevsource), shape_zyx, offset_zyx, throttle=False)
                    else:
                        vol_zyx = node_service.get_gray3D( str(prevsource), shape_zyx, offset_zyx, throttle=False)

                    return (coord, vol_zyx)

                volumedata = pieces.map(retrievedata)

                # downsample gray data
                def downsamplegray(vdata):
                    coords, data = vdata
                    from scipy import ndimage
                    data = ndimage.interpolation.zoom(data, 0.5)
                    return (coords, data)

                # downsample label data (TODO: make faster)
                def downsamplelabels(vdata):
                    coords, data = vdata
                    import numpy
                    # use names that don't shadow the outer zmax/ymax/xmax extents
                    zdim, ydim, xdim = data.shape
                    data2 = numpy.zeros((zdim // 2, ydim // 2, xdim // 2)).astype(numpy.uint64)

                    for ziter in range(0, zdim, 2):
                        for yiter in range(0, ydim, 2):
                            for xiter in range(0, xdim, 2):
                                # gather the 2x2x2 voxel neighborhood
                                vals = [data[ziter + dz, yiter + dy, xiter + dx]
                                        for dz in (0, 1)
                                        for dy in (0, 1)
                                        for dx in (0, 1)]

                                # pick the most frequent label (the mode)
                                freqs = {}
                                for v in vals:
                                    freqs[v] = freqs.get(v, 0) + 1

                                maxval = 0
                                freqkey = 0
                                for key, val in freqs.items():
                                    if val > maxval:
                                        maxval = val
                                        freqkey = key

                                data2[ziter // 2, yiter // 2, xiter // 2] = freqkey

                    return (coords, data2)
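                # Hedged alternative to the loop above: a vectorized mode
                # computation using scipy.stats.mode. This is a sketch only,
                # assuming scipy is available on the workers and all dims are
                # even (as above); it is not wired into the pipeline below.
                def downsamplelabels_vectorized(vdata):
                    coords, data = vdata
                    import numpy
                    from scipy import stats
                    zdim, ydim, xdim = data.shape
                    # group voxels into 2x2x2 blocks along a trailing axis of 8
                    blocks = (data.reshape(zdim // 2, 2, ydim // 2, 2, xdim // 2, 2)
                                  .transpose(0, 2, 4, 1, 3, 5)
                                  .reshape(zdim // 2, ydim // 2, xdim // 2, 8))
                    # mode over each block of 8 voxels
                    mode_result = stats.mode(blocks, axis=-1)
                    data2 = numpy.asarray(mode_result[0]).reshape(
                        zdim // 2, ydim // 2, xdim // 2).astype(numpy.uint64)
                    return (coords, data2)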

                downsampleddata = None
                if islabelblk:
                    downsampleddata = volumedata.map(downsamplelabels)
                else:
                    downsampleddata = volumedata.map(downsamplegray)

                appname = self.APPNAME
                delimiter = self.config_data["options"]["blankdelimiter"]
                
                # write results to DVID
                def write2dvid(vdata):
                    from libdvid import ConnectionMethod
                    import numpy
                    node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname) 
                    
                    coords, data = vdata 
                    xiter, yiter, ziter = coords

                    # set block indices
                    zbindex = ziter
                    ybindex = yiter

                    zsize, ysize, xsize = data.shape
                    xbindex = xiter*maxxrun // 2

                    # buffer of consecutive blocks to send in one request
                    blockbuffer = b""

                    # skip blank blocks
                    startblock = False
                    xrun = 0

                    if islabelblk: 
                        vals = numpy.unique(data)
                        # TODO: ignore blank blocks within an x line 
                        if not (len(vals) == 1 and vals[0] == 0):
                            if resource_server != "":
                                node_service.put_labels3D(currsource, data, (zbindex*BLKSIZE, ybindex*BLKSIZE, xbindex*BLKSIZE), compress=True, throttle=False)
                            else:
                                node_service.put_labels3D(currsource, data, (zbindex*BLKSIZE, ybindex*BLKSIZE, xbindex*BLKSIZE), compress=True)
                    else:
                        for iterx in range(0, xsize, BLKSIZE):
                            block = data[:,:,iterx:iterx+BLKSIZE]
                            vals = numpy.unique(block)
                            if len(vals) == 1 and vals[0] == delimiter:
                                # check if the block is blank
                                if startblock:
                                    # if the previous block has data, push blocks in current queue
                                    node_service.custom_request(str((currsource + "/blocks/%d_%d_%d/%d") % (xbindex, ybindex, zbindex, xrun)), blockbuffer, ConnectionMethod.POST) 
                                    startblock = False
                                    xrun = 0
                                    blockbuffer = ""

                            else:
                                if not startblock:
                                    xbindex = xiter*maxxrun // 2 + iterx // BLKSIZE
                               
                                startblock = True
                                blockbuffer += block.tobytes()
                                xrun += 1


                        # write-out leftover blocks
                        if xrun > 0:
                            node_service.custom_request(str((currsource + "/blocks/%d_%d_%d/%d") % (xbindex, ybindex, zbindex, xrun)), blockbuffer, ConnectionMethod.POST) 
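                            # the blocks endpoint encodes a start block index and
                            # a run length, e.g. (with a hypothetical instance name)
                            # 'grayscale_1/blocks/3_2_1/4' posts 4 consecutive
                            # x-blocks starting at block index (3, 2, 1)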


                downsampleddata.foreach(write2dvid)

            # adjust max coordinate for new level
            xspan = (xspan-1) // 2
            yspan = (yspan-1) // 2
            zspan = (zspan-1) // 2
Example 31
def post_meshes_to_dvid(config, instance_name, partition_items):
    """
    Send the given meshes (either .obj or .drc) as key/value pairs to DVID.
    
    Args:
        config: The CreateMeshes workflow config data
        
        instance_name: key-value instance to post to
            
        partition_items: tuple (group_id, [(segment_id, mesh_data), (segment_id, mesh_data)])
    """
    # Re-use session for connection pooling.
    session = default_dvid_session()

    # Re-use resource manager client connections, too.
    # (If resource-server is empty, this will return a "dummy client")
    resource_client = ResourceManagerClient(
        config["options"]["resource-server"],
        config["options"]["resource-port"])

    dvid_server = config["dvid-info"]["dvid"]["server"]
    uuid = config["dvid-info"]["dvid"]["uuid"]

    grouping_scheme = config["mesh-config"]["storage"]["grouping-scheme"]
    mesh_format = config["mesh-config"]["storage"]["format"]

    if grouping_scheme == "no-groups":
        for group_id, segment_ids_and_meshes in partition_items:
            for (segment_id, mesh_data) in segment_ids_and_meshes:

                @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
                def write_mesh():
                    with resource_client.access_context(
                            dvid_server, False, 2, len(mesh_data)):
                        session.post(
                            f'{dvid_server}/api/node/{uuid}/{instance_name}/key/{segment_id}',
                            mesh_data)
                        session.post(
                            f'{dvid_server}/api/node/{uuid}/{instance_name}/key/{segment_id}_info',
                            json={'format': mesh_format})

                write_mesh()
    else:
        # All other grouping schemes, including 'singletons' write tarballs.
        # (In the 'singletons' case, there is just one tarball per body.)
        for group_id, segment_ids_and_meshes in partition_items:
            tar_name = _get_group_name(config, group_id)
            tar_stream = BytesIO()
            with closing(tarfile.open(tar_name, 'w', tar_stream)) as tf:
                for (segment_id, mesh_data) in segment_ids_and_meshes:
                    mesh_name = _get_mesh_name(config, segment_id)
                    f_info = tarfile.TarInfo(mesh_name)
                    f_info.size = len(mesh_data)
                    tf.addfile(f_info, BytesIO(mesh_data))

            tar_bytes = tar_stream.getbuffer()

            @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
            def write_tar():
                with resource_client.access_context(dvid_server, False, 1,
                                                    len(tar_bytes)):
                    session.post(
                        f'{dvid_server}/api/node/{uuid}/{instance_name}/key/{tar_name}',
                        tar_bytes)

            write_tar()
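# Hedged usage sketch (all names below are illustrative, not taken from a real
# workflow config): each partition item pairs a group id with that group's meshes.
#
#   items = [(9999, [(101, obj_bytes_101), (102, obj_bytes_102)])]
#   post_meshes_to_dvid(config, 'segmentation_meshes_tars', items)
#
# With a tarball grouping scheme, this posts one tarball named after group 9999
# containing the two mesh files.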
    def execute(self):
        """
        Execute spark workflow.
        """
        self._sanitize_config()
        session = default_dvid_session()

        dvid_info = self.config_data["dvid-info"]
        options = self.config_data["options"]
        block_shape = 3*(options["blocksize"],)
        self.partition_size = options["blockwritelimit"] * options["blocksize"]
        # ?? the number of parallel requests might be very small at high pyramid levels

        # xdim is unbounded or very large
        partition_dims = PartitionDims(options["blocksize"], options["blocksize"], self.partition_size)
        partition_schema = partitionSchema( partition_dims,
                                            blank_delimiter=options["blankdelimiter"],
                                            padding=options["blocksize"],
                                            enablemask=options["has-dvidmask"])

        offset_zyx = np.array( options["offset"][::-1] )
        offset_zyx[0] += options["minslice"]
        imgreader = imagefileSrc( partition_schema,
                                  options["basename"],
                                  (options["minslice"], options["maxslice"]),
                                  VolumeOffset(*offset_zyx),
                                  self.sc )
       
        # !! hack: override iteration size that is set to partition size, TODO: add option
        # this just makes the downstream processing a little more convenient, and reduces
        # unnecessary DVID patching if that is enabled.
        # (must be a multiple of block size)
        imgreader.iteration_size = options["num-tasks"]

        # get dims from image (hackage)
        from PIL import Image
        import requests
        if '%' in options["basename"]:
            minslice_name = options["basename"] % options["minslice"]
        elif '{' in options["basename"]:
            minslice_name = options["basename"].format(options["minslice"])
        else:
            raise RuntimeError(f"Unrecognized format string for image basename: {options['basename']}")
        
        img = Image.open(minslice_name) 
        volume_shape = (1 + options["maxslice"] - options["minslice"], img.height, img.width)
        del img

        global_box_zyx = np.zeros((2,3), dtype=int)
        global_box_zyx[0] = options["offset"][::-1]
        global_box_zyx[0] += (options["minslice"], 0, 0)

        global_box_zyx[1] = global_box_zyx[0] + volume_shape

        if options["create-pyramid"]:
            if is_datainstance( dvid_info["dvid-server"], dvid_info["uuid"], dvid_info["dataname"] ):
                logger.info("'{dataname}' already exists, skipping creation".format(**dvid_info) )
            else:
                # create data instance and disable dvidmask
                # !! assume that if the data instance exists and the mask is set,
                # !! all pyramid levels also exist, meaning the mask should be used.
                options["has-dvidmask"] = False
                if options["disable-original"]:
                    logger.info("Not creating '{dataname}' due to 'disable-original' config setting".format(**dvid_info) )
                elif 0 in options["skipped-pyramid-levels"]:
                    logger.info("Not creating '{dataname}' due to 'skipped-pyramid-levels' config setting".format(**dvid_info) )
                else:
                    if options["is-rawarray"]:
                        create_rawarray8( dvid_info["dvid-server"],
                                          dvid_info["uuid"],
                                          dvid_info["dataname"],
                                          block_shape )
                    else:
                        create_label_instance( dvid_info["dvid-server"],
                                           dvid_info["uuid"],
                                           dvid_info["dataname"],
                                           0,
                                           block_shape )

            if not options["disable-original"] and 0 not in options["skipped-pyramid-levels"]:
                update_extents( dvid_info["dvid-server"],
                                dvid_info["uuid"],
                                dvid_info["dataname"],
                                global_box_zyx )
    
                # Bottom level of pyramid is listed as neuroglancer-compatible
                extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"], '.meta', 'neuroglancer', [dvid_info["dataname"]])

        # determine number of pyramid levels if not specified 
        if options["create-pyramid"] or options["create-pyramid-jpeg"]:
            if options["pyramid-depth"] == -1:
                options["pyramid-depth"] = 0
                zsize = options["maxslice"] - options["minslice"] + 1
                while zsize > 512:
                    options["pyramid-depth"] += 1
                    zsize /= 2

                # NeuTu doesn't work well if there aren't at least a few pyramid levels.
                # Even for small volumes, use at least a few pyramid levels,
                # unless the depth was explicit in the config.
                options["pyramid-depth"] = max(options["pyramid-depth"], 4)

        # create pyramid data instances
        if options["create-pyramid-jpeg"]:
            dataname_jpeg = dvid_info["dataname"] + self.JPEGPYRAMID_NAME 
            if 0 in options["skipped-pyramid-levels"]:
                logger.info("Not creating '{}' due to 'skipped-pyramid-levels' config setting".format(dataname_jpeg) )
            else:
                if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"], dataname_jpeg):
                    logger.info("'{}' already exists, skipping creation".format(dataname_jpeg) )
                else:
                    create_rawarray8( dvid_info["dvid-server"],
                                      dvid_info["uuid"],
                                      dataname_jpeg,
                                      block_shape,
                                      Compression.JPEG )
    
                update_extents( dvid_info["dvid-server"],
                                dvid_info["uuid"],
                                dataname_jpeg,
                                global_box_zyx )
    
                # Bottom level of pyramid is listed as neuroglancer-compatible
                extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"], '.meta', 'neuroglancer', [dataname_jpeg])

    
        if options["create-pyramid"]:
            for level in range(1, 1 + options["pyramid-depth"]):
                downsampled_box_zyx = global_box_zyx // (2**level)
                downname = dvid_info["dataname"] + "_%d" % level

                if level in options["skipped-pyramid-levels"]:
                    logger.info("Not creating '{}' due to 'skipped-pyramid-levels' config setting".format(downname) )
                    continue
                
                if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"], downname):
                    logger.info("'{}' already exists, skipping creation".format(downname) )
                else:
                    if options["is-rawarray"]:
                        create_rawarray8( dvid_info["dvid-server"],
                                          dvid_info["uuid"],
                                          downname,
                                          block_shape )
                    else:
                        create_label_instance( dvid_info["dvid-server"],
                                           dvid_info["uuid"],
                                           downname,
                                           0,
                                           block_shape )

                update_extents( dvid_info["dvid-server"],
                                dvid_info["uuid"],
                                downname,
                                downsampled_box_zyx )

                # Higher levels of the pyramid should not appear in the DVID-lite console.
                extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"], '.meta', 'restrictions', [downname])

        if options["create-pyramid-jpeg"]: 
            for level in range(1, 1 + options["pyramid-depth"]):
                downsampled_box_zyx = global_box_zyx // (2**level)
                downname = dvid_info["dataname"] + self.JPEGPYRAMID_NAME + "_%d" % level

                if level in options["skipped-pyramid-levels"]:
                    logger.info("Not creating '{}' due to 'skipped-pyramid-levels' config setting".format(downname) )
                    continue
                
                if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"], downname):
                    logger.info("'{}' already exists, skipping creation".format(downname) )
                else:
                    create_rawarray8( dvid_info["dvid-server"],
                                      dvid_info["uuid"],
                                      downname,
                                      block_shape,
                                      Compression.JPEG )

                update_extents( dvid_info["dvid-server"],
                                dvid_info["uuid"],
                                downname,
                                downsampled_box_zyx )

                # Higher levels of the pyramid should not appear in the DVID-lite console.
                extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"], '.meta', 'restrictions', [downname])
            
        # create tiles
        if options["create-tiles"] or options["create-tiles-jpeg"]:
            MinTileCoord = global_box_zyx[0][::-1] // options["tilesize"]
            MaxTileCoord = global_box_zyx[1][::-1] // options["tilesize"]
            
            # get max level by just finding max tile coord
            maxval = max(MaxTileCoord) - min(MinTileCoord) + 1
            import math
            self.maxlevel = int(math.log(maxval) / math.log(2))
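            # e.g. MaxTileCoord = [7, 5, 3] and MinTileCoord = [0, 0, 0] give
            # maxval = 8 and maxlevel = int(log2(8)) = 3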

            tilemeta = {}
            tilemeta["MinTileCoord"] = MinTileCoord.tolist()
            tilemeta["MaxTileCoord"] = MaxTileCoord.tolist()
            tilemeta["Levels"] = {}

            currres = 8.0 # just use as placeholder for now
            for level in range(0, self.maxlevel+1):
                tilemeta["Levels"][str(level)] = { "Resolution" : 3*[currres],
                                                   "TileSize": 3*[options["tilesize"]] }
                currres *= 2

            if options["create-tiles"]:
                session.post("{dvid-server}/api/repo/{uuid}/instance".format(**dvid_info),
                              json={"typename": "imagetile",
                                    "dataname": dvid_info["dataname"]+self.TILENAME,
                                    "source": dvid_info["dataname"],
                                    "format": "png"})
                session.post("{dvid-server}/api/repo/{uuid}/{dataname}{tilename}/metadata".format(tilename=self.TILENAME, **dvid_info), json=tilemeta)

            if options["create-tiles-jpeg"]:
                session.post("{dvid-server}/api/repo/{uuid}/instance".format(**dvid_info),
                              json={ "typename": "imagetile",
                                     "dataname": dvid_info["dataname"]+self.JPEGTILENAME,
                                     "source": dvid_info["dataname"],
                                     "format": "jpg"} )
                session.post("{dvid-server}/api/repo/{uuid}/{dataname_jpeg_tile}/metadata"
                              .format( dataname_jpeg_tile=dvid_info["dataname"]+self.JPEGTILENAME, **dvid_info ),
                              json=tilemeta)

        if dvid_info["dvid-server"].startswith("http://127.0.0.1"):
            def reload_meta():
                reload_server_metadata(dvid_info["dvid-server"])
            self.run_on_each_worker( reload_meta )

        # TODO Validation: should verify syncs exist, should verify pyramid depth 

        # TODO: set syncs for pyramids, tiles if base datatype exists
        # syncs should be removed before ingestion and added afterward

        levels_cache = {}

        # iterate through each partition
        for arraypartition in imgreader:
            # DVID pad if necessary
            if options["has-dvidmask"]:
                dvidsrc = dvidSrc( dvid_info["dvid-server"],
                                   dvid_info["uuid"],
                                   dvid_info["dataname"],
                                   arraypartition,
                                   resource_server=self.resource_server,
                                   resource_port=self.resource_port)

                arraypartition = dvidsrc.extract_volume()

            # potentially needed for future iterations
            arraypartition.persist()

            # check for final layer
            finallayer = imgreader.curr_slice > imgreader.end_slice

            if not options["disable-original"]:
                # Write level-0 of the raw data, even if we aren't writing the rest of the pyramid.
                dataname = datanamelossy = None
                if options["create-pyramid"]:
                    dataname = dvid_info["dataname"]
                if options["create-pyramid-jpeg"]:
                    datanamelossy = dvid_info["dataname"] + self.JPEGPYRAMID_NAME
                
                if (dataname or datanamelossy) and 0 not in options["skipped-pyramid-levels"]:
                    self._write_blocks(arraypartition, dataname, datanamelossy) 

            if options["create-tiles"] or options["create-tiles-jpeg"]:
                # repartition into tiles
                schema = partitionSchema(PartitionDims(1,0,0))
                tilepartition = schema.partition_data(arraypartition)
               
                # write unpadded tilesize (will pad with delimiter if needed)
                self._writeimagepyramid(tilepartition)

            if options["create-pyramid"] or options["create-pyramid-jpeg"]:
                if 0 not in levels_cache:
                    levels_cache[0] = []
                levels_cache[0].append(arraypartition) 
                curr_level = 1
                downsample_factor = 2

                # should be a multiple of Z blocks or the final fetch
                assert imgreader.curr_slice % options["blocksize"] == 0
                while ((((imgreader.curr_slice // options["blocksize"]) % downsample_factor) == 0) or finallayer) and curr_level <= options["pyramid-depth"]:
                    partlist = levels_cache[curr_level-1]
                    part = partlist[0]
                    # union all RDDs from the same level
                    for iter1 in range(1, len(partlist)):
                        part = part.union(partlist[iter1])
                    
                    # downsample map
                    israw = options["is-rawarray"]
                    def downsample(part_vol):
                        part, vol = part_vol
                        if not israw:
                            vol = downsample_3Dlabels(vol)[0]
                        else:
                            vol = downsample_raw(vol)[0]
                        return (part, vol)
                    downsampled_array = part.map(downsample)
        
                    # repartition (vol and offset will always be a power of two because of padding)
                    def repartition_down(part_volume):
                        part, volume = part_volume
                        downsampled_offset = np.array(part.get_offset()) // 2
                        downsampled_reloffset = np.array(part.get_reloffset()) // 2
                        offsetnew = VolumeOffset(*downsampled_offset)
                        reloffsetnew = VolumeOffset(*downsampled_reloffset)
                        partnew = volumePartition((offsetnew.z, offsetnew.y, offsetnew.x), offsetnew, reloffset=reloffsetnew)
                        return partnew, volume

                    downsampled_array = downsampled_array.map(repartition_down)
                    
                    # repartition downsample data
                    partition_dims = PartitionDims(options["blocksize"], options["blocksize"], self.partition_size)
                    schema = partitionSchema( partition_dims,
                                              blank_delimiter=options["blankdelimiter"],
                                              padding=options["blocksize"],
                                              enablemask=options["has-dvidmask"] ) 
                    downsampled_array = schema.partition_data(downsampled_array)

                    # persist before padding if there are more levels
                    if curr_level < options["pyramid-depth"]:
                        downsampled_array.persist()
                        if curr_level not in levels_cache:
                            levels_cache[curr_level] = []
                        levels_cache[curr_level].append(downsampled_array)

                    # pad from DVID (moving this before the persist would allow
                    # multi-ingest, but would lead to slightly non-optimal
                    # downsampling boundary effects when using lossy compression)
                    if options["has-dvidmask"]:
                        padname = dvid_info["dataname"]
                        if options["create-pyramid-jpeg"]: # !! should pad with orig if computing
                            # pad with jpeg
                            padname += self.JPEGPYRAMID_NAME 
                        padname += "_%d" % curr_level
                        dvidsrc = dvidSrc( dvid_info["dvid-server"],
                                           dvid_info["uuid"],
                                           padname,
                                           downsampled_array,
                                           resource_server=self.resource_server,
                                           resource_port=self.resource_port )

                        downsampled_array = dvidsrc.extract_volume()

                    # write result
                    downname = None
                    downnamelossy = None
                    if options["create-pyramid"]:
                        downname = dvid_info["dataname"] + "_%d" % curr_level 
                    if options["create-pyramid-jpeg"]:
                        downnamelossy = dvid_info["dataname"] + self.JPEGPYRAMID_NAME + "_%d" % curr_level 

                    if curr_level not in options["skipped-pyramid-levels"]:
                        self._write_blocks(downsampled_array, downname, downnamelossy)

                    # remove previous level
                    del levels_cache[curr_level-1]
                    curr_level += 1
                    downsample_factor *= 2