Example #1
0
    def test_get_img_info_uint16_tif_s3(self):
        """get_img_info should report correct width/height/dtype for a tif on S3."""
        self.set_s3_args()
        ingest_job = IngestJob(self.args)

        slice_idx = 0

        # generate a local image file for the first z slice
        fname = ingest_job.get_img_fname(slice_idx)
        create_img_file(ingest_job.img_size[0], ingest_job.img_size[1],
                        self.args.datatype, self.args.extension, fname)

        # upload the generated file to the test bucket
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(self.args.s3_bucket_name)
        with open(fname, 'rb') as fp:
            bucket.put_object(Key=fname, Body=fp)

        # ask the ingest job for the image metadata
        im_width, im_height, im_datatype = ingest_job.get_img_info(slice_idx)

        # compare against what PIL reports for the local copy
        pixels = np.array(Image.open(fname))
        assert im_width == pixels.shape[1]
        assert im_height == pixels.shape[0]
        assert im_datatype == pixels.dtype

        # remove the uploaded object and local artifacts
        bucket.delete_objects(Delete={'Objects': [{'Key': fname}]})

        os.remove(ingest_job.get_log_fname())
        os.remove(fname)
Example #2
0
    def test_get_img_info_uint16_png(self):
        """get_img_info should match the job's size and datatype for a png file."""
        self.args.file_format = 'png'
        ingest_job = IngestJob(self.args)

        slice_idx = 0

        # write a test image sized to the job's expected dimensions
        fname = ingest_job.get_img_fname(slice_idx)
        create_img_file(ingest_job.img_size[0], ingest_job.img_size[1],
                        self.args.datatype, self.args.extension, fname)

        # reported info should agree with the job parameters
        im_width, im_height, im_datatype = ingest_job.get_img_info(slice_idx)
        assert im_width == ingest_job.img_size[0]
        assert im_height == ingest_job.img_size[1]
        assert im_datatype == self.args.datatype

        # clean up generated files
        os.remove(ingest_job.get_log_fname())
        os.remove(fname)
Example #3
0
    def test_get_img_info_uint64_tif(self):
        """get_img_info should handle uint64 tif data from the def_files source."""
        # override the default args for a 64-bit tif read from def_files
        self.args.file_format = 'tif'
        self.args.datatype = 'uint64'
        self.args.source_channel = 'def_files'
        ingest_job = IngestJob(self.args)

        slice_idx = 0

        # write a test image sized to the job's expected dimensions
        fname = ingest_job.get_img_fname(slice_idx)
        create_img_file(ingest_job.img_size[0], ingest_job.img_size[1],
                        self.args.datatype, self.args.extension, fname)

        # reported info should agree with the job parameters
        im_width, im_height, im_datatype = ingest_job.get_img_info(slice_idx)
        assert im_width == ingest_job.img_size[0]
        assert im_height == ingest_job.img_size[1]
        assert im_datatype == self.args.datatype

        # clean up generated files
        os.remove(ingest_job.get_log_fname())
        os.remove(fname)
Example #4
0
def per_channel_ingest(args, channel, threads=8):
    """Ingest a single channel's image stack into the boss.

    Validates the on-disk image attributes against the job arguments,
    creates (or fetches) the boss resources, then posts the data in
    1024x1024x16 supercube-aligned blocks using a thread pool.

    Args:
        args: parsed job arguments; mutated in place (args.channel is set).
        channel: name of the channel to ingest.
        threads: number of worker threads for posting blocks (default 8).

    Returns:
        0 on success (including the create-resources-only path).

    Raises:
        ValueError: if the first image slice's width/height/datatype do not
            match the job arguments.
    """
    args.channel = channel
    ingest_job = IngestJob(args)

    # extract img_size and datatype to check inputs (by actually reading the data)
    # this can take a while, as we actually load in the first image slice,
    # so we should store this first slice so we don't have to load it again when we later read the entire chunk in z
    # we don't do this for render data source because we get the image size and attributes from the render metadata and the # of bits aren't in the metadata or render
    if ingest_job.datasource != 'render':
        im_width, im_height, im_datatype = ingest_job.get_img_info(
            ingest_job.z_range[0])

        # we do this before creating boss resources that could be inaccurate.
        # explicit `if` rather than `assert` so the check still runs under
        # `python -O` (asserts are stripped by the optimizer)
        if (ingest_job.img_size[0] != im_width
                or ingest_job.img_size[1] != im_height
                or ingest_job.datatype != im_datatype):
            ingest_job.send_msg('Mismatch between image file and input parameters. Determined image width: {}, height: {}, datatype: {}'.format(
                im_width, im_height, im_datatype))
            raise ValueError('Image attributes do not match arguments')

    # create or get the boss resources for the data
    get_only = not ingest_job.create_resources
    boss_res_params = BossResParams(ingest_job)
    boss_res_params.get_resources(get_only=get_only)

    # we just create the resources, don't do anything else
    if ingest_job.create_resources:
        ingest_job.send_msg('{} Resources set up. Collection: {}, Experiment: {}, Channel: {}'.format(
            get_formatted_datetime(), ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name))
        return 0
    else:
        ingest_job.send_msg('{} Starting ingest for Collection: {}, Experiment: {}, Channel: {}, Z: {z[0]},{z[1]}'.format(
            get_formatted_datetime(), ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name, z=ingest_job.z_range))

    # we begin the ingest here:
    # strides match the boss supercube dimensions (1024 x 1024 x 16)
    stride_x = 1024
    stride_y = 1024
    stride_z = 16
    x_buckets = get_supercube_lims(ingest_job.x_extent, stride_x)
    y_buckets = get_supercube_lims(ingest_job.y_extent, stride_y)
    z_buckets = get_supercube_lims(ingest_job.z_range, stride_z)

    with ThreadPool(threads) as pool:

        # load images files in stacks of 16 at a time into numpy array
        for _, z_slices in z_buckets.items():
            # read images into numpy array
            im_array = ingest_job.read_img_stack(z_slices)
            # z range relative to the array origin (offsets shift to 0-based)
            z_rng = [z_slices[0] - ingest_job.offsets[2],
                     z_slices[-1] + 1 - ingest_job.offsets[2]]

            # slice into np array blocks
            for _, y_slices in y_buckets.items():
                y_rng = [y_slices[0], y_slices[-1] + 1]

                # fan out the x buckets across the thread pool; partial binds
                # everything except the x bucket key
                ingest_block_partial = partial(
                    ingest_block, x_buckets=x_buckets, boss_res_params=boss_res_params, ingest_job=ingest_job,
                    y_rng=y_rng, z_rng=z_rng, im_array=im_array)
                pool.map(ingest_block_partial, x_buckets.keys())

    # checking data posted correctly for an entire z slice
    assert_equal(boss_res_params, ingest_job, ingest_job.z_range)

    # link for the user to inspect the channel / kick off downsampling
    ch_link = (
        'https://ndwebtools.neurodata.io/channel_detail/{}/{}/{}/').format(ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name)

    ingest_job.send_msg('{} Finished z slices {} for Collection: {}, Experiment: {}, Channel: {}\nThere were {} read failures and {} POST failures.\nView properties of channel and start downsample job on ndwebtools: {}'.format(
        get_formatted_datetime(),
        ingest_job.z_range, ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name,
        ingest_job.num_READ_failures, ingest_job.num_POST_failures, ch_link), send_slack=True)

    return 0