Example #1
    def test_ingest_blocks_uint16_8_threads(self):
        now = datetime.now().strftime("%Y%m%d-%H%M%S%f")

        self.args.experiment = 'dev_ingest_larger' + now
        self.args.channel = 'def_files' + now
        self.args.x_extent = [0, 8 * 1024]
        self.args.z_range = [0, 16]
        self.args.datatype = 'uint16'
        self.args.extension = 'tif'

        x_size = 8 * 1024
        y_size = 1024

        stride_x = 1024
        x_buckets = get_supercube_lims(self.args.x_extent, stride_x)

        ingest_job = IngestJob(self.args)
        gen_images(ingest_job)

        self.args.create_resources = True
        result = per_channel_ingest(self.args, self.args.channel)
        assert result == 0

        boss_res_params = BossResParams(ingest_job)
        boss_res_params.get_resources(get_only=True)

        z_slices = list(range(self.args.z_range[0], self.args.z_range[1]))
        y_rng = self.args.y_extent

        im_array = ingest_job.read_img_stack(z_slices)

        threads = 8

        ingest_block_partial = partial(
            ingest_block, x_buckets=x_buckets, boss_res_params=boss_res_params, ingest_job=ingest_job,
            y_rng=y_rng, z_rng=self.args.z_range, im_array=im_array)

        start_time = time.time()
        with ThreadPool(threads) as pool:
            pool.map(ingest_block_partial, x_buckets.keys())
        time_taken = time.time() - start_time
        print('{} secs taken with {} threads'.format(time_taken, threads))

        data_boss = download_boss_slice(
            boss_res_params, ingest_job, 0)[0, :, :]

        data_local = im_array[0, :, :]

        assert np.array_equal(data_boss, data_local)

        # cleanup
        ingest_job = IngestJob(self.args)
        boss_res_params = BossResParams(ingest_job)
        boss_res_params.get_resources(get_only=True)
        boss_res_params.rmt.delete_project(boss_res_params.ch_resource)
        boss_res_params.rmt.delete_project(boss_res_params.exp_resource)
        os.remove(ingest_job.get_log_fname())
        del_test_images(ingest_job)
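
Both examples on this page and per_channel_ingest below depend on get_supercube_lims, which isn't shown here. A minimal sketch of the contract the callers assume, splitting an extent into stride-aligned buckets keyed by each bucket's starting coordinate (the project's real implementation may differ):

def get_supercube_lims(extent, stride=512):
    # bucket the half-open range [extent[0], extent[1]) into stride-aligned
    # groups of coordinates, keyed by each group's aligned start
    buckets = {}
    first_key = (extent[0] // stride) * stride
    for key in range(first_key, extent[1], stride):
        buckets[key] = list(range(max(key, extent[0]),
                                  min(key + stride, extent[1])))
    return buckets

With x_extent = [0, 8 * 1024] and stride_x = 1024 as in Example #1, this yields eight buckets whose keys (0, 1024, ..., 7168) are what pool.map hands to each worker.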
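
The verification step also calls download_boss_slice, which is not shown. A sketch of what it plausibly does, built on intern's BossRemote.get_cutout; the ingest_job.res, x_extent, and y_extent attributes are assumptions beyond what the test shows:

def download_boss_slice(boss_res_params, ingest_job, z_index):
    # fetch one z slice across the full x/y extent from the Boss;
    # intern returns a (z, y, x) numpy array, here shaped (1, y, x)
    return boss_res_params.rmt.get_cutout(
        boss_res_params.ch_resource, ingest_job.res,
        ingest_job.x_extent, ingest_job.y_extent, [z_index, z_index + 1])

The test's [0, :, :] indexing matches that (1, y, x) shape.
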
Example #2
    def test_local_ingest_cuts(self):
        cut = create_cutout(self.cutout_text)
        coll, exp, ch = (cut.collection, cut.experiment, cut.channel)

        (datasource, s3_bucket_name, aws_profile, boss_config_file, base_path,
         base_filename, extension, z_step, datatype) = create_local_ingest_params()
        args = Namespace(
            datasource=datasource,
            s3_bucket_name=s3_bucket_name,
            collection=coll,
            experiment=exp,
            channel=ch,
            datatype=datatype,
            aws_profile=aws_profile,
            boss_config_file=boss_config_file,
            base_path=base_path,
            base_filename=base_filename,
            extension=extension,
            z_range=[0, 16],
            z_step=z_step,
            warn_missing_files=True,
            get_extents=True,
            res=0,
        )

        ingest_job = IngestJob(args)
        boss_res_params = BossResParams(ingest_job)
        boss_res_params.setup_boss_coord_frame(get_only=True)
        boss_res_params.get_resources(get_only=False)

        gen_images(ingest_job)

        # ingest the cut
        ingest_cuts([cut], ingest_job, boss_res_params)

        # pull the data from the boss after the new ingest
        data_boss = boss_res_params.rmt.get_cutout(boss_res_params.ch_resource,
                                                   0, cut.x, cut.y, cut.z)

        # check that it matches the local file data
        z_slices = range(cut.z[0], cut.z[1])

        # loading data locally for comparison
        im_array = ingest_job.read_img_stack(z_slices)
        data_local = im_array[:, cut.y[0]:cut.y[1], cut.x[0]:cut.x[1]]
        assert np.array_equal(data_local, data_boss)

        del_test_images(ingest_job)
        os.remove(ingest_job.get_log_fname())
        os.remove(cut.log_fname)
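
ingest_cuts is likewise not shown on this page. Given how the test reads the data back, a minimal sketch that POSTs each cut's sub-volume through intern's create_cutout (retries and logging in the real helper are omitted; ingest_job.res is an assumption):

import numpy as np

def ingest_cuts(cuts, ingest_job, boss_res_params):
    for cut in cuts:
        # read only the z slices this cut covers, then crop to its x/y window
        im_array = ingest_job.read_img_stack(range(cut.z[0], cut.z[1]))
        # intern expects a C-contiguous (z, y, x) volume in the channel datatype
        data = np.ascontiguousarray(
            im_array[:, cut.y[0]:cut.y[1], cut.x[0]:cut.x[1]])
        boss_res_params.rmt.create_cutout(
            boss_res_params.ch_resource, ingest_job.res,
            cut.x, cut.y, cut.z, data)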
Example #3
    def test_read_uint16_img_stack(self):
        ingest_job = IngestJob(self.args)

        # generate some images
        gen_images(ingest_job)

        # load images into memory using ingest_job
        z_slices = range(self.args.z_range[0], self.args.z_range[1])
        im_array = ingest_job.read_img_stack(z_slices)

        # check that each image matches its z index in the array
        for z in z_slices:
            img_fname = self.args.base_path + 'img_{:04d}.tif'.format(z)
            with Image.open(img_fname) as im:
                assert np.array_equal(im_array[z, :, :], im)

        del_test_images(ingest_job)
        os.remove(ingest_job.get_log_fname())
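
For reference, the behavior this test pins down could be sketched as a standalone function, assuming one 2D TIFF per z index named as in the test; IngestJob.read_img_stack presumably does something equivalent with extra error handling:

import numpy as np
from PIL import Image

def read_img_stack(base_path, z_slices, extension='tif'):
    # load one image per z index and stack them into a (z, y, x) array
    slices = []
    for z in z_slices:
        with Image.open('{}img_{:04d}.{}'.format(base_path, z, extension)) as im:
            slices.append(np.array(im))
    return np.stack(slices, axis=0)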
Example #4
def per_channel_ingest(args, channel, threads=8):
    args.channel = channel
    ingest_job = IngestJob(args)

    # Extract img_size and datatype to validate the inputs by actually reading
    # the data. This can take a while since we load the first image slice in
    # full; ideally we'd cache that slice so it isn't read again when we later
    # read the entire chunk in z. We skip this for the render datasource
    # because image size and attributes come from the render metadata, which
    # doesn't include the bit depth.
    if ingest_job.datasource != 'render':
        im_width, im_height, im_datatype = ingest_job.get_img_info(
            ingest_job.z_range[0])

        # validate before creating Boss resources so we don't create them
        # from inaccurate parameters
        if (ingest_job.img_size[0] != im_width
                or ingest_job.img_size[1] != im_height
                or ingest_job.datatype != im_datatype):
            ingest_job.send_msg(
                'Mismatch between image file and input parameters. '
                'Determined image width: {}, height: {}, datatype: {}'.format(
                    im_width, im_height, im_datatype))
            raise ValueError('Image attributes do not match arguments')

    # create or get the boss resources for the data
    get_only = not ingest_job.create_resources
    boss_res_params = BossResParams(ingest_job)
    boss_res_params.get_resources(get_only=get_only)

    # if we were asked only to create resources, report that and stop here
    if ingest_job.create_resources:
        ingest_job.send_msg('{} Resources set up. Collection: {}, Experiment: {}, Channel: {}'.format(
            get_formatted_datetime(), ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name))
        return 0
    else:
        ingest_job.send_msg('{} Starting ingest for Collection: {}, Experiment: {}, Channel: {}, Z: {z[0]},{z[1]}'.format(
            get_formatted_datetime(), ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name, z=ingest_job.z_range))

    # we begin the ingest here:
    stride_x = 1024
    stride_y = 1024
    stride_z = 16
    x_buckets = get_supercube_lims(ingest_job.x_extent, stride_x)
    y_buckets = get_supercube_lims(ingest_job.y_extent, stride_y)
    z_buckets = get_supercube_lims(ingest_job.z_range, stride_z)

    with ThreadPool(threads) as pool:

        # load image files in stacks of 16 at a time into a numpy array
        for _, z_slices in z_buckets.items():
            # read images into numpy array
            im_array = ingest_job.read_img_stack(z_slices)
            z_rng = [z_slices[0] - ingest_job.offsets[2],
                     z_slices[-1] + 1 - ingest_job.offsets[2]]

            # slice into np array blocks
            for _, y_slices in y_buckets.items():
                y_rng = [y_slices[0], y_slices[-1] + 1]

                ingest_block_partial = partial(
                    ingest_block, x_buckets=x_buckets, boss_res_params=boss_res_params, ingest_job=ingest_job,
                    y_rng=y_rng, z_rng=z_rng, im_array=im_array)
                pool.map(ingest_block_partial, x_buckets.keys())

    # check that the data posted correctly for an entire z slice
    assert_equal(boss_res_params, ingest_job, ingest_job.z_range)

    ch_link = 'https://ndwebtools.neurodata.io/channel_detail/{}/{}/{}/'.format(
        ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name)

    ingest_job.send_msg('{} Finished z slices {} for Collection: {}, Experiment: {}, Channel: {}\nThere were {} read failures and {} POST failures.\nView properties of channel and start downsample job on ndwebtools: {}'.format(
        get_formatted_datetime(),
        ingest_job.z_range, ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name,
        ingest_job.num_READ_failures, ingest_job.num_POST_failures, ch_link), send_slack=True)

    return 0
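
The ingest_block worker that pool.map fans out over is not shown above. A sketch consistent with the partial bindings used here, assuming it resolves its x bucket from the shared dict and POSTs one block via intern's create_cutout (ingest_job.res and global-coordinate indexing into im_array are assumptions):

import numpy as np

def ingest_block(x_key, x_buckets, boss_res_params, ingest_job,
                 y_rng, z_rng, im_array):
    # worker for pool.map: POST one supercube-aligned block to the Boss
    x_slices = x_buckets[x_key]
    x_rng = [x_slices[0], x_slices[-1] + 1]
    # crop the in-memory stack; intern wants a C-contiguous (z, y, x) volume
    data = np.ascontiguousarray(
        im_array[:, y_rng[0]:y_rng[1], x_rng[0]:x_rng[1]])
    boss_res_params.rmt.create_cutout(
        boss_res_params.ch_resource, ingest_job.res, x_rng, y_rng, z_rng, data)

Note that in per_channel_ingest the im_array passed in holds only the current z bucket, so the worker takes its full z extent while z_rng carries the absolute coordinates for the POST.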