예제 #1
0
    def test_get_boss_res_wrong_img_size(self):
        """Expect an HTTPError when fetching resources in get-only mode.

        The experiment and channel names carry a fresh timestamp, and the
        lookup is done with ``get_only=True`` (presumably these names do not
        exist on the server yet -- confirm against the Boss setup).
        """
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

        args = Namespace(
            datasource='local',
            collection='ben_dev',
            experiment='dev_ingest_4' + stamp,
            channel='def_files_' + stamp,
            boss_config_file=None,
            voxel_size=[1, 5, 1],
            voxel_unit='nanometers',
            datatype='uint16',
            res=0,
            x_extent=[0, 2000],
            y_extent=[0, 1000],
            z_extent=[0, 50],
        )

        job = IngestJob(args)

        # either building the params object or the lookup itself may raise
        with pytest.raises(HTTPError):
            res_params = BossResParams(job)
            res_params.get_resources(get_only=True)

        # the job writes a log file; remove it so test runs leave nothing behind
        os.remove(job.get_log_fname())
예제 #2
0
    def test_get_boss_annotation_channel(self):
        """Fetch a channel that has a source channel and check its annotation properties."""
        expected_datatype = 'uint64'

        args = Namespace(
            datasource='local',
            collection='ben_dev',
            experiment='dev_ingest_4',
            channel='def_files_annot',
            boss_config_file=None,
            source_channel='def_files',
            x_extent=[0, 1000],
            y_extent=[0, 1024],
            z_extent=[0, 100],
        )

        job = IngestJob(args)
        res_params = BossResParams(job)
        res_params.get_resources(get_only=True)

        # the job and the fetched channel resource must describe the same channel
        assert job.ch_name == res_params.ch_resource.name
        assert job.boss_datatype == expected_datatype
        assert job.ch_type == 'annotation'
        assert res_params.ch_resource.type == 'annotation'
        assert res_params.ch_resource.sources == [args.source_channel]

        # remove the log file written by the job
        os.remove(job.get_log_fname())
예제 #3
0
    def test_create_boss_res_specified_coord_frame(self):
        """Create resources with an explicit ``coord_frame_x_extent`` and verify them.

        The coordinate frame is expected to take its x range from
        ``coord_frame_x_extent`` while the job keeps its own x/y extents and
        zero offsets.
        """
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

        args = Namespace(
            datasource='local',
            collection='ben_dev',
            experiment='dev_ingest_neg' + stamp,
            channel='def_files_' + stamp,
            boss_config_file=None,
            voxel_size=[1, 5, 1],
            voxel_unit='nanometers',
            datatype='uint16',
            res=0,
            x_extent=[100, 1100],
            y_extent=[0, 1024],
            z_extent=[200, 300],
            coord_frame_x_extent=[0, 2000],
        )

        job = IngestJob(args)
        res_params = BossResParams(job)
        res_params.get_resources(get_only=False)

        # coordinate frame: z from the data extent, x from the explicit frame extent
        frame = res_params.coord_frame_resource
        assert frame.z_start == 200
        assert frame.z_stop == 300
        assert frame.x_start == 0
        assert frame.x_stop == 2000

        # the job's own extents are left as given
        assert job.x_extent == [100, 1100]
        assert job.y_extent == [0, 1024]

        assert job.offsets == [0, 0, 0]

        # cleanup: local log first, then the remote channel and experiment
        os.remove(job.get_log_fname())
        for resource in (res_params.ch_resource, res_params.exp_resource):
            res_params.rmt.delete_project(resource)
예제 #4
0
    def test_get_boss_res_params_just_names(self):
        """Fetch resources given only names/extents and check the values filled in on the job."""
        args = Namespace(
            datasource='local',
            collection='ben_dev',
            experiment='dev_ingest_4',
            channel='def_files',
            boss_config_file=None,
            x_extent=[0, 1000],
            y_extent=[0, 1024],
            z_extent=[0, 100],
        )

        job = IngestJob(args)
        res_params = BossResParams(job)
        res_params.get_resources(get_only=True)

        # fetched resources carry the requested names
        assert res_params.coll_resource.name == args.collection
        assert res_params.exp_resource.name == args.experiment
        assert res_params.ch_resource.name == args.channel
        assert res_params.exp_resource.hierarchy_method == 'isotropic'

        # values not supplied in args that the job is expected to hold afterwards
        assert job.voxel_size == [1, 1, 1]
        assert job.voxel_unit == 'micrometers'
        assert job.offsets == [0, 0, 0]
        assert job.datatype == 'uint16'
        assert job.boss_datatype == 'uint16'
        assert job.res == 0
        assert job.extension is None
        assert job.z_step is None

        # remove the log file written by the job
        os.remove(job.get_log_fname())
예제 #5
0
    def test_post_uint16_cutout(self):
        """Post a random non-zero uint16 block and check it reads back unchanged."""
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")
        dtype = 'uint16'
        width = 128
        height = 128
        # number of bits taken from the digits in the dtype name
        n_bits = int(''.join(c for c in dtype if c.isdigit()))

        # random block; lower bound 1 keeps every voxel non-zero
        depth = self.args.z_range[1]
        data = np.random.randint(
            1, 2**n_bits, size=(depth, height, width), dtype=dtype)

        # cutout bounds as (start, stop) pairs
        x_lims = (0, width)
        y_lims = (0, height)
        z_lims = (0, depth)

        self.args.datatype = dtype
        self.args.channel = 'def_files' + stamp

        job = IngestJob(self.args)
        res_params = BossResParams(job)
        res_params.get_resources(get_only=False)

        # upload the block; fresh lists are handed to each call
        status = post_cutout(res_params, job, list(x_lims), list(y_lims),
                             list(z_lims), data, attempts=1)
        assert status == 0

        # download the same region at resolution 0 and compare
        downloaded = res_params.rmt.get_cutout(
            res_params.ch_resource, 0, list(x_lims), list(y_lims), list(z_lims))
        assert np.array_equal(downloaded, data)

        # cleanup: remote channel, then local log
        res_params.rmt.delete_project(res_params.ch_resource)
        os.remove(job.get_log_fname())
예제 #6
0
    def test_per_channel_ingest_neg_x_extent_offset(self):
        """Run the per-channel ingest helper with a negative x extent and offsets enabled.

        The assertions live in ``ingest_test_per_channel``; this method sets up
        the arguments, runs the helper twice (resource creation, then ingest)
        and removes what was created.
        """
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

        self.args.experiment = 'test_neg_offset_' + stamp
        self.args.channel = 'def_files'
        self.args.datatype = 'uint16'
        self.args.x_extent = [-1000, 0]
        self.args.offset_extents = True
        self.args.extension = 'png'
        self.args.channels_list_file = 'tests/channels.test.txt'

        channels = read_channel_names(self.args.channels_list_file)

        # first pass only creates the resources, second pass does the ingest
        for create_flag in (True, False):
            self.args.create_resources = create_flag
            self.ingest_test_per_channel(self.args, channels)

        # cleanup: per channel, drop the log file and the remote channel
        for ch in channels:
            # note: this mutates the shared self.args, it is not a copy
            self.args.channel = ch
            job = IngestJob(self.args)
            os.remove(job.get_log_fname())
            res_params = BossResParams(job)
            res_params.get_resources(get_only=True)
            res_params.rmt.delete_project(res_params.ch_resource)
        # the experiment is deleted once, via the last channel's params object
        if len(channels) > 0:
            res_params.rmt.delete_project(res_params.exp_resource)
예제 #7
0
    def test_per_channel_ingest_neg_z_extent_offset(self):
        """Ingest one channel whose z extent and z range start below zero, with offsets enabled."""
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

        self.args.experiment = 'test_neg_offset_' + stamp
        self.args.channel = 'def_files'
        self.args.datatype = 'uint8'
        self.args.z_extent = [-100, 100]
        self.args.z_range = [-3, 2]
        self.args.offset_extents = True
        self.args.extension = 'png'

        job = IngestJob(self.args)
        gen_images(job)

        # first call creates the resources, second call performs the ingest;
        # both are expected to return 0
        for create_flag in (True, False):
            self.args.create_resources = create_flag
            outcome = per_channel_ingest(self.args, self.args.channel)
            assert outcome == 0

        # cleanup: generated images, log file, then the remote channel/experiment
        del_test_images(job)
        os.remove(job.get_log_fname())
        res_params = BossResParams(job)
        res_params.get_resources(get_only=True)
        for resource in (res_params.ch_resource, res_params.exp_resource):
            res_params.rmt.delete_project(resource)
예제 #8
0
    def test_ingest_uint8_annotations(self):
        """Ingest generated uint8 tif images into a new channel that names a source channel."""
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")
        dtype = 'uint8'

        self.args.base_filename = 'img_annotation_<p:4>'
        self.args.channel = 'def_files_annotation_' + stamp
        self.args.channels_list_file = None
        self.args.source_channel = 'def_files'
        self.args.datatype = dtype
        self.args.extension = 'tif'
        self.args.create_resources = True

        job = IngestJob(self.args)

        gen_images(job, intensity_range=30)

        # remember the channel name for both ingest calls
        channel = self.args.channel

        # resource-creation pass (create_resources is already True)
        assert per_channel_ingest(self.args, channel) == 0

        # actual ingest pass
        self.args.create_resources = False
        assert per_channel_ingest(self.args, channel) == 0

        # cleanup: remote channel, generated images, log file
        res_params = BossResParams(job)
        res_params.get_resources(get_only=True)
        res_params.rmt.delete_project(res_params.ch_resource)

        del_test_images(job)
        os.remove(job.get_log_fname())
예제 #9
0
def iterate_posting_cutouts(cutouts):
    """Re-ingest cutouts, one batch per (collection, experiment, channel).

    The cutouts are grouped in a single pass by their ``collection``,
    ``experiment`` and ``channel`` attributes. For every group a message is
    sent through the group's last cutout, a fresh argument set is obtained
    from ``gather_info()`` and pointed at that group, the existing Boss
    resources are fetched (``get_only=True``) and the group is handed to
    ``ingest_cuts``.

    Args:
        cutouts: iterable of cutout objects exposing ``collection``,
            ``experiment``, ``channel`` and ``send_msg``.

    Returns:
        None. An empty input does nothing.
    """
    # one pass instead of re-filtering the whole list for every
    # collection/experiment/channel; order inside each group is input order
    grouped = {}
    for cu in cutouts:
        key = (cu.collection, cu.experiment, cu.channel)
        grouped.setdefault(key, []).append(cu)

    for (coll, exp, ch), cus_ch in grouped.items():
        # posts data for cutouts that share a common coll, exp, and ch
        msg = 'Repeating cutouts for collection {}, experiment {}, channel {}'.format(
            coll, exp, ch)
        cus_ch[-1].send_msg(msg)

        args = gather_info()
        args.collection = coll
        args.experiment = exp
        args.channel = ch
        args.get_extents = True

        ingest_job = IngestJob(args)
        # we get these things from the resources that already exist on the boss:
        boss_res_params = BossResParams(ingest_job)
        boss_res_params.get_resources(get_only=True)

        ingest_cuts(cus_ch, ingest_job, boss_res_params)
예제 #10
0
    def test_ingest_blocks_uint16_8_threads(self):
        """Ingest an 8-bucket-wide uint16 stack through ``ingest_block`` on 8 threads.

        Resources are created via ``per_channel_ingest``, the image stack is
        read locally, every x bucket is posted from a thread pool, and the
        first z slice downloaded from the Boss is compared to the local data.
        """
        now = datetime.now().strftime("%Y%m%d-%H%M%S%f")

        self.args.experiment = 'dev_ingest_larger' + now
        self.args.channel = 'def_files' + now
        self.args.x_extent = [0, 8 * 1024]
        self.args.z_range = [0, 16]
        self.args.datatype = 'uint16'
        self.args.extension = 'tif'

        # split the x extent into buckets of this width
        stride_x = 1024
        x_buckets = get_supercube_lims(self.args.x_extent, stride_x)

        ingest_job = IngestJob(self.args)
        gen_images(ingest_job)

        # resource-creation pass only; the data is posted below
        self.args.create_resources = True
        result = per_channel_ingest(self.args, self.args.channel)
        assert result == 0

        boss_res_params = BossResParams(ingest_job)
        boss_res_params.get_resources(get_only=True)

        z_slices = list(range(self.args.z_range[0], self.args.z_range[-1]))
        y_rng = self.args.y_extent

        im_array = ingest_job.read_img_stack(z_slices)

        threads = 8

        # everything but the x bucket key is fixed; the pool supplies the key
        ingest_block_partial = partial(
            ingest_block, x_buckets=x_buckets, boss_res_params=boss_res_params, ingest_job=ingest_job,
            y_rng=y_rng, z_rng=self.args.z_range, im_array=im_array)

        start_time = time.time()
        with ThreadPool(threads) as pool:
            pool.map(ingest_block_partial, x_buckets.keys())
        time_taken = time.time() - start_time
        print('{} secs taken with {} threads'.format(time_taken, threads))

        # compare the first slice from the Boss with the first local slice
        data_boss = download_boss_slice(
            boss_res_params, ingest_job, 0)[0, :, :]

        data_local = im_array[0, :, :]

        assert np.array_equal(data_boss, data_local)

        # cleanup
        ingest_job = IngestJob(self.args)
        boss_res_params = BossResParams(ingest_job)
        boss_res_params.get_resources(get_only=True)
        boss_res_params.rmt.delete_project(boss_res_params.ch_resource)
        boss_res_params.rmt.delete_project(boss_res_params.exp_resource)
        os.remove(ingest_job.get_log_fname())
        del_test_images(ingest_job)
예제 #11
0
    def test_get_boss_resources(self):
        """Build a job from the parsed cutout line and check the resource names."""
        # only the names are needed here; the x/y/z ranges of the line are unused
        coll, exp, ch, _x, _y, _z = parse_cut_line(self.cutout_text)

        # second element of the local ingest parameters is not used by this test
        (datasource, _, aws_profile, boss_config_file, base_path,
         base_filename, extension, z_step, datatype) = create_local_ingest_params()

        args = Namespace(
            datasource=datasource,
            collection=coll,
            experiment=exp,
            channel=ch,
            datatype=datatype,
            aws_profile=aws_profile,
            boss_config_file=boss_config_file,
            base_path=base_path,
            base_filename=base_filename,
            extension=extension,
            z_range=[0, 16],
            z_step=z_step,
            warn_missing_files=True,
            get_extents=True,
            res=0,
        )
        job = IngestJob(args)

        # coordinate frame is only fetched; the remaining resources may be created
        res_params = BossResParams(job)
        res_params.setup_boss_coord_frame(get_only=True)
        res_params.get_resources(get_only=False)

        assert res_params.coll_resource.name == coll
        assert res_params.exp_resource.name == exp
        assert res_params.ch_resource.name == ch

        # cleanup: log file, then the remote channel
        os.remove(job.get_log_fname())
        res_params.rmt.delete_project(res_params.ch_resource)
예제 #12
0
    def test_create_boss_res_offsets(self):
        """Create resources for a negative x extent with ``offset_extents`` enabled.

        Checks the resulting coordinate frame, the offsets held by the job and
        the ``offsets`` metadata stored on the experiment.
        """
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")
        expected_offsets = [500, 0, 0]

        args = Namespace(
            datasource='local',
            collection='ben_dev',
            experiment='dev_ingest_neg' + stamp,
            channel='def_files_' + stamp,
            boss_config_file=None,
            voxel_size=[1, 5, 1],
            voxel_unit='nanometers',
            datatype='uint16',
            res=0,
            x_extent=[-500, 500],
            y_extent=[0, 1024],
            z_extent=[200, 300],
            offset_extents=True,
        )

        job = IngestJob(args)
        res_params = BossResParams(job)
        res_params.get_resources(get_only=False)

        # x range [-500, 500] is expected to show up as [0, 1000] in the frame
        frame = res_params.coord_frame_resource
        assert frame.z_start == 200
        assert frame.z_stop == 300
        assert frame.x_start == 0
        assert frame.x_stop == 1000

        assert job.offsets == expected_offsets

        # the offsets are also stored as experiment metadata, as a string
        # holding a Python literal
        metadata = res_params.rmt.get_metadata(
            res_params.exp_resource, ['offsets'])
        stored_offsets = ast.literal_eval(metadata['offsets'])
        assert stored_offsets == expected_offsets

        # cleanup: log file, then the remote channel and experiment
        os.remove(job.get_log_fname())
        for resource in (res_params.ch_resource, res_params.exp_resource):
            res_params.rmt.delete_project(resource)
예제 #13
0
    def test_create_boss_annotation_channel(self):
        """Create a uint64 channel that names a source channel and check it is an annotation."""
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")
        expected_datatype = 'uint64'

        args = Namespace(
            datasource='local',
            collection='ben_dev',
            experiment='dev_ingest_4',
            channel='def_files_annotation_' + stamp,
            boss_config_file=None,
            source_channel='empty_' + stamp,
            voxel_size=[1, 1, 1],
            voxel_unit='micrometers',
            datatype=expected_datatype,
            res=0,
            x_extent=[0, 1000],
            y_extent=[0, 1024],
            z_extent=[0, 100],
        )

        job = IngestJob(args)
        res_params = BossResParams(job)
        res_params.get_resources(get_only=False)

        assert job.ch_name == res_params.ch_resource.name
        assert job.boss_datatype == expected_datatype
        assert job.ch_type == 'annotation'
        assert res_params.ch_resource.type == 'annotation'
        assert res_params.ch_resource.sources == [args.source_channel]

        # cleanup: log file and the annotation channel
        os.remove(job.get_log_fname())
        res_params.rmt.delete_project(res_params.ch_resource)

        # the source channel is looked up by name and deleted as well
        source_lookup = ChannelResource(
            args.source_channel, args.collection, args.experiment)
        res_params.rmt.delete_project(
            res_params.rmt.get_project(source_lookup))
예제 #14
0
    def test_create_boss_res(self):
        """Create a fresh experiment/channel and check the job mirrors the arguments."""
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

        args = Namespace(
            datasource='local',
            collection='ben_dev',
            experiment='dev_ingest_4' + stamp,
            channel='def_files_' + stamp,
            boss_config_file=None,
            voxel_size=[1, 5, 1],
            voxel_unit='nanometers',
            datatype='uint16',
            res=0,
            x_extent=[0, 1000],
            y_extent=[0, 1024],
            z_extent=[0, 100],
        )

        job = IngestJob(args)
        res_params = BossResParams(job)
        res_params.get_resources(get_only=False)

        # created resources
        assert res_params.ch_resource.name == args.channel
        assert res_params.exp_resource.hierarchy_method == 'anisotropic'

        # values on the job that should equal what was passed in
        assert job.x_extent == args.x_extent
        assert job.y_extent == args.y_extent
        assert job.z_extent == args.z_extent
        assert job.voxel_size == args.voxel_size
        assert job.voxel_unit == args.voxel_unit

        # remaining job attributes checked against fixed expectations
        assert job.img_size == [1000, 1024, 100]
        assert job.offsets == [0, 0, 0]
        assert job.boss_datatype == 'uint16'
        assert job.res == 0
        assert job.extension is None
        assert job.z_step is None

        # cleanup: log file, then the remote channel and experiment
        os.remove(job.get_log_fname())
        for resource in (res_params.ch_resource, res_params.exp_resource):
            res_params.rmt.delete_project(resource)
예제 #15
0
    def test_local_ingest_cuts(self):
        """Ingest a single cutout from local images and compare it with the Boss copy."""
        cut = create_cutout(self.cutout_text)

        (datasource, s3_bucket_name, aws_profile, boss_config_file, base_path,
         base_filename, extension, z_step, datatype) = create_local_ingest_params()

        args = Namespace(
            datasource=datasource,
            s3_bucket_name=s3_bucket_name,
            collection=cut.collection,
            experiment=cut.experiment,
            channel=cut.channel,
            datatype=datatype,
            aws_profile=aws_profile,
            boss_config_file=boss_config_file,
            base_path=base_path,
            base_filename=base_filename,
            extension=extension,
            z_range=[0, 16],
            z_step=z_step,
            warn_missing_files=True,
            get_extents=True,
            res=0,
        )

        job = IngestJob(args)
        # coordinate frame is only fetched; the remaining resources may be created
        res_params = BossResParams(job)
        res_params.setup_boss_coord_frame(get_only=True)
        res_params.get_resources(get_only=False)

        gen_images(job)

        # push the cutout to the Boss
        ingest_cuts([cut], job, res_params)

        # read the same region back at resolution 0
        remote_data = res_params.rmt.get_cutout(
            res_params.ch_resource, 0, cut.x, cut.y, cut.z)

        # load the matching z slices locally and crop to the cutout's x/y window
        local_stack = job.read_img_stack(range(cut.z[0], cut.z[1]))
        local_data = local_stack[:, cut.y[0]:cut.y[1], cut.x[0]:cut.x[1]]
        assert np.array_equal(local_data, remote_data)

        # cleanup: generated images and both log files
        del_test_images(job)
        os.remove(job.get_log_fname())
        os.remove(cut.log_fname)
예제 #16
0
def per_channel_ingest(args, channel, threads=8):
    """Set up the Boss resources for one channel, or ingest its image data.

    ``args.channel`` is overwritten with ``channel`` and an ``IngestJob`` is
    built from ``args``. Unless the data source is ``'render'``, the first
    image of the z range is inspected and must agree with the job's width,
    height and datatype. If the job has ``create_resources`` set, the
    resources are created/fetched and the function returns immediately.
    Otherwise the images are read in z stacks and posted block by block from
    a thread pool, followed by a final check via ``assert_equal`` and a
    summary message.

    Args:
        args: argument namespace used to construct the ``IngestJob``;
            its ``channel`` attribute is modified in place.
        channel: name of the channel to work on.
        threads: number of worker threads in the pool used for posting blocks.

    Returns:
        0 on completion, both in the resource-creation and the ingest case.

    Raises:
        ValueError: if the inspected image's width, height or datatype does
            not match the job's parameters.
    """
    args.channel = channel
    ingest_job = IngestJob(args)

    # extract img_size and datatype to check inputs (by actually reading the data)
    # this can take a while, as we actually load in the first image slice,
    # so we should store this first slice so we don't have to load it again when we later read the entire chunk in z
    # we don't do this for render data source because we get the image size and attributes from the render metadata and the # of bits aren't in the metadata or render
    if ingest_job.datasource != 'render':
        im_width, im_height, im_datatype = ingest_job.get_img_info(
            ingest_job.z_range[0])

        # we do this before creating boss resources that could be inaccurate.
        # An explicit condition is used instead of `assert`, because asserts
        # are removed when Python runs with -O and the check would vanish.
        params_match = (ingest_job.img_size[0] == im_width
                        and ingest_job.img_size[1] == im_height
                        and ingest_job.datatype == im_datatype)
        if not params_match:
            ingest_job.send_msg('Mismatch between image file and input parameters. Determined image width: {}, height: {}, datatype: {}'.format(
                im_width, im_height, im_datatype))
            raise ValueError('Image attributes do not match arguments')

    # create or get the boss resources for the data
    get_only = not ingest_job.create_resources
    boss_res_params = BossResParams(ingest_job)
    boss_res_params.get_resources(get_only=get_only)

    # we just create the resources, don't do anything else
    if ingest_job.create_resources:
        ingest_job.send_msg('{} Resources set up. Collection: {}, Experiment: {}, Channel: {}'.format(
            get_formatted_datetime(), ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name))
        return 0

    ingest_job.send_msg('{} Starting ingest for Collection: {}, Experiment: {}, Channel: {}, Z: {z[0]},{z[1]}'.format(
        get_formatted_datetime(), ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name, z=ingest_job.z_range))

    # we begin the ingest here:
    stride_x = 1024
    stride_y = 1024
    stride_z = 16
    x_buckets = get_supercube_lims(ingest_job.x_extent, stride_x)
    y_buckets = get_supercube_lims(ingest_job.y_extent, stride_y)
    z_buckets = get_supercube_lims(ingest_job.z_range, stride_z)

    with ThreadPool(threads) as pool:

        # load images files in stacks of 16 at a time into numpy array
        for z_slices in z_buckets.values():
            # read images into numpy array
            im_array = ingest_job.read_img_stack(z_slices)
            # z range of this stack with the job's z offset subtracted
            z_rng = [z_slices[0] - ingest_job.offsets[2],
                     z_slices[-1] + 1 - ingest_job.offsets[2]]

            # slice into np array blocks
            for y_slices in y_buckets.values():
                y_rng = [y_slices[0], y_slices[-1] + 1]

                # the pool supplies the x bucket key; everything else is fixed
                ingest_block_partial = partial(
                    ingest_block, x_buckets=x_buckets, boss_res_params=boss_res_params, ingest_job=ingest_job,
                    y_rng=y_rng, z_rng=z_rng, im_array=im_array)
                pool.map(ingest_block_partial, x_buckets.keys())

    # checking data posted correctly for an entire z slice
    assert_equal(boss_res_params, ingest_job, ingest_job.z_range)

    ch_link = (
        'https://ndwebtools.neurodata.io/channel_detail/{}/{}/{}/').format(ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name)

    ingest_job.send_msg('{} Finished z slices {} for Collection: {}, Experiment: {}, Channel: {}\nThere were {} read failures and {} POST failures.\nView properties of channel and start downsample job on ndwebtools: {}'.format(
        get_formatted_datetime(),
        ingest_job.z_range, ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name,
        ingest_job.num_READ_failures, ingest_job.num_POST_failures, ch_link), send_slack=True)

    return 0