def test_get_boss_res_wrong_img_size(self):
    """Expect HTTPError from a get-only resource lookup.

    The job uses timestamp-suffixed experiment and channel names together
    with the extents and voxel size given below.
    """
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

    args = Namespace(
        datasource='local',
        collection='ben_dev',
        experiment='dev_ingest_4' + stamp,
        channel='def_files_' + stamp,
        boss_config_file=None,
        voxel_size=[1, 5, 1],
        voxel_unit='nanometers',
        datatype='uint16',
        res=0,
        x_extent=[0, 2000],
        y_extent=[0, 1000],
        z_extent=[0, 50],
    )
    job = IngestJob(args)

    # both the construction and the lookup are allowed to raise
    with pytest.raises(HTTPError):
        res_params = BossResParams(job)
        res_params.get_resources(get_only=True)

    # cleanup: drop the log file written for this job
    os.remove(job.get_log_fname())
def test_get_boss_annotation_channel(self):
    """Get-only lookup of an annotation channel; check name, type, datatype and sources."""
    expected_dtype = 'uint64'

    args = Namespace(
        datasource='local',
        collection='ben_dev',
        experiment='dev_ingest_4',
        channel='def_files_annot',
        boss_config_file=None,
        source_channel='def_files',
        x_extent=[0, 1000],
        y_extent=[0, 1024],
        z_extent=[0, 100],
    )
    job = IngestJob(args)
    res_params = BossResParams(job)
    res_params.get_resources(get_only=True)

    # the job and the channel resource must agree on the channel
    assert job.ch_name == res_params.ch_resource.name
    assert job.boss_datatype == expected_dtype
    assert job.ch_type == 'annotation'
    assert res_params.ch_resource.type == 'annotation'
    assert res_params.ch_resource.sources == [args.source_channel]

    # cleanup: drop the log file written for this job
    os.remove(job.get_log_fname())
def test_create_boss_res_specified_coord_frame(self):
    """Create resources with an explicit ``coord_frame_x_extent`` and check the frame bounds.

    The job's own x/y extents and offsets are expected to stay as passed in.
    """
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

    args = Namespace(
        datasource='local',
        collection='ben_dev',
        experiment='dev_ingest_neg' + stamp,
        channel='def_files_' + stamp,
        boss_config_file=None,
        voxel_size=[1, 5, 1],
        voxel_unit='nanometers',
        datatype='uint16',
        res=0,
        x_extent=[100, 1100],
        y_extent=[0, 1024],
        z_extent=[200, 300],
        coord_frame_x_extent=[0, 2000],
    )
    job = IngestJob(args)
    res_params = BossResParams(job)
    res_params.get_resources(get_only=False)

    # coordinate frame: z follows z_extent, x follows coord_frame_x_extent
    assert res_params.coord_frame_resource.z_start == 200
    assert res_params.coord_frame_resource.z_stop == 300
    assert res_params.coord_frame_resource.x_start == 0
    assert res_params.coord_frame_resource.x_stop == 2000

    # the job keeps the extents it was given and records no offsets
    assert job.x_extent == [100, 1100]
    assert job.y_extent == [0, 1024]
    assert job.offsets == [0, 0, 0]

    # cleanup: log file first, then channel and experiment
    os.remove(job.get_log_fname())
    for resource in (res_params.ch_resource, res_params.exp_resource):
        res_params.rmt.delete_project(resource)
def test_get_boss_res_params_just_names(self):
    """Get-only lookup given only names and extents; check the values filled in on the job."""
    args = Namespace(
        datasource='local',
        collection='ben_dev',
        experiment='dev_ingest_4',
        channel='def_files',
        boss_config_file=None,
        x_extent=[0, 1000],
        y_extent=[0, 1024],
        z_extent=[0, 100],
    )
    job = IngestJob(args)
    res_params = BossResParams(job)
    res_params.get_resources(get_only=True)

    # resource names mirror the arguments
    assert res_params.coll_resource.name == args.collection
    assert res_params.exp_resource.name == args.experiment
    assert res_params.ch_resource.name == args.channel
    assert res_params.exp_resource.hierarchy_method == 'isotropic'

    # values that were not supplied in args
    assert job.voxel_size == [1, 1, 1]
    assert job.voxel_unit == 'micrometers'
    assert job.offsets == [0, 0, 0]
    assert job.datatype == 'uint16'
    assert job.boss_datatype == 'uint16'
    assert job.res == 0
    assert job.extension is None
    assert job.z_step is None

    # cleanup: drop the log file written for this job
    os.remove(job.get_log_fname())
def test_post_uint16_cutout(self):
    """Post a random non-zero uint16 block to a timestamp-named channel and read it back."""
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

    dtype = 'uint16'
    x_size = y_size = 128
    z_stop = self.args.z_range[1]

    # the bit width is taken from the digits in the dtype name
    n_bits = int(''.join(ch for ch in dtype if ch.isdigit()))

    # generate a block of data (lower bound of 1 keeps every voxel non-zero)
    data = np.random.randint(
        1, 2**n_bits, size=(z_stop, y_size, x_size), dtype=dtype)

    self.args.datatype = dtype
    self.args.channel = 'def_files' + stamp

    job = IngestJob(self.args)
    res_params = BossResParams(job)
    res_params.get_resources(get_only=False)

    # post the data to boss
    status = post_cutout(res_params, job,
                         [0, x_size], [0, y_size], [0, z_stop],
                         data, attempts=1)
    assert status == 0

    # read data out of boss
    data_boss = res_params.rmt.get_cutout(
        res_params.ch_resource, 0, [0, x_size], [0, y_size], [0, z_stop])

    # assert they are the same
    assert np.array_equal(data_boss, data)

    # cleanup
    res_params.rmt.delete_project(res_params.ch_resource)
    os.remove(job.get_log_fname())
def test_per_channel_ingest_neg_x_extent_offset(self):
    """Run the per-channel ingest helper with a negative x extent and ``offset_extents`` on.

    Channels come from the channels list file; the assertions themselves
    live inside ``ingest_test_per_channel``.
    """
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

    overrides = {
        'experiment': 'test_neg_offset_' + stamp,
        'channel': 'def_files',
        'datatype': 'uint16',
        'x_extent': [-1000, 0],
        'offset_extents': True,
        'extension': 'png',
        'channels_list_file': 'tests/channels.test.txt',
    }
    for attr, value in overrides.items():
        setattr(self.args, attr, value)

    channels = read_channel_names(self.args.channels_list_file)

    # assertions are inside ingest_test_per_channel
    # the first pass (create_resources=True) is to create resources only
    for create in (True, False):
        self.args.create_resources = create
        self.ingest_test_per_channel(self.args, channels)

    # cleanup: one log file and one channel resource per channel
    for ch in channels:
        self.args.channel = ch
        job = IngestJob(self.args)
        os.remove(job.get_log_fname())
        res_params = BossResParams(job)
        res_params.get_resources(get_only=True)
        res_params.rmt.delete_project(res_params.ch_resource)

    # res_params only exists if the loop above ran at least once
    if len(channels) > 0:
        res_params.rmt.delete_project(res_params.exp_resource)
def test_per_channel_ingest_neg_z_extent_offset(self):
    """Ingest uint8 PNG slices with a negative z extent/range and ``offset_extents`` on."""
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

    overrides = {
        'experiment': 'test_neg_offset_' + stamp,
        'channel': 'def_files',
        'datatype': 'uint8',
        'z_extent': [-100, 100],
        'z_range': [-3, 2],
        'offset_extents': True,
        'extension': 'png',
    }
    for attr, value in overrides.items():
        setattr(self.args, attr, value)

    job = IngestJob(self.args)
    gen_images(job)

    # run once with create_resources set, then once without; both must return 0
    for create in (True, False):
        self.args.create_resources = create
        assert per_channel_ingest(self.args, self.args.channel) == 0

    # cleanup
    del_test_images(job)
    os.remove(job.get_log_fname())

    res_params = BossResParams(job)
    res_params.get_resources(get_only=True)
    res_params.rmt.delete_project(res_params.ch_resource)
    res_params.rmt.delete_project(res_params.exp_resource)
def test_ingest_uint8_annotations(self):
    """Ingest generated uint8 TIFF images into a timestamp-named channel with a source channel."""
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

    overrides = {
        'base_filename': 'img_annotation_<p:4>',
        'channel': 'def_files_annotation_' + stamp,
        'channels_list_file': None,
        'source_channel': 'def_files',
        'datatype': 'uint8',
        'extension': 'tif',
        'create_resources': True,
    }
    for attr, value in overrides.items():
        setattr(self.args, attr, value)

    job = IngestJob(self.args)
    gen_images(job, intensity_range=30)

    channel = self.args.channel

    # run once with create_resources set, then once without; both must return 0
    for create in (True, False):
        self.args.create_resources = create
        assert per_channel_ingest(self.args, channel) == 0

    # cleanup
    res_params = BossResParams(job)
    res_params.get_resources(get_only=True)
    res_params.rmt.delete_project(res_params.ch_resource)

    del_test_images(job)
    os.remove(job.get_log_fname())
def iterate_posting_cutouts(cutouts):
    """Re-post cutouts, one batch per (collection, experiment, channel).

    The cutouts are bucketed by their ``collection``, ``experiment`` and
    ``channel`` attributes.  For each bucket a message is sent through the
    bucket's last cutout, an ``IngestJob`` is built from ``gather_info()``
    with the bucket's names and ``get_extents=True``, the Boss resources are
    fetched with ``get_only=True``, and the bucket is handed to
    ``ingest_cuts``.

    Args:
        cutouts: iterable of cutout objects exposing ``collection``,
            ``experiment``, ``channel`` and ``send_msg``.  It is traversed
            exactly once, so a generator is acceptable.
    """
    # Bucket in a single pass instead of re-scanning the list for every
    # collection/experiment/channel.  A dict (Python 3.7+) keeps first-seen
    # order, so batches are processed in a reproducible order rather than
    # in set-iteration order.
    grouped = {}
    for cu in cutouts:
        key = (cu.collection, cu.experiment, cu.channel)
        grouped.setdefault(key, []).append(cu)

    for (coll, exp, ch), cus_ch in grouped.items():
        # posts data for cutouts that share a common coll, exp, and ch
        msg = 'Repeating cutouts for collection {}, experiment {}, channel {}'.format(
            coll, exp, ch)
        cus_ch[-1].send_msg(msg)

        args = gather_info()
        args.collection = coll
        args.experiment = exp
        args.channel = ch
        args.get_extents = True

        ingest_job = IngestJob(args)

        # we get these things from the resources that already exist on the boss:
        boss_res_params = BossResParams(ingest_job)
        boss_res_params.get_resources(get_only=True)

        ingest_cuts(cus_ch, ingest_job, boss_res_params)
def test_ingest_blocks_uint16_8_threads(self):
    """Post one z stack of uint16 TIFF data with 8 threads and compare slice 0 with the Boss.

    Resources are created through ``per_channel_ingest`` (with
    ``create_resources`` set); the blocks are then posted directly by
    mapping ``ingest_block`` over the x buckets in a thread pool.
    """
    now = datetime.now().strftime("%Y%m%d-%H%M%S%f")

    self.args.experiment = 'dev_ingest_larger' + now
    self.args.channel = 'def_files' + now
    self.args.x_extent = [0, 8 * 1024]
    self.args.z_range = [0, 16]
    self.args.datatype = 'uint16'
    self.args.extension = 'tif'

    stride_x = 1024
    x_buckets = get_supercube_lims(self.args.x_extent, stride_x)

    ingest_job = IngestJob(self.args)
    gen_images(ingest_job)

    self.args.create_resources = True
    result = per_channel_ingest(self.args, self.args.channel)
    assert result == 0

    boss_res_params = BossResParams(ingest_job)
    boss_res_params.get_resources(get_only=True)

    z_slices = list(range(self.args.z_range[0], self.args.z_range[-1]))
    y_rng = self.args.y_extent

    im_array = ingest_job.read_img_stack(z_slices)

    threads = 8

    ingest_block_partial = partial(
        ingest_block, x_buckets=x_buckets, boss_res_params=boss_res_params,
        ingest_job=ingest_job, y_rng=y_rng, z_rng=self.args.z_range,
        im_array=im_array)

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments during the run
    start_time = time.perf_counter()
    with ThreadPool(threads) as pool:
        pool.map(ingest_block_partial, x_buckets.keys())
    time_taken = time.perf_counter() - start_time
    print('{} secs taken with {} threads'.format(time_taken, threads))

    # compare the first slice from the boss with the locally read one
    data_boss = download_boss_slice(
        boss_res_params, ingest_job, 0)[0, :, :]
    data_local = im_array[0, :, :]
    assert np.array_equal(data_boss, data_local)

    # cleanup
    ingest_job = IngestJob(self.args)
    boss_res_params = BossResParams(ingest_job)
    boss_res_params.get_resources(get_only=True)
    boss_res_params.rmt.delete_project(boss_res_params.ch_resource)
    boss_res_params.rmt.delete_project(boss_res_params.exp_resource)

    os.remove(ingest_job.get_log_fname())
    del_test_images(ingest_job)
def test_get_boss_resources(self):
    """Build a job from the parsed cutout line and check the resource names match it."""
    coll, exp, ch, x, y, z = parse_cut_line(self.cutout_text)

    # the second returned value is not needed by this test
    (datasource, _, aws_profile, boss_config_file, base_path,
     base_filename, extension, z_step, datatype) = create_local_ingest_params()

    args = Namespace(
        datasource=datasource,
        collection=coll,
        experiment=exp,
        channel=ch,
        datatype=datatype,
        aws_profile=aws_profile,
        boss_config_file=boss_config_file,
        base_path=base_path,
        base_filename=base_filename,
        extension=extension,
        z_range=[0, 16],
        z_step=z_step,
        warn_missing_files=True,
        get_extents=True,
        res=0,
    )

    job = IngestJob(args)
    res_params = BossResParams(job)
    res_params.setup_boss_coord_frame(get_only=True)
    res_params.get_resources(get_only=False)

    assert res_params.coll_resource.name == coll
    assert res_params.exp_resource.name == exp
    assert res_params.ch_resource.name == ch

    # cleanup: log file, then the channel resource
    os.remove(job.get_log_fname())
    res_params.rmt.delete_project(res_params.ch_resource)
def test_create_boss_res_offsets(self):
    """Create resources for a negative x extent with ``offset_extents`` and check the offsets.

    The offsets are checked both on the job and in the ``offsets`` metadata
    stored on the experiment resource.
    """
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

    args = Namespace(
        datasource='local',
        collection='ben_dev',
        experiment='dev_ingest_neg' + stamp,
        channel='def_files_' + stamp,
        boss_config_file=None,
        voxel_size=[1, 5, 1],
        voxel_unit='nanometers',
        datatype='uint16',
        res=0,
        x_extent=[-500, 500],
        y_extent=[0, 1024],
        z_extent=[200, 300],
        offset_extents=True,
    )
    job = IngestJob(args)
    res_params = BossResParams(job)
    res_params.get_resources(get_only=False)

    # x is shifted to start at 0; z is unchanged
    assert res_params.coord_frame_resource.z_start == 200
    assert res_params.coord_frame_resource.z_stop == 300
    assert res_params.coord_frame_resource.x_start == 0
    assert res_params.coord_frame_resource.x_stop == 1000
    assert job.offsets == [500, 0, 0]

    # testing to make sure offsets were recorded properly
    # (the metadata value is a string, parsed back into a list here)
    metadata = res_params.rmt.get_metadata(res_params.exp_resource, ['offsets'])
    recorded_offsets = ast.literal_eval(metadata['offsets'])
    assert recorded_offsets == [500, 0, 0]

    # cleanup: log file first, then channel and experiment
    os.remove(job.get_log_fname())
    for resource in (res_params.ch_resource, res_params.exp_resource):
        res_params.rmt.delete_project(resource)
def test_create_boss_annotation_channel(self):
    """Create a uint64 annotation channel with a timestamp-named source channel and verify it."""
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")
    expected_dtype = 'uint64'

    args = Namespace(
        datasource='local',
        collection='ben_dev',
        experiment='dev_ingest_4',
        channel='def_files_annotation_' + stamp,
        boss_config_file=None,
        source_channel='empty_' + stamp,
        voxel_size=[1, 1, 1],
        voxel_unit='micrometers',
        datatype=expected_dtype,
        res=0,
        x_extent=[0, 1000],
        y_extent=[0, 1024],
        z_extent=[0, 100],
    )
    job = IngestJob(args)
    res_params = BossResParams(job)
    res_params.get_resources(get_only=False)

    assert job.ch_name == res_params.ch_resource.name
    assert job.boss_datatype == expected_dtype
    assert job.ch_type == 'annotation'
    assert res_params.ch_resource.type == 'annotation'
    assert res_params.ch_resource.sources == [args.source_channel]

    # cleanup: log file, then the annotation channel
    os.remove(job.get_log_fname())
    res_params.rmt.delete_project(res_params.ch_resource)

    # removing the source channel
    source_resource = res_params.rmt.get_project(
        ChannelResource(args.source_channel, args.collection, args.experiment))
    res_params.rmt.delete_project(source_resource)
def test_create_boss_res(self):
    """Create experiment/channel resources and check the job reflects the given arguments."""
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")

    args = Namespace(
        datasource='local',
        collection='ben_dev',
        experiment='dev_ingest_4' + stamp,
        channel='def_files_' + stamp,
        boss_config_file=None,
        voxel_size=[1, 5, 1],
        voxel_unit='nanometers',
        datatype='uint16',
        res=0,
        x_extent=[0, 1000],
        y_extent=[0, 1024],
        z_extent=[0, 100],
    )
    job = IngestJob(args)
    res_params = BossResParams(job)
    res_params.get_resources(get_only=False)

    assert res_params.ch_resource.name == args.channel
    assert res_params.exp_resource.hierarchy_method == 'anisotropic'

    # values passed straight through from args
    assert job.x_extent == args.x_extent
    assert job.y_extent == args.y_extent
    assert job.z_extent == args.z_extent
    assert job.voxel_size == args.voxel_size
    assert job.voxel_unit == args.voxel_unit

    # remaining job attributes checked by this test
    assert job.img_size == [1000, 1024, 100]
    assert job.offsets == [0, 0, 0]
    assert job.boss_datatype == 'uint16'
    assert job.res == 0
    assert job.extension is None
    assert job.z_step is None

    # cleanup: log file first, then channel and experiment
    os.remove(job.get_log_fname())
    for resource in (res_params.ch_resource, res_params.exp_resource):
        res_params.rmt.delete_project(resource)
def test_local_ingest_cuts(self):
    """Ingest one cutout from generated local images and compare the Boss data with the files."""
    cut = create_cutout(self.cutout_text)
    coll, exp, ch = cut.collection, cut.experiment, cut.channel

    (datasource, s3_bucket_name, aws_profile, boss_config_file, base_path,
     base_filename, extension, z_step, datatype) = create_local_ingest_params()

    args = Namespace(
        datasource=datasource,
        s3_bucket_name=s3_bucket_name,
        collection=coll,
        experiment=exp,
        channel=ch,
        datatype=datatype,
        aws_profile=aws_profile,
        boss_config_file=boss_config_file,
        base_path=base_path,
        base_filename=base_filename,
        extension=extension,
        z_range=[0, 16],
        z_step=z_step,
        warn_missing_files=True,
        get_extents=True,
        res=0,
    )

    job = IngestJob(args)
    res_params = BossResParams(job)
    res_params.setup_boss_coord_frame(get_only=True)
    res_params.get_resources(get_only=False)

    gen_images(job)

    # ingest the cut
    ingest_cuts([cut], job, res_params)

    # pull the data from the boss after the new ingest
    data_boss = res_params.rmt.get_cutout(
        res_params.ch_resource, 0, cut.x, cut.y, cut.z)

    # loading data locally for comparison, cropped to the cutout's x/y window
    local_stack = job.read_img_stack(range(cut.z[0], cut.z[1]))
    data_local = local_stack[:, cut.y[0]:cut.y[1], cut.x[0]:cut.x[1]]

    # test to make sure it's the same as local file
    assert np.array_equal(data_local, data_boss)

    # cleanup: generated images and both log files
    del_test_images(job)
    os.remove(job.get_log_fname())
    os.remove(cut.log_fname)
def per_channel_ingest(args, channel, threads=8):
    """Create the Boss resources for one channel, or ingest that channel's images.

    ``args.channel`` is set to ``channel`` and an ``IngestJob`` is built from
    ``args``.  When the job's ``create_resources`` flag is set, the resources
    are created/fetched and the function returns without posting any data.
    Otherwise the resources are fetched with ``get_only=True`` and the images
    in ``ingest_job.z_range`` are read in z stacks and posted block by block
    through a thread pool.

    Args:
        args: argument namespace used to build the ``IngestJob``; its
            ``channel`` attribute is overwritten.
        channel: name of the channel to process.
        threads: number of worker threads used for posting blocks.

    Returns:
        0 once the resources are set up or the ingest has finished.

    Raises:
        ValueError: if the width, height or datatype read from the first
            image slice differ from the job's parameters (the check is
            skipped for the 'render' datasource).
    """
    args.channel = channel
    ingest_job = IngestJob(args)

    # extract img_size and datatype to check inputs (by actually reading the data)
    # this can take a while, as we actually load in the first image slice,
    # so we should store this first slice so we don't have to load it again when we later read the entire chunk in z
    # we don't do this for render data source because we get the image size and attributes from the render metadata
    # and the # of bits aren't in the metadata or render
    if ingest_job.datasource != 'render':
        im_width, im_height, im_datatype = ingest_job.get_img_info(
            ingest_job.z_range[0])

        # we do this before creating boss resources that could be inaccurate
        # (explicit comparison rather than `assert`, which is stripped under
        # `python -O` and would silently disable this validation)
        attributes_match = (ingest_job.img_size[0] == im_width
                            and ingest_job.img_size[1] == im_height
                            and ingest_job.datatype == im_datatype)
        if not attributes_match:
            ingest_job.send_msg('Mismatch between image file and input parameters. Determined image width: {}, height: {}, datatype: {}'.format(
                im_width, im_height, im_datatype))
            raise ValueError('Image attributes do not match arguments')

    # create or get the boss resources for the data
    get_only = not ingest_job.create_resources
    boss_res_params = BossResParams(ingest_job)
    boss_res_params.get_resources(get_only=get_only)

    # we just create the resources, don't do anything else
    if ingest_job.create_resources:
        ingest_job.send_msg('{} Resources set up. Collection: {}, Experiment: {}, Channel: {}'.format(
            get_formatted_datetime(), ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name))
        return 0

    ingest_job.send_msg('{} Starting ingest for Collection: {}, Experiment: {}, Channel: {}, Z: {z[0]},{z[1]}'.format(
        get_formatted_datetime(), ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name, z=ingest_job.z_range))

    # we begin the ingest here:
    stride_x = 1024
    stride_y = 1024
    stride_z = 16
    x_buckets = get_supercube_lims(ingest_job.x_extent, stride_x)
    y_buckets = get_supercube_lims(ingest_job.y_extent, stride_y)
    z_buckets = get_supercube_lims(ingest_job.z_range, stride_z)

    with ThreadPool(threads) as pool:
        # load images files in stacks of 16 at a time into numpy array
        for z_slices in z_buckets.values():
            # read images into numpy array
            im_array = ingest_job.read_img_stack(z_slices)

            # z range of this stack with the job's z offset subtracted
            z_offset = ingest_job.offsets[2]
            z_rng = [z_slices[0] - z_offset, z_slices[-1] + 1 - z_offset]

            # slice into np array blocks
            for y_slices in y_buckets.values():
                y_rng = [y_slices[0], y_slices[-1] + 1]

                ingest_block_partial = partial(
                    ingest_block, x_buckets=x_buckets, boss_res_params=boss_res_params,
                    ingest_job=ingest_job, y_rng=y_rng, z_rng=z_rng, im_array=im_array)

                # one task per x bucket; map blocks until the whole row is posted
                pool.map(ingest_block_partial, x_buckets.keys())

    # checking data posted correctly for an entire z slice
    assert_equal(boss_res_params, ingest_job, ingest_job.z_range)

    ch_link = (
        'https://ndwebtools.neurodata.io/channel_detail/{}/{}/{}/').format(
            ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name)

    ingest_job.send_msg('{} Finished z slices {} for Collection: {}, Experiment: {}, Channel: {}\nThere were {} read failures and {} POST failures.\nView properties of channel and start downsample job on ndwebtools: {}'.format(
        get_formatted_datetime(), ingest_job.z_range, ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name,
        ingest_job.num_READ_failures, ingest_job.num_POST_failures, ch_link), send_slack=True)

    return 0