def test_get_img_info_uint16_tif_s3(self):
    self.set_s3_args()
    ingest_job = IngestJob(self.args)

    z_slice = 0

    # create an image
    img_fname = ingest_job.get_img_fname(z_slice)
    create_img_file(ingest_job.img_size[0], ingest_job.img_size[1],
                    self.args.datatype, self.args.extension, img_fname)

    # put the image on a bucket
    s3 = boto3.resource('s3')
    with open(img_fname, 'rb') as img_file:
        s3.Bucket(self.args.s3_bucket_name).put_object(
            Key=img_fname, Body=img_file)

    # get info on that image
    im_width, im_height, im_datatype = ingest_job.get_img_info(z_slice)

    # assert the info is correct
    im = np.array(Image.open(img_fname))
    assert im_width == im.shape[1]
    assert im_height == im.shape[0]
    assert im_datatype == im.dtype

    s3.Bucket(self.args.s3_bucket_name).delete_objects(
        Delete={'Objects': [{'Key': img_fname}]})
    os.remove(ingest_job.get_log_fname())
    os.remove(img_fname)
def test_get_img_info_uint16_png(self):
    file_format = 'png'
    self.args.file_format = file_format
    ingest_job = IngestJob(self.args)

    # create a local uint16 PNG image for the first z slice
    z_slice = 0
    img_fname = ingest_job.get_img_fname(z_slice)
    create_img_file(ingest_job.img_size[0], ingest_job.img_size[1],
                    self.args.datatype, self.args.extension, img_fname)

    # get info on that image and check it against the job parameters
    im_width, im_height, im_datatype = ingest_job.get_img_info(z_slice)

    assert im_width == ingest_job.img_size[0]
    assert im_height == ingest_job.img_size[1]
    assert im_datatype == self.args.datatype

    os.remove(ingest_job.get_log_fname())
    os.remove(img_fname)
def test_get_img_info_uint64_tif(self):
    file_format = 'tif'
    dtype = 'uint64'
    self.args.file_format = file_format
    self.args.datatype = dtype
    self.args.source_channel = 'def_files'
    ingest_job = IngestJob(self.args)

    # create a local uint64 TIFF image for the first z slice
    z_slice = 0
    img_fname = ingest_job.get_img_fname(z_slice)
    create_img_file(ingest_job.img_size[0], ingest_job.img_size[1],
                    self.args.datatype, self.args.extension, img_fname)

    # get info on that image and check it against the job parameters
    im_width, im_height, im_datatype = ingest_job.get_img_info(z_slice)

    assert im_width == ingest_job.img_size[0]
    assert im_height == ingest_job.img_size[1]
    assert im_datatype == self.args.datatype

    os.remove(ingest_job.get_log_fname())
    os.remove(img_fname)
def per_channel_ingest(args, channel, threads=8):
    args.channel = channel
    ingest_job = IngestJob(args)

    # Extract img_size and datatype to check the inputs by actually reading
    # the data. This can take a while, since we load the first image slice;
    # ideally we would cache this slice so we don't have to load it again
    # when we later read the entire chunk in z. We skip this check for the
    # 'render' data source because image size and attributes come from the
    # render metadata, which does not include the bit depth.
    if ingest_job.datasource != 'render':
        im_width, im_height, im_datatype = ingest_job.get_img_info(
            ingest_job.z_range[0])

        # validate before creating Boss resources that could be inaccurate
        try:
            assert (ingest_job.img_size[0] == im_width
                    and ingest_job.img_size[1] == im_height
                    and ingest_job.datatype == im_datatype)
        except AssertionError:
            ingest_job.send_msg(
                'Mismatch between image file and input parameters. '
                'Determined image width: {}, height: {}, datatype: {}'.format(
                    im_width, im_height, im_datatype))
            raise ValueError('Image attributes do not match arguments')

    # create or get the Boss resources for the data
    get_only = not ingest_job.create_resources
    boss_res_params = BossResParams(ingest_job)
    boss_res_params.get_resources(get_only=get_only)

    # if we only need to create the resources, we are done
    if ingest_job.create_resources:
        ingest_job.send_msg('{} Resources set up. Collection: {}, Experiment: {}, Channel: {}'.format(
            get_formatted_datetime(), ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name))
        return 0
    else:
        ingest_job.send_msg('{} Starting ingest for Collection: {}, Experiment: {}, Channel: {}, Z: {z[0]},{z[1]}'.format(
            get_formatted_datetime(), ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name,
            z=ingest_job.z_range))

    # begin the ingest: partition the volume into supercube-aligned buckets
    stride_x = 1024
    stride_y = 1024
    stride_z = 16
    x_buckets = get_supercube_lims(ingest_job.x_extent, stride_x)
    y_buckets = get_supercube_lims(ingest_job.y_extent, stride_y)
    z_buckets = get_supercube_lims(ingest_job.z_range, stride_z)

    with ThreadPool(threads) as pool:
        # load image files in stacks of 16 at a time into a numpy array
        for _, z_slices in z_buckets.items():
            # read images into numpy array
            im_array = ingest_job.read_img_stack(z_slices)
            z_rng = [z_slices[0] - ingest_job.offsets[2],
                     z_slices[-1] + 1 - ingest_job.offsets[2]]

            # slice the numpy array into blocks and POST them in parallel
            for _, y_slices in y_buckets.items():
                y_rng = [y_slices[0], y_slices[-1] + 1]
                ingest_block_partial = partial(
                    ingest_block, x_buckets=x_buckets,
                    boss_res_params=boss_res_params, ingest_job=ingest_job,
                    y_rng=y_rng, z_rng=z_rng, im_array=im_array)
                pool.map(ingest_block_partial, x_buckets.keys())

    # check that the data for the entire z range was posted correctly
    assert_equal(boss_res_params, ingest_job, ingest_job.z_range)

    ch_link = 'https://ndwebtools.neurodata.io/channel_detail/{}/{}/{}/'.format(
        ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name)
    ingest_job.send_msg('{} Finished z slices {} for Collection: {}, Experiment: {}, Channel: {}\n'
                        'There were {} read failures and {} POST failures.\n'
                        'View properties of channel and start downsample job on ndwebtools: {}'.format(
                            get_formatted_datetime(), ingest_job.z_range,
                            ingest_job.coll_name, ingest_job.exp_name, ingest_job.ch_name,
                            ingest_job.num_READ_failures, ingest_job.num_POST_failures,
                            ch_link), send_slack=True)

    return 0
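# Illustrative usage sketch (an assumption, not part of the original module):
# how a caller might drive per_channel_ingest across several channels.
# `parse_args` and `args.channels_list` are hypothetical stand-ins for the
# script's real command-line entry point.
#
#   def main():
#       args = parse_args()  # hypothetical CLI parser building IngestJob args
#       for channel in args.channels_list:  # hypothetical list of channel names
#           result = per_channel_ingest(args, channel, threads=8)
#           assert result == 0  # per_channel_ingest returns 0 on success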