def test_download_frames(self, mock_session):
    mock_session.return_value.__enter__.return_value = self.session
    # Create dest dir
    self.tempdir.makedir('dest_dir')
    dest_dir = os.path.join(self.temp_path, 'dest_dir')
    # Download data
    data_downloader.download_data(
        dataset_serial=self.dataset_serial,
        login=self.credentials_path,
        dest=dest_dir,
        storage_access=self.mount_point,
    )
    # Images are separated by slice first then channel
    im_order = [0, 2, 4, 1, 3, 5]
    it = itertools.product(range(self.nbr_channels), range(self.nbr_slices))
    for i, (c, z) in enumerate(it):
        im_name = 'im_c00{}_z00{}_t000_p000.png'.format(c, z)
        im_path = os.path.join(
            dest_dir,
            self.dataset_serial,
            im_name,
        )
        im = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
        numpy.testing.assert_array_equal(im, self.im[im_order[i], ...])
    # Read and validate frames meta
    meta_path = os.path.join(
        dest_dir,
        self.dataset_serial,
        'frames_meta.csv',
    )
    frames_meta = pd.read_csv(meta_path)
    for i, row in frames_meta.iterrows():
        c = i // self.nbr_slices
        z = i % self.nbr_slices
        self.assertEqual(row.channel_idx, c)
        self.assertEqual(row.slice_idx, z)
        self.assertEqual(row.time_idx, 0)
        self.assertEqual(row.pos_idx, 0)
        im_name = 'im_c00{}_z00{}_t000_p000.png'.format(c, z)
        self.assertEqual(row.file_name, im_name)
        sha256 = meta_utils.gen_sha256(self.im[im_order[i], ...])
        self.assertEqual(row.sha256, sha256)
    # Read and validate global meta
    meta_path = os.path.join(
        dest_dir,
        self.dataset_serial,
        'global_metadata.json',
    )
    meta_json = json_ops.read_json_file(meta_path)
    self.assertEqual(meta_json['storage_dir'], self.frames_storage_dir)
    self.assertEqual(meta_json['nbr_frames'], 6)
    self.assertEqual(meta_json['im_width'], 15)
    self.assertEqual(meta_json['im_height'], 10)
    self.assertEqual(meta_json['nbr_slices'], self.nbr_slices)
    self.assertEqual(meta_json['nbr_channels'], self.nbr_channels)
    self.assertEqual(meta_json['im_colors'], 1)
    self.assertEqual(meta_json['nbr_timepoints'], 1)
    self.assertEqual(meta_json['nbr_positions'], 1)
    self.assertEqual(meta_json['bit_depth'], 'uint16')
def test_get_stack_from_meta(self):
    # Upload image stack
    storage_dir = "raw_frames/ML-2005-05-23-10-00-00-0001"
    self.data_storage.upload_frames(self.stack_names, self.im_stack)
    global_meta = {
        "storage_dir": storage_dir,
        "nbr_frames": 5,
        "im_height": 10,
        "im_width": 15,
        "nbr_slices": 5,
        "nbr_channels": 1,
        "im_colors": 1,
        "bit_depth": "uint16",
        "nbr_timepoints": 1,
        "nbr_positions": 1,
    }
    # Download slices 1:4
    frames_meta = meta_utils.make_dataframe(nbr_frames=3)
    for i in range(3):
        sha = meta_utils.gen_sha256(self.im_stack[..., i + 1])
        frames_meta.loc[i] = [0, i + 1, 0, "A", self.stack_names[i + 1], 0, sha]
    im_stack, dim_order = self.data_storage.get_stack_from_meta(
        global_meta=global_meta,
        frames_meta=frames_meta,
    )
    # Stack has X = 10, Y = 15, grayscale, Z = 3, C = 1, T = 1, P = 1,
    # so expected stack shape and order should be:
    expected_shape = (10, 15, 3)
    nose.tools.assert_equal(im_stack.shape, expected_shape)
    nose.tools.assert_equal(dim_order, "XYZ")
def serialize_upload(self, frame_file_tuple):
    """
    Given a path to a tif file and its database file name, read the file,
    extract its metadata, serialize the image and upload it.

    :param tuple frame_file_tuple: Path to tif file and S3 + DB file name
    :return str sha256: Checksum for image
    :return str dict_i: Frame metadata, serialized with json.dumps
    """
    frame_path, frame_name = frame_file_tuple
    im = tifffile.TiffFile(frame_path)
    tiftags = im.pages[0].tags
    # Get all frame specific metadata
    dict_i = {}
    for t in tiftags.keys():
        dict_i[t] = tiftags[t].value
    im = im.asarray()
    sha256 = meta_utils.gen_sha256(im)
    # Upload to S3 with global client
    data_uploader.upload_im(
        im_name=frame_name,
        im=im,
        file_format=self.file_format,
    )
    # Do a json dumps otherwise some metadata won't pickle
    return sha256, json.dumps(dict_i)
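# Illustrative sketch, not part of the original module: the single-tuple
# signature above suggests serialize_upload is designed to be mapped over
# (frame_path, frame_name) pairs by a worker pool. The splitter instance,
# file paths and executor type below are all hypothetical.
def _example_serialize_upload_usage(splitter):
    from concurrent.futures import ProcessPoolExecutor

    frame_tuples = [
        ('/data/raw/im_0.tif', 'im_c000_z000_t000_p000.png'),
        ('/data/raw/im_1.tif', 'im_c000_z001_t000_p000.png'),
    ]
    with ProcessPoolExecutor(max_workers=4) as executor:
        # Each worker returns (sha256, json_metadata) for its frame
        results = list(executor.map(splitter.serialize_upload, frame_tuples))
    shas = [sha for sha, _ in results]
    return shas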
def test_gen_sha256_numpy():
    expected_sha = 'd1b8118646637256b66ef034778f8d0add8d00436ad1ebb051ef09cf19dbf2d2'
    # Image stack with 6 frames, hashed directly from memory (no file involved)
    im = 50 * np.ones((6, 50, 50), dtype=np.uint16)
    sha = meta_utils.gen_sha256(im)
    nose.tools.assert_equal(expected_sha, sha)
def test_upload_file(self, mock_session):
    # Upload the same file but as file instead of frames
    mock_session.return_value.__enter__.return_value = self.session
    config_path = os.path.join(
        self.temp_path,
        'config_file.json',
    )
    config = {
        "upload_type": "file",
        "microscope": "Mass Spectrometry",
        "storage": "s3",
    }
    json_ops.write_json_file(config, config_path)
    data_uploader.upload_data_and_update_db(
        csv=self.csv_path,
        login=self.credentials_path,
        config=config_path,
    )
    # Query database to find data_set and file_global
    datasets = self.session.query(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
    self.assertEqual(datasets.count(), 1)
    dataset = datasets[0]
    self.assertEqual(dataset.id, 1)
    self.assertFalse(dataset.frames)
    self.assertEqual(dataset.dataset_serial, self.dataset_serial)
    date_time = dataset.date_time
    self.assertEqual(date_time.year, 2005)
    self.assertEqual(date_time.month, 6)
    self.assertEqual(date_time.day, 9)
    self.assertEqual(dataset.microscope, "Mass Spectrometry")
    self.assertEqual(dataset.description, 'Testing')
    # query file_global
    file_global = self.session.query(db_ops.FileGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial) \
        .one()
    expected_s3 = "raw_files/TEST-2005-06-09-20-00-00-1000"
    self.assertEqual(
        file_global.storage_dir,
        expected_s3,
    )
    expected_meta = {'file_origin': self.file_path}
    self.assertDictEqual(file_global.metadata_json, expected_meta)
    self.assertEqual(file_global.data_set, dataset)
    sha256 = meta_utils.gen_sha256(self.file_path)
    self.assertEqual(file_global.sha256, sha256)
    # Check that file has been uploaded
    s3_client = boto3.client('s3')
    key = os.path.join(expected_s3, "A1_2_PROTEIN_test.tif")
    # Just check that the file is there, we've dissected it before
    response = s3_client.list_objects_v2(Bucket=self.bucket_name, Prefix=key)
    self.assertEqual(response['KeyCount'], 1)
def _generate_hash(self, im_stack):
    """
    Calculates the sha256 checksum for all image slices

    :param ndarray im_stack: Image stack to be hashed
    :return list sha: sha256 hashes indexed by the image index
    """
    sha = []
    for i in range(im_stack.shape[3]):
        sha.append(meta_utils.gen_sha256(im_stack[..., i]))
    return sha
def test_upload_file(self, mock_session):
    # Upload the same file but as file instead of frames
    mock_session.return_value.__enter__.return_value = self.session
    config_path = os.path.join(
        self.temp_path,
        'config_file.json',
    )
    config = {
        "upload_type": "file",
        "microscope": "Mass Spectrometry",
        "storage": "local",
        "storage_access": self.mount_point,
    }
    json_ops.write_json_file(config, config_path)
    data_uploader.upload_data_and_update_db(
        csv=self.csv_path,
        login=self.credentials_path,
        config=config_path,
    )
    # Query database to find data_set and file_global
    datasets = self.session.query(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
    self.assertEqual(datasets.count(), 1)
    dataset = datasets[0]
    self.assertEqual(dataset.id, 1)
    self.assertFalse(dataset.frames)
    self.assertEqual(dataset.dataset_serial, self.dataset_serial)
    date_time = dataset.date_time
    self.assertEqual(date_time.year, 2005)
    self.assertEqual(date_time.month, 6)
    self.assertEqual(date_time.day, 9)
    self.assertEqual(dataset.microscope, "Mass Spectrometry")
    self.assertEqual(dataset.description, 'Testing')
    # query file_global
    file_global = self.session.query(db_ops.FileGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial) \
        .one()
    expected_dir = "raw_files/TEST-2005-06-09-20-00-00-1000"
    self.assertEqual(
        file_global.storage_dir,
        expected_dir,
    )
    expected_meta = {'file_origin': self.file_path}
    self.assertDictEqual(file_global.metadata_json, expected_meta)
    self.assertEqual(file_global.data_set, dataset)
    sha256 = meta_utils.gen_sha256(self.file_path)
    self.assertEqual(file_global.sha256, sha256)
    # Check that file has been uploaded
    file_path = os.path.join(
        self.mount_point,
        expected_dir,
        'A1_2_PROTEIN_test.tif',
    )
    self.assertTrue(os.path.exists(file_path))
def migrate_db(credentials_filename):
    """
    Updates sha256 checksums for all files and frames

    :param str credentials_filename: Full path to DB credentials file
    """
    # Temporary download location; edit if needed. This assumes the script
    # is run from the imagingDB dir and writes to a dir one level above it.
    dir_name = os.path.abspath(os.path.join('..'))
    dest_dir = os.path.join(dir_name, 'temp_downloads')
    os.makedirs(dest_dir, exist_ok=True)
    credentials_str = db_utils.get_connection_str(
        credentials_filename=credentials_filename,
    )
    # Get files and compute checksums
    with db_ops.session_scope(credentials_str) as session:
        files = session.query(db_ops.FileGlobal)
        for file in files:
            if file.sha256 is None:
                data_loader = s3_storage.S3Storage(
                    storage_dir=file.storage_dir,
                )
                file_name = file.metadata_json["file_origin"]
                file_name = file_name.split("/")[-1]
                dest_path = os.path.join(dest_dir, file_name)
                data_loader.download_file(
                    file_name=file_name,
                    dest_path=dest_path,
                )
                checksum = meta_utils.gen_sha256(dest_path)
                file.sha256 = checksum
    # Get frames and compute checksums
    with db_ops.session_scope(credentials_str) as session:
        frames = session.query(db_ops.Frames)
        for frame in frames:
            if frame.sha256 is None:
                data_loader = s3_storage.S3Storage(
                    storage_dir=frame.frames_global.storage_dir,
                )
                im = data_loader.get_im(frame.file_name)
                checksum = meta_utils.gen_sha256(im)
                frame.sha256 = checksum
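# Illustrative entry point (an assumption, not part of the original script):
# migrate_db is presumably run as a one-off maintenance script with the path
# to a database credentials json file. The path below is a placeholder.
if __name__ == '__main__':
    migrate_db(credentials_filename='../db_credentials.json')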
def test_get_frames_meta(self):
    frames_meta = self.frames_inst.get_frames_meta()
    for i, (c, z) in enumerate(itertools.product(range(3), range(2))):
        # Validate file name
        expected_name = 'im_c00{}_z00{}_t000_p050.png'.format(c, z)
        self.assertEqual(frames_meta.loc[i, 'file_name'], expected_name)
        # Validate checksum
        expected_sha = meta_utils.gen_sha256(self.im + 5000 * z)
        self.assertEqual(frames_meta.loc[i, 'sha256'], expected_sha)
        # Validate indices
        self.assertEqual(frames_meta.loc[i, 'channel_idx'], c)
        self.assertEqual(frames_meta.loc[i, 'slice_idx'], z)
        self.assertEqual(frames_meta.loc[i, 'time_idx'], 0)
        self.assertEqual(frames_meta.loc[i, 'pos_idx'], 50)
def test_get_frames_no_metadata(self, MockPoolExecutor):
    # Magic mocking of multiprocessing
    MockPoolExecutor().__enter__().map = map_mock
    os.remove(self.json_filename)
    self.frames_inst.get_frames_and_metadata(
        filename_parser='parse_sms_name',
    )
    frames_meta = self.frames_inst.get_frames_meta()
    for i, (c, z) in enumerate(itertools.product(range(3), range(2))):
        # Validate file name
        expected_name = 'im_c00{}_z00{}_t000_p050.png'.format(c, z)
        self.assertEqual(frames_meta.loc[i, 'file_name'], expected_name)
        # Validate checksum
        expected_sha = meta_utils.gen_sha256(self.im + 5000 * z)
        self.assertEqual(frames_meta.loc[i, 'sha256'], expected_sha)
        # Validate indices
        self.assertEqual(frames_meta.loc[i, 'channel_idx'], c)
        self.assertEqual(frames_meta.loc[i, 'slice_idx'], z)
        self.assertEqual(frames_meta.loc[i, 'time_idx'], 0)
        self.assertEqual(frames_meta.loc[i, 'pos_idx'], 50)
def test_gen_sha256_file():
    expected_sha = 'af87894cc23928df908b02bd94842d063a5c7aae9eb1bbc2bb5c9475d674bcba'
    with TempDirectory() as temp_dir:
        temp_path = temp_dir.path
        # Temporary file with 6 frames, tifffile stores channels first
        im = 50 * np.ones((6, 10, 15), dtype=np.uint16)
        im[0, :5, 3:12] = 50000
        im[2, :5, 3:12] = 40000
        im[4, :5, 3:12] = 30000
        description = 'ImageJ=1.52e\nimages=6\nchannels=2\nslices=3\nmax=10411.0'
        # Save test tif file
        file_path = os.path.join(temp_path, "A1_2_PROTEIN_test.tif")
        tifffile.imsave(
            file=file_path,
            data=im,
            description=description,
            datetime=datetime.datetime(2019, 1, 1),
        )
        sha = meta_utils.gen_sha256(file_path)
        nose.tools.assert_equal(expected_sha, sha)
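# Illustrative sketch only: the two tests above imply that meta_utils.gen_sha256
# accepts either an in-memory ndarray or a path to a file on disk. One possible
# shape of such a helper is sketched below; the real implementation in
# meta_utils may serialize its input differently, so this sketch is not
# guaranteed to reproduce the expected_sha values asserted in these tests.
def _gen_sha256_sketch(data):
    import hashlib

    sha = hashlib.sha256()
    if isinstance(data, np.ndarray):
        # Hash the raw array buffer
        sha.update(data.tobytes())
    elif isinstance(data, str):
        # Hash the file contents in chunks
        with open(data, 'rb') as f:
            for block in iter(lambda: f.read(65536), b''):
                sha.update(block)
    else:
        raise TypeError("Expected ndarray or file path, got {}".format(type(data)))
    return sha.hexdigest()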
def test_get_stack_from_meta(self):
    # Upload image stack
    storage_dir = "raw_frames/ML-2005-05-23-10-00-00-0001"
    data_storage = s3_storage.S3Storage(storage_dir, self.nbr_workers)
    data_storage.upload_frames(self.stack_names, self.im_stack)
    global_meta = {
        "storage_dir": storage_dir,
        "nbr_frames": 2,
        "im_height": 10,
        "im_width": 15,
        "nbr_slices": 1,
        "nbr_channels": 2,
        "im_colors": 1,
        "bit_depth": "uint16",
        "nbr_timepoints": 1,
        "nbr_positions": 1,
    }
    frames_meta = meta_utils.make_dataframe(
        nbr_frames=global_meta["nbr_frames"],
    )
    nbr_frames = self.im_stack.shape[2]
    sha = [None] * nbr_frames
    for i in range(nbr_frames):
        sha[i] = meta_utils.gen_sha256(self.im_stack[..., i])
    frames_meta.loc[0] = [0, 0, 0, "A", "im1.png", 0, sha[0]]
    frames_meta.loc[1] = [1, 0, 0, "B", "im2.png", 0, sha[1]]
    im_stack, dim_order = data_storage.get_stack_from_meta(
        global_meta,
        frames_meta,
    )
    # Stack has X = 10, Y = 15, grayscale, Z = 1, C = 2, T = 1, P = 1,
    # so expected stack shape and order should be:
    expected_shape = (10, 15, 2)
    nose.tools.assert_equal(im_stack.shape, expected_shape)
    nose.tools.assert_equal(dim_order, "XYC")
def upload_data_and_update_db(csv,
                              login,
                              config,
                              nbr_workers=None,
                              overwrite=False):
    """
    Takes a csv file in which each row represents a dataset, uploads the data
    to storage and the metadata to the database. If 'frames' is selected as
    upload type, each dataset will be split into individual 2D frames before
    moving to storage.
    TODO: Add logging instead of printing

    :param str csv: Full path to csv file containing the following fields
        for each file to be uploaded:
            str dataset_id: Unique dataset ID <ID>-YYYY-MM-DD-HH-MM-SS-<SSSS>
            str file_name: Full path to file to be uploaded
            str description: Short description of file
            str parent_dataset_id: Parent dataset unique ID if there is one
            list positions: Which position files in folder to upload.
                Uploads all if left empty and file_name is a folder.
                Only valid for ome-tiff uploads.
    :param str login: Full path to json file containing login credentials
    :param str config: Full path to json config file containing the fields:
            str upload_type: Specify if the file should be split prior to
                upload. Valid options: 'frames' or 'file'
            str frames_format: Which file splitter class to use.
                Valid options:
                'ome_tiff' needs MicroManagerMetadata tag for each frame
                'tif_folder' when each file is already an individual frame
                and relies on MicroManager metadata
                'tif_id' needs ImageDescription tag in first frame page
            str storage: 'local' (default) - data will be stored locally and
                synced to S3 the same day, or 's3' - data will be uploaded
                directly to S3 then synced with local storage daily
            str storage_access: If not using predefined storage locations,
                this parameter refers to mount_point for local storage and
                bucket_name for S3 storage (optional)
            str microscope: Microscope name (optional)
            str filename_parser: Name of function for parsing frame indices
                from file names, e.g. 'parse_sms_name' (optional)
            str schema_filename: If splitting to frames, full path to json
                metadata schema for reading metadata (optional)
    :param int, None nbr_workers: Number of workers for parallel uploads
    :param bool overwrite: Use with caution: set to True if your upload was
        interrupted and you want to overwrite existing data in database
        and storage
    """
    # Assert that csv file exists and load it
    assert os.path.isfile(csv), \
        "File doesn't exist: {}".format(csv)
    files_data = pd.read_csv(csv)
    # Get database connection URI
    db_connection = db_utils.get_connection_str(login)
    db_utils.check_connection(db_connection)
    # Read and validate config json
    config_json = json_ops.read_json_file(
        json_filename=config,
        schema_name="CONFIG_SCHEMA",
    )
    # Assert that upload type is valid
    upload_type = config_json['upload_type'].lower()
    assert upload_type in {"file", "frames"}, \
        "upload_type should be 'file' or 'frames', not {}".format(
            upload_type,
        )
    if nbr_workers is not None:
        assert nbr_workers > 0, \
            "Nbr of workers must be > 0, not {}".format(nbr_workers)
    # Import local or S3 storage class
    storage = 'local'
    if 'storage' in config_json:
        storage = config_json['storage']
    storage_class = aux_utils.get_storage_class(storage_type=storage)
    storage_access = None
    if 'storage_access' in config_json:
        storage_access = config_json['storage_access']
    # Make sure microscope is a string
    microscope = None
    if 'microscope' in config_json:
        if isinstance(config_json['microscope'], str):
            microscope = config_json['microscope']
    if upload_type == 'frames':
        # If upload type is frames, check the frames format
        assert 'frames_format' in config_json, \
            'You must specify the type of file(s)'
        splitter_class = aux_utils.get_splitter_class(
            config_json['frames_format'],
        )
    # Upload all files
    for file_nbr, row in files_data.iterrows():
        # Assert that ID is correctly formatted
        dataset_serial = row.dataset_id
        try:
            cli_utils.validate_id(dataset_serial)
        except AssertionError as e:
            raise AssertionError("Invalid ID:", e)
        # Get storage directory based on upload type
        if upload_type == "frames":
            storage_dir = "/".join([FRAME_FOLDER_NAME, dataset_serial])
        else:
            storage_dir = "/".join([FILE_FOLDER_NAME, dataset_serial])
        # Instantiate database operations class
        db_inst = db_ops.DatabaseOperations(
            dataset_serial=dataset_serial,
        )
        # Make sure dataset is not already in database
        if not overwrite:
            with db_ops.session_scope(db_connection) as session:
                db_inst.assert_unique_id(session)
        # Check for parent dataset
        parent_dataset_id = 'None'
        if 'parent_dataset_id' in row:
            parent_dataset_id = row.parent_dataset_id
        # Check for dataset description
        description = None
        if 'description' in row:
            # NaN check: a NaN description is not equal to itself
            if row.description == row.description:
                description = row.description
        if upload_type == "frames":
            # Instantiate splitter class
            frames_inst = splitter_class(
                data_path=row.file_name,
                storage_dir=storage_dir,
                storage_class=storage_class,
                storage_access=storage_access,
                overwrite=overwrite,
                file_format=FRAME_FILE_FORMAT,
                nbr_workers=nbr_workers,
            )
            # Get kwargs if any
            kwargs = {}
            if 'positions' in row:
                positions = row['positions']
                if not pd.isna(positions):
                    kwargs['positions'] = positions
            if 'schema_filename' in config_json:
                kwargs['schema_filename'] = config_json['schema_filename']
            if 'filename_parser' in config_json:
                filename_parser = config_json['filename_parser']
                kwargs['filename_parser'] = filename_parser
            # Extract metadata and split file into frames
            frames_inst.get_frames_and_metadata(**kwargs)
            # Add frames metadata to database
            try:
                with db_ops.session_scope(db_connection) as session:
                    db_inst.insert_frames(
                        session=session,
                        description=description,
                        frames_meta=frames_inst.get_frames_meta(),
                        frames_json_meta=frames_inst.get_frames_json(),
                        global_meta=frames_inst.get_global_meta(),
                        global_json_meta=frames_inst.get_global_json(),
                        microscope=microscope,
                        parent_dataset=parent_dataset_id,
                    )
            except AssertionError as e:
                print("Data set {} already in DB".format(dataset_serial), e)
        # File upload
        else:
            # Just upload file without opening it
            assert os.path.isfile(row.file_name), \
                "File doesn't exist: {}".format(row.file_name)
            data_uploader = storage_class(
                storage_dir=storage_dir,
                access_point=storage_access,
            )
            if not overwrite:
                data_uploader.assert_unique_id()
            try:
                data_uploader.upload_file(file_path=row.file_name)
                print("File {} uploaded to storage".format(row.file_name))
            except AssertionError as e:
                print("File already in storage, moving on to DB entry. {}".format(e))
            sha = meta_utils.gen_sha256(row.file_name)
            # Add file entry to DB
            global_json = {"file_origin": row.file_name}
            file_name = row.file_name.split("/")[-1]
            try:
                with db_ops.session_scope(db_connection) as session:
                    db_inst.insert_file(
                        session=session,
                        description=description,
                        storage_dir=storage_dir,
                        file_name=file_name,
                        global_json_meta=global_json,
                        microscope=microscope,
                        parent_dataset=parent_dataset_id,
                        sha256=sha,
                    )
                print("File info for {} inserted in DB".format(dataset_serial))
            except AssertionError as e:
                print("File {} already in database".format(dataset_serial))
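# Illustrative sketch, not part of the original module: a minimal one-row
# upload csv and 'frames' config matching the fields documented in
# upload_data_and_update_db. All paths, the dataset ID and the credentials
# file below are hypothetical placeholders.
def _example_upload_call():
    upload_csv = pd.DataFrame({
        'dataset_id': ['TEST-2019-01-01-10-00-00-0001'],
        'file_name': ['/data/raw/experiment_01'],
        'description': ['Example frames upload'],
    })
    upload_csv.to_csv('/tmp/example_upload.csv')
    config = {
        'upload_type': 'frames',
        'frames_format': 'tif_folder',
        'microscope': 'example scope',
        'filename_parser': 'parse_sms_name',
        'storage': 'local',
        'storage_access': '/tmp/example_mount',
    }
    json_ops.write_json_file(config, '/tmp/example_config.json')
    upload_data_and_update_db(
        csv='/tmp/example_upload.csv',
        login='/path/to/db_credentials.json',
        config='/tmp/example_config.json',
    )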
def test_upload_tiffolder(self, mock_session):
    mock_session.return_value.__enter__.return_value = self.session
    dataset_serial = 'SMS-2010-01-01-01-00-00-0005'
    # Temporary frame
    im = np.ones((10, 15), dtype=np.uint8)
    # Save test tif files
    self.tempdir.makedir('tiffolder')
    tif_dir = os.path.join(self.temp_path, 'tiffolder')
    channel_names = ['phase', 'brightfield', '666']
    # Write files in dir
    for c_name in channel_names:
        for z in range(2):
            file_name = 'img_{}_t060_p050_z00{}.tif'.format(c_name, z)
            file_path = os.path.join(tif_dir, file_name)
            ijmeta = {"Info": json.dumps({"c": c_name, "z": z})}
            tifffile.imsave(
                file_path,
                im + 50 * z,
                ijmetadata=ijmeta,
            )
    # Write external metadata in dir
    self.meta_dict = {
        'Summary': {
            'Slices': 6,
            'PixelType': 'GRAY8',
            'Time': '2018-11-01 19:20:34 -0700',
            'z-step_um': 0.5,
            'PixelSize_um': 0,
            'BitDepth': 8,
            'Width': 15,
            'Height': 10,
        },
    }
    self.json_filename = os.path.join(tif_dir, 'metadata.txt')
    json_ops.write_json_file(self.meta_dict, self.json_filename)
    # Create csv file for upload
    upload_dict = {
        'dataset_id': [dataset_serial],
        'file_name': [tif_dir],
        'description': ['Testing tifffolder upload'],
    }
    upload_csv = pd.DataFrame.from_dict(upload_dict)
    csv_path = os.path.join(self.temp_path, "test_tiffolder_upload.csv")
    upload_csv.to_csv(csv_path)
    config_path = os.path.join(
        self.temp_path,
        'config_tiffolder.json',
    )
    config = {
        "upload_type": "frames",
        "frames_format": "tif_folder",
        "microscope": "CZDRAGONFLY-PC",
        "filename_parser": "parse_sms_name",
        "storage": "local",
        "storage_access": self.mount_point,
    }
    json_ops.write_json_file(config, config_path)
    # Upload data
    data_uploader.upload_data_and_update_db(
        csv=csv_path,
        login=self.credentials_path,
        config=config_path,
    )
    # Query database to find data_set and frames
    datasets = self.session.query(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial)
    self.assertEqual(datasets.count(), 1)
    dataset = datasets[0]
    self.assertTrue(dataset.frames)
    self.assertEqual(dataset.dataset_serial, dataset_serial)
    date_time = dataset.date_time
    self.assertEqual(date_time.year, 2010)
    self.assertEqual(date_time.month, 1)
    self.assertEqual(date_time.day, 1)
    self.assertEqual(dataset.description, 'Testing tifffolder upload')
    # query frames_global
    global_query = self.session.query(db_ops.FramesGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial)
    self.assertEqual(
        global_query[0].storage_dir,
        'raw_frames/' + dataset_serial,
    )
    self.assertEqual(global_query[0].nbr_frames, 6)
    self.assertEqual(global_query[0].im_width, 15)
    self.assertEqual(global_query[0].im_height, 10)
    self.assertEqual(global_query[0].nbr_slices, 2)
    self.assertEqual(global_query[0].nbr_channels, 3)
    self.assertEqual(global_query[0].nbr_positions, 1)
    self.assertEqual(global_query[0].nbr_timepoints, 1)
    self.assertEqual(global_query[0].im_colors, 1)
    self.assertEqual(global_query[0].bit_depth, 'uint8')
    # query frames
    frames = self.session.query(db_ops.Frames) \
        .join(db_ops.FramesGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial) \
        .order_by(db_ops.Frames.file_name)
    # Validate content
    # Channel numbers will be assigned alphabetically
    channel_names.sort()
    for i, (c, z) in enumerate(itertools.product(range(3), range(2))):
        im_name = 'im_c00{}_z00{}_t060_p050.png'.format(c, z)
        self.assertEqual(frames[i].file_name, im_name)
        self.assertEqual(frames[i].channel_idx, c)
        self.assertEqual(frames[i].channel_name, channel_names[c])
        self.assertEqual(frames[i].slice_idx, z)
        self.assertEqual(frames[i].time_idx, 60)
        self.assertEqual(frames[i].pos_idx, 50)
        self.assertEqual(
            frames[i].sha256,
            meta_utils.gen_sha256(im + 50 * z),
        )
    # Download frames from storage and compare to originals
    for i in range(len(channel_names)):
        for z in range(2):
            im_name = 'im_c00{}_z00{}_t060_p050.png'.format(i, z)
            im_path = os.path.join(
                self.mount_point,
                'raw_frames',
                dataset_serial,
                im_name,
            )
            im_out = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
            nose.tools.assert_equal(im_out.dtype, np.uint8)
            numpy.testing.assert_array_equal(im_out, im + z * 50)
def test_upload_ometif(self, mock_session):
    mock_session.return_value.__enter__.return_value = self.session
    dataset_serial = 'ISP-2005-01-01-01-00-00-0001'
    # Temporary frame
    im = np.ones((10, 15), dtype=np.uint16)
    # Metadata
    ijmeta = {
        "Info": json.dumps({
            "InitialPositionList": [
                {"Label": "Pos1"},
                {"Label": "Pos3"},
            ],
        }),
    }
    channel_ids = [1, 2]
    im_names = ['test_Pos1.ome.tif', 'test_Pos3.ome.tif']
    for i, c in enumerate(channel_ids):
        mmmetadata = json.dumps({
            "ChannelIndex": c,
            "Slice": 20,
            "FrameIndex": 30,
            "PositionIndex": 40,
            "Channel": 'channel_{}'.format(c),
        })
        extra_tags = [('MicroManagerMetadata', 's', 0, mmmetadata, True)]
        # Save test ome tif file
        file_path = os.path.join(self.temp_path, im_names[i])
        tifffile.imsave(
            file_path,
            im + i * 10000,
            ijmetadata=ijmeta,
            extratags=extra_tags,
        )
    schema_file_path = os.path.realpath(
        os.path.join(self.main_dir, 'metadata_schema.json'),
    )
    # Create csv file for upload
    upload_dict = {
        'dataset_id': [dataset_serial],
        'file_name': [self.temp_path],
        'description': ['Testing'],
        'positions': [[1, 3]],
        'schema_filename': [schema_file_path],
    }
    upload_csv = pd.DataFrame.from_dict(upload_dict)
    csv_path = os.path.join(self.temp_path, "test_ometif_upload.csv")
    upload_csv.to_csv(csv_path)
    config_path = os.path.join(
        self.temp_path,
        'config_ome_tiff.json',
    )
    config = {
        "upload_type": "frames",
        "frames_format": "ome_tiff",
        "microscope": "",
        "schema_filename": "metadata_schema.json",
        "storage": "local",
        "storage_access": self.mount_point,
    }
    json_ops.write_json_file(config, config_path)
    # Upload data
    data_uploader.upload_data_and_update_db(
        csv=csv_path,
        login=self.credentials_path,
        config=config_path,
    )
    # Query database to find data_set and frames
    datasets = self.session.query(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial)
    self.assertEqual(datasets.count(), 1)
    dataset = datasets[0]
    self.assertEqual(dataset.id, 1)
    self.assertTrue(dataset.frames)
    self.assertEqual(dataset.dataset_serial, dataset_serial)
    date_time = dataset.date_time
    self.assertEqual(date_time.year, 2005)
    self.assertEqual(date_time.month, 1)
    self.assertEqual(date_time.day, 1)
    self.assertEqual(dataset.description, 'Testing')
    # query frames_global
    global_query = self.session.query(db_ops.FramesGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial)
    self.assertEqual(
        global_query[0].storage_dir,
        'raw_frames/' + dataset_serial,
    )
    self.assertEqual(global_query[0].nbr_frames, 2)
    im_shape = im.shape
    self.assertEqual(global_query[0].im_width, im_shape[1])
    self.assertEqual(global_query[0].im_height, im_shape[0])
    self.assertEqual(global_query[0].nbr_slices, 1)
    self.assertEqual(global_query[0].nbr_channels, 2)
    self.assertEqual(global_query[0].nbr_positions, 1)
    self.assertEqual(global_query[0].nbr_timepoints, 1)
    self.assertEqual(global_query[0].im_colors, 1)
    self.assertEqual(global_query[0].bit_depth, 'uint16')
    # query frames
    frames = self.session.query(db_ops.Frames) \
        .join(db_ops.FramesGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial) \
        .order_by(db_ops.Frames.file_name)
    shas = [meta_utils.gen_sha256(im), meta_utils.gen_sha256(im + 10000)]
    for i, c in enumerate(channel_ids):
        im_name = 'im_c00{}_z020_t030_p040.png'.format(c)
        self.assertEqual(frames[i].file_name, im_name)
        self.assertEqual(frames[i].channel_idx, c)
        self.assertEqual(frames[i].channel_name, 'channel_{}'.format(c))
        self.assertEqual(frames[i].slice_idx, 20)
        self.assertEqual(frames[i].time_idx, 30)
        self.assertEqual(frames[i].pos_idx, 40)
        self.assertEqual(frames[i].sha256, shas[i])
    # Download frames from storage and compare to originals
    for i, c in enumerate(channel_ids):
        im_name = 'im_c00{}_z020_t030_p040.png'.format(c)
        im_path = os.path.join(
            self.mount_point,
            'raw_frames',
            dataset_serial,
            im_name,
        )
        im_out = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
        nose.tools.assert_equal(im_out.dtype, np.uint16)
        numpy.testing.assert_array_equal(im_out, im + i * 10000)
def test_upload_frames(self, mock_session):
    mock_session.return_value.__enter__.return_value = self.session
    data_uploader.upload_data_and_update_db(
        csv=self.csv_path,
        login=self.credentials_path,
        config=self.config_path,
    )
    # Query database to find data_set and frames
    datasets = self.session.query(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
    self.assertEqual(datasets.count(), 1)
    dataset = datasets[0]
    self.assertEqual(dataset.id, 1)
    self.assertTrue(dataset.frames)
    self.assertEqual(dataset.dataset_serial, self.dataset_serial)
    date_time = dataset.date_time
    self.assertEqual(date_time.year, 2005)
    self.assertEqual(date_time.month, 6)
    self.assertEqual(date_time.day, 9)
    self.assertEqual(dataset.microscope, "Leica microscope CAN bus adapter")
    self.assertEqual(dataset.description, 'Testing')
    # query frames_global
    global_query = self.session.query(db_ops.FramesGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
    self.assertEqual(
        global_query[0].storage_dir,
        self.storage_dir,
    )
    self.assertEqual(
        global_query[0].nbr_frames,
        self.nbr_channels * self.nbr_slices,
    )
    im_shape = self.im.shape
    self.assertEqual(global_query[0].im_width, im_shape[2])
    self.assertEqual(global_query[0].im_height, im_shape[1])
    self.assertEqual(global_query[0].nbr_slices, self.nbr_slices)
    self.assertEqual(global_query[0].nbr_channels, self.nbr_channels)
    self.assertEqual(global_query[0].nbr_positions, 1)
    self.assertEqual(global_query[0].nbr_timepoints, 1)
    self.assertEqual(global_query[0].im_colors, 1)
    self.assertEqual(global_query[0].bit_depth, 'uint16')
    # query frames
    frames = self.session.query(db_ops.Frames) \
        .join(db_ops.FramesGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial) \
        .order_by(db_ops.Frames.file_name)
    # Images are separated by slice first then channel
    im_order = [0, 2, 4, 1, 3, 5]
    it = itertools.product(range(self.nbr_channels), range(self.nbr_slices))
    for i, (c, z) in enumerate(it):
        im_name = 'im_c00{}_z00{}_t000_p000.png'.format(c, z)
        self.assertEqual(frames[i].file_name, im_name)
        self.assertEqual(frames[i].channel_idx, c)
        self.assertEqual(frames[i].slice_idx, z)
        self.assertEqual(frames[i].time_idx, 0)
        self.assertEqual(frames[i].pos_idx, 0)
        sha256 = meta_utils.gen_sha256(self.im[im_order[i], ...])
        self.assertEqual(frames[i].sha256, sha256)
    # Download frames from storage and compare to originals
    it = itertools.product(range(self.nbr_channels), range(self.nbr_slices))
    for i, (c, z) in enumerate(it):
        im_name = 'im_c00{}_z00{}_t000_p000.png'.format(c, z)
        im_path = os.path.join(self.mount_point, self.storage_dir, im_name)
        im = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
        nose.tools.assert_equal(im.dtype, np.uint16)
        numpy.testing.assert_array_equal(im, self.im[im_order[i], ...])
def test_gen_sha256_invalid_input():
    meta_utils.gen_sha256(5)