def test_upload_file_already_in_db(self, mock_session):
    # Upload as upload_type 'file' (not frames), then upload the same file
    # again to check that an existing database entry is handled with overwrite
    mock_session.return_value.__enter__.return_value = self.session
    config_path = os.path.join(
        self.temp_path,
        'config_file.json',
    )
    config = {
        "upload_type": "file",
        "microscope": "Mass Spectrometry",
        "storage": "local",
        "storage_access": self.mount_point
    }
    json_ops.write_json_file(config, config_path)
    data_uploader.upload_data_and_update_db(
        csv=self.csv_path,
        login=self.credentials_path,
        config=config_path,
        overwrite=True,
    )
    # Try uploading a second time
    data_uploader.upload_data_and_update_db(
        csv=self.csv_path,
        login=self.credentials_path,
        config=config_path,
        overwrite=True,
    )
def setUp(self, MockPoolExecutor):
    """
    Set up temporary test directory and mock S3 bucket connection
    """
    # Magic mocking of multiprocessing
    MockPoolExecutor().__enter__().map = map_mock
    # Mock S3 directory for upload
    self.storage_dir = "raw_frames/SMS-2010-01-01-00-00-00-0001"
    # Create temporary directory and write temp image
    self.tempdir = TempDirectory()
    self.temp_path = self.tempdir.path
    # Temporary frame
    self.im = np.ones((10, 15), dtype=np.uint16)
    self.im[2:5, 3:12] = 10000
    # Save test tif files
    self.channel_names = ['phase', 'brightfield', '666']
    # Write files in dir
    for c in self.channel_names:
        for z in range(2):
            file_name = 'img_{}_t000_p050_z00{}.tif'.format(c, z)
            file_path = os.path.join(self.temp_path, file_name)
            ijmeta = {"Info": json.dumps({"c": c, "z": z})}
            tifffile.imsave(
                file_path,
                self.im + 5000 * z,
                ijmetadata=ijmeta,
            )
    # Write external metadata in dir
    self.meta_dict = {
        'Summary': {
            'Slices': 26,
            'PixelType': 'GRAY16',
            'Time': '2018-11-01 19:20:34 -0700',
            'z-step_um': 0.5,
            'PixelSize_um': 0,
            'BitDepth': 16,
            'Width': 15,
            'Height': 10
        },
    }
    self.json_filename = os.path.join(self.temp_path, 'metadata.txt')
    json_ops.write_json_file(self.meta_dict, self.json_filename)
    # Setup mock S3 bucket
    self.mock = mock_s3()
    self.mock.start()
    self.conn = boto3.resource('s3', region_name='us-east-1')
    self.bucket_name = 'czbiohub-imaging'
    self.conn.create_bucket(Bucket=self.bucket_name)
    # Instantiate file parser class
    storage_class = aux_utils.get_storage_class('s3')
    self.frames_inst = tif_splitter.TifFolderSplitter(
        data_path=self.temp_path,
        storage_dir=self.storage_dir,
        storage_class=storage_class,
    )
    # Upload data
    self.frames_inst.get_frames_and_metadata(
        filename_parser='parse_sms_name',
    )
def setUp(self):
    super().setUp()
    # Create temporary directory and write temp image
    self.tempdir = TempDirectory()
    self.temp_path = self.tempdir.path
    # Mock file storage
    self.tempdir.makedir('storage_mount_point')
    self.mount_point = os.path.join(self.temp_path, 'storage_mount_point')
    self.tempdir.makedir('storage_mount_point/raw_files')
    self.tempdir.makedir('storage_mount_point/raw_frames')
    # Test metadata parameters
    self.nbr_channels = 2
    self.nbr_slices = 3
    # Mock S3 dir
    self.storage_dir = "raw_frames/TEST-2005-06-09-20-00-00-1000"
    # Temporary file with 6 frames, tifffile stores channels first
    self.im = 50 * np.ones((6, 10, 15), dtype=np.uint16)
    self.im[0, :5, 3:12] = 50000
    self.im[2, :5, 3:12] = 40000
    self.im[4, :5, 3:12] = 30000
    # Metadata
    self.description = 'ImageJ=1.52e\nimages=6\nchannels=2\nslices=3\nmax=10411.0'
    # Save test tif file
    self.file_path = os.path.join(self.temp_path, "A1_2_PROTEIN_test.tif")
    tifffile.imsave(
        self.file_path,
        self.im,
        description=self.description,
    )
    self.dataset_serial = 'TEST-2005-06-09-20-00-00-1000'
    # Create csv file for upload
    upload_dict = {
        'dataset_id': [self.dataset_serial],
        'file_name': [self.file_path],
        'description': ['Testing'],
        'parent_dataset_id': [None],
    }
    upload_csv = pd.DataFrame.from_dict(upload_dict)
    self.csv_path = os.path.join(self.temp_path, "test_upload.csv")
    upload_csv.to_csv(self.csv_path)
    self.credentials_path = os.path.join(
        self.main_dir,
        'db_credentials.json',
    )
    self.config_path = os.path.join(
        self.temp_path,
        'config_tif_id.json',
    )
    config = {
        "upload_type": "frames",
        "frames_format": "tif_id",
        "microscope": "Leica microscope CAN bus adapter",
        "filename_parser": "parse_ml_name",
        "storage": "local",
        "storage_access": self.mount_point
    }
    json_ops.write_json_file(config, self.config_path)
def test_upload_file(self, mock_session):
    # Upload the same file but as file instead of frames
    mock_session.return_value.__enter__.return_value = self.session
    config_path = os.path.join(
        self.temp_path,
        'config_file.json',
    )
    config = {
        "upload_type": "file",
        "microscope": "Mass Spectrometry",
        "storage": "s3",
    }
    json_ops.write_json_file(config, config_path)
    data_uploader.upload_data_and_update_db(
        csv=self.csv_path,
        login=self.credentials_path,
        config=config_path,
    )
    # Query database to find data_set and file_global
    datasets = self.session.query(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
    self.assertEqual(datasets.count(), 1)
    dataset = datasets[0]
    self.assertEqual(dataset.id, 1)
    self.assertFalse(dataset.frames)
    self.assertEqual(dataset.dataset_serial, self.dataset_serial)
    date_time = dataset.date_time
    self.assertEqual(date_time.year, 2005)
    self.assertEqual(date_time.month, 6)
    self.assertEqual(date_time.day, 9)
    self.assertEqual(dataset.microscope, "Mass Spectrometry")
    self.assertEqual(dataset.description, 'Testing')
    # Query file_global
    file_global = self.session.query(db_ops.FileGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial) \
        .one()
    expected_s3 = "raw_files/TEST-2005-06-09-20-00-00-1000"
    self.assertEqual(
        file_global.storage_dir,
        expected_s3,
    )
    expected_meta = {'file_origin': self.file_path}
    self.assertDictEqual(file_global.metadata_json, expected_meta)
    self.assertEqual(file_global.data_set, dataset)
    sha256 = meta_utils.gen_sha256(self.file_path)
    self.assertEqual(file_global.sha256, sha256)
    # Check that file has been uploaded
    s3_client = boto3.client('s3')
    key = os.path.join(expected_s3, "A1_2_PROTEIN_test.tif")
    # Just check that the file is there, we've dissected it before
    response = s3_client.list_objects_v2(
        Bucket=self.bucket_name,
        Prefix=key,
    )
    self.assertEqual(response['KeyCount'], 1)
def test_upload_file(self, mock_session):
    # Upload the same file but as file instead of frames
    mock_session.return_value.__enter__.return_value = self.session
    config_path = os.path.join(
        self.temp_path,
        'config_file.json',
    )
    config = {
        "upload_type": "file",
        "microscope": "Mass Spectrometry",
        "storage": "local",
        "storage_access": self.mount_point
    }
    json_ops.write_json_file(config, config_path)
    data_uploader.upload_data_and_update_db(
        csv=self.csv_path,
        login=self.credentials_path,
        config=config_path,
    )
    # Query database to find data_set and file_global
    datasets = self.session.query(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
    self.assertEqual(datasets.count(), 1)
    dataset = datasets[0]
    self.assertEqual(dataset.id, 1)
    self.assertFalse(dataset.frames)
    self.assertEqual(dataset.dataset_serial, self.dataset_serial)
    date_time = dataset.date_time
    self.assertEqual(date_time.year, 2005)
    self.assertEqual(date_time.month, 6)
    self.assertEqual(date_time.day, 9)
    self.assertEqual(dataset.microscope, "Mass Spectrometry")
    self.assertEqual(dataset.description, 'Testing')
    # Query file_global
    file_global = self.session.query(db_ops.FileGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == self.dataset_serial) \
        .one()
    expected_dir = "raw_files/TEST-2005-06-09-20-00-00-1000"
    self.assertEqual(
        file_global.storage_dir,
        expected_dir,
    )
    expected_meta = {'file_origin': self.file_path}
    self.assertDictEqual(file_global.metadata_json, expected_meta)
    self.assertEqual(file_global.data_set, dataset)
    sha256 = meta_utils.gen_sha256(self.file_path)
    self.assertEqual(file_global.sha256, sha256)
    # Check that file has been uploaded
    file_path = os.path.join(
        self.mount_point,
        expected_dir,
        'A1_2_PROTEIN_test.tif',
    )
    self.assertTrue(os.path.exists(file_path))
def test_write_json_file():
    with TempDirectory() as tempdir:
        valid_json = {
            "drivername": "postgres",
            "username": "******",
            "password": "******",
            "host": "db_host",
            "port": 666,
            "dbname": "db_name"
        }
        json_ops.write_json_file(
            valid_json,
            os.path.join(tempdir.path, 'valid_json_file.json'),
        )
        json_object = json_ops.read_json_file(
            os.path.join(tempdir.path, "valid_json_file.json"),
            schema_name="CREDENTIALS_SCHEMA",
        )
        nose.tools.assert_equal(json_object, valid_json)
def setUp(self, mock_session):
    super().setUp()
    mock_session.return_value.__enter__.return_value = self.session
    # Create temporary directory and write temp image
    self.tempdir = TempDirectory()
    self.temp_path = self.tempdir.path
    # Mock file storage
    self.tempdir.makedir('storage_mount_point')
    self.mount_point = os.path.join(self.temp_path, 'storage_mount_point')
    self.tempdir.makedir('storage_mount_point/raw_files')
    self.tempdir.makedir('storage_mount_point/raw_frames')
    # Test metadata parameters
    self.nbr_channels = 2
    self.nbr_slices = 3
    # Mock storage dir
    self.dataset_serial = 'FRAMES-2005-06-09-20-00-00-1000'
    self.frames_storage_dir = os.path.join('raw_frames', self.dataset_serial)
    # Temporary file with 6 frames, tifffile stores channels first
    self.im = 50 * np.ones((6, 10, 15), dtype=np.uint16)
    self.im[0, :5, 3:12] = 50000
    self.im[2, :5, 3:12] = 40000
    self.im[4, :5, 3:12] = 30000
    # Metadata
    self.description = 'ImageJ=1.52e\nimages=6\nchannels=2\nslices=3\nmax=10411.0'
    # Save test tif file
    self.file_path = os.path.join(self.temp_path, "A1_2_PROTEIN_test.tif")
    tifffile.imsave(
        self.file_path,
        self.im,
        description=self.description,
    )
    # Create input arguments for data upload
    upload_csv = pd.DataFrame(
        columns=['dataset_id', 'file_name', 'description'],
    )
    upload_csv = upload_csv.append(
        {
            'dataset_id': self.dataset_serial,
            'file_name': self.file_path,
            'description': 'Testing',
        },
        ignore_index=True,
    )
    self.csv_path_frames = os.path.join(
        self.temp_path,
        "test_upload_frames.csv",
    )
    upload_csv.to_csv(self.csv_path_frames)
    self.credentials_path = os.path.join(
        self.main_dir,
        'db_credentials.json',
    )
    self.config_path = os.path.join(
        self.temp_path,
        'config_tif_id.json',
    )
    config = {
        "upload_type": "frames",
        "frames_format": "tif_id",
        "microscope": "Leica microscope CAN bus adapter",
        "filename_parser": "parse_ml_name",
        "storage": "local",
        "storage_access": self.mount_point
    }
    json_ops.write_json_file(config, self.config_path)
    # Upload frames
    data_uploader.upload_data_and_update_db(
        csv=self.csv_path_frames,
        login=self.credentials_path,
        config=self.config_path,
    )
    # Create input args for file upload
    self.dataset_serial_file = 'FILE-2005-06-09-20-00-00-1000'
    self.file_storage_dir = os.path.join('raw_files', self.dataset_serial_file)
    self.csv_path_file = os.path.join(
        self.temp_path,
        "test_upload_file.csv",
    )
    # Change to unique serial
    upload_csv['dataset_id'] = self.dataset_serial_file
    upload_csv.to_csv(self.csv_path_file)
    config_path = os.path.join(
        self.temp_path,
        'config_file.json',
    )
    config = {
        "upload_type": "file",
        "microscope": "Mass Spectrometry",
        "storage": "local",
        "storage_access": self.mount_point
    }
    json_ops.write_json_file(config, config_path)
    # Upload file
    data_uploader.upload_data_and_update_db(
        csv=self.csv_path_file,
        login=self.credentials_path,
        config=config_path,
    )
def test_upload_tiffolder(self, mock_session):
    mock_session.return_value.__enter__.return_value = self.session
    dataset_serial = 'SMS-2010-01-01-01-00-00-0005'
    # Temporary frame
    im = np.ones((10, 15), dtype=np.uint8)
    # Save test tif files
    self.tempdir.makedir('tiffolder')
    tif_dir = os.path.join(self.temp_path, 'tiffolder')
    channel_names = ['phase', 'brightfield', '666']
    # Write files in dir
    for c_name in channel_names:
        for z in range(2):
            file_name = 'img_{}_t060_p050_z00{}.tif'.format(c_name, z)
            file_path = os.path.join(tif_dir, file_name)
            ijmeta = {"Info": json.dumps({"c": c_name, "z": z})}
            tifffile.imsave(
                file_path,
                im + 50 * z,
                ijmetadata=ijmeta,
            )
    # Write external metadata in dir
    self.meta_dict = {
        'Summary': {
            'Slices': 6,
            'PixelType': 'GRAY8',
            'Time': '2018-11-01 19:20:34 -0700',
            'z-step_um': 0.5,
            'PixelSize_um': 0,
            'BitDepth': 8,
            'Width': 15,
            'Height': 10
        },
    }
    self.json_filename = os.path.join(tif_dir, 'metadata.txt')
    json_ops.write_json_file(self.meta_dict, self.json_filename)
    # Create csv file for upload
    upload_dict = {
        'dataset_id': [dataset_serial],
        'file_name': [tif_dir],
        'description': ['Testing tifffolder upload'],
    }
    upload_csv = pd.DataFrame.from_dict(upload_dict)
    csv_path = os.path.join(self.temp_path, "test_tiffolder_upload.csv")
    upload_csv.to_csv(csv_path)
    config_path = os.path.join(
        self.temp_path,
        'config_tiffolder.json',
    )
    config = {
        "upload_type": "frames",
        "frames_format": "tif_folder",
        "microscope": "CZDRAGONFLY-PC",
        "filename_parser": "parse_sms_name",
        "storage": "local",
        "storage_access": self.mount_point
    }
    json_ops.write_json_file(config, config_path)
    # Upload data
    data_uploader.upload_data_and_update_db(
        csv=csv_path,
        login=self.credentials_path,
        config=config_path,
    )
    # Query database to find data_set and frames
    datasets = self.session.query(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial)
    self.assertEqual(datasets.count(), 1)
    dataset = datasets[0]
    self.assertTrue(dataset.frames)
    self.assertEqual(dataset.dataset_serial, dataset_serial)
    date_time = dataset.date_time
    self.assertEqual(date_time.year, 2010)
    self.assertEqual(date_time.month, 1)
    self.assertEqual(date_time.day, 1)
    self.assertEqual(dataset.description, 'Testing tifffolder upload')
    # Query frames_global
    global_query = self.session.query(db_ops.FramesGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial)
    self.assertEqual(
        global_query[0].storage_dir,
        'raw_frames/' + dataset_serial,
    )
    self.assertEqual(global_query[0].nbr_frames, 6)
    self.assertEqual(global_query[0].im_width, 15)
    self.assertEqual(global_query[0].im_height, 10)
    self.assertEqual(global_query[0].nbr_slices, 2)
    self.assertEqual(global_query[0].nbr_channels, 3)
    self.assertEqual(global_query[0].nbr_positions, 1)
    self.assertEqual(global_query[0].nbr_timepoints, 1)
    self.assertEqual(global_query[0].im_colors, 1)
    self.assertEqual(global_query[0].bit_depth, 'uint8')
    # Query frames
    frames = self.session.query(db_ops.Frames) \
        .join(db_ops.FramesGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial) \
        .order_by(db_ops.Frames.file_name)
    # Validate content
    # Channel numbers will be assigned alphabetically
    channel_names.sort()
    for i, (c, z) in enumerate(itertools.product(range(3), range(2))):
        im_name = 'im_c00{}_z00{}_t060_p050.png'.format(c, z)
        self.assertEqual(frames[i].file_name, im_name)
        self.assertEqual(frames[i].channel_idx, c)
        self.assertEqual(frames[i].channel_name, channel_names[c])
        self.assertEqual(frames[i].slice_idx, z)
        self.assertEqual(frames[i].time_idx, 60)
        self.assertEqual(frames[i].pos_idx, 50)
        self.assertEqual(
            frames[i].sha256,
            meta_utils.gen_sha256(im + 50 * z),
        )
    # Download frames from storage and compare to originals
    for i in range(len(channel_names)):
        for z in range(2):
            im_name = 'im_c00{}_z00{}_t060_p050.png'.format(i, z)
            im_path = os.path.join(
                self.mount_point,
                'raw_frames',
                dataset_serial,
                im_name,
            )
            im_out = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
            nose.tools.assert_equal(im_out.dtype, np.uint8)
            numpy.testing.assert_array_equal(im_out, im + z * 50)
def test_upload_ometif(self, mock_session):
    mock_session.return_value.__enter__.return_value = self.session
    dataset_serial = 'ISP-2005-01-01-01-00-00-0001'
    # Temporary frame
    im = np.ones((10, 15), dtype=np.uint16)
    # Metadata
    ijmeta = {
        "Info": json.dumps({
            "InitialPositionList": [
                {"Label": "Pos1"},
                {"Label": "Pos3"},
            ]
        }),
    }
    channel_ids = [1, 2]
    im_names = ['test_Pos1.ome.tif', 'test_Pos3.ome.tif']
    for i, c in enumerate(channel_ids):
        mmmetadata = json.dumps({
            "ChannelIndex": c,
            "Slice": 20,
            "FrameIndex": 30,
            "PositionIndex": 40,
            "Channel": 'channel_{}'.format(c),
        })
        extra_tags = [('MicroManagerMetadata', 's', 0, mmmetadata, True)]
        # Save test ome tif file
        file_path = os.path.join(self.temp_path, im_names[i])
        tifffile.imsave(
            file_path,
            im + i * 10000,
            ijmetadata=ijmeta,
            extratags=extra_tags,
        )
    schema_file_path = os.path.realpath(
        os.path.join(self.main_dir, 'metadata_schema.json'),
    )
    # Create csv file for upload
    upload_dict = {
        'dataset_id': [dataset_serial],
        'file_name': [self.temp_path],
        'description': ['Testing'],
        'positions': [[1, 3]],
        'schema_filename': [schema_file_path],
    }
    upload_csv = pd.DataFrame.from_dict(upload_dict)
    csv_path = os.path.join(self.temp_path, "test_ometif_upload.csv")
    upload_csv.to_csv(csv_path)
    config_path = os.path.join(
        self.temp_path,
        'config_ome_tiff.json',
    )
    config = {
        "upload_type": "frames",
        "frames_format": "ome_tiff",
        "microscope": "",
        "schema_filename": "metadata_schema.json",
        "storage": "local",
        "storage_access": self.mount_point
    }
    json_ops.write_json_file(config, config_path)
    # Upload data
    data_uploader.upload_data_and_update_db(
        csv=csv_path,
        login=self.credentials_path,
        config=config_path,
    )
    # Query database to find data_set and frames
    datasets = self.session.query(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial)
    self.assertEqual(datasets.count(), 1)
    dataset = datasets[0]
    self.assertEqual(dataset.id, 1)
    self.assertTrue(dataset.frames)
    self.assertEqual(dataset.dataset_serial, dataset_serial)
    date_time = dataset.date_time
    self.assertEqual(date_time.year, 2005)
    self.assertEqual(date_time.month, 1)
    self.assertEqual(date_time.day, 1)
    self.assertEqual(dataset.description, 'Testing')
    # Query frames_global
    global_query = self.session.query(db_ops.FramesGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial)
    self.assertEqual(
        global_query[0].storage_dir,
        'raw_frames/' + dataset_serial,
    )
    self.assertEqual(global_query[0].nbr_frames, 2)
    im_shape = im.shape
    self.assertEqual(global_query[0].im_width, im_shape[1])
    self.assertEqual(global_query[0].im_height, im_shape[0])
    self.assertEqual(global_query[0].nbr_slices, 1)
    self.assertEqual(global_query[0].nbr_channels, 2)
    self.assertEqual(global_query[0].nbr_positions, 1)
    self.assertEqual(global_query[0].nbr_timepoints, 1)
    self.assertEqual(global_query[0].im_colors, 1)
    self.assertEqual(global_query[0].bit_depth, 'uint16')
    # Query frames
    frames = self.session.query(db_ops.Frames) \
        .join(db_ops.FramesGlobal) \
        .join(db_ops.DataSet) \
        .filter(db_ops.DataSet.dataset_serial == dataset_serial) \
        .order_by(db_ops.Frames.file_name)
    shas = [meta_utils.gen_sha256(im), meta_utils.gen_sha256(im + 10000)]
    for i, c in enumerate(channel_ids):
        im_name = 'im_c00{}_z020_t030_p040.png'.format(c)
        self.assertEqual(frames[i].file_name, im_name)
        self.assertEqual(frames[i].channel_idx, c)
        self.assertEqual(frames[i].channel_name, 'channel_{}'.format(c))
        self.assertEqual(frames[i].slice_idx, 20)
        self.assertEqual(frames[i].time_idx, 30)
        self.assertEqual(frames[i].pos_idx, 40)
        self.assertEqual(frames[i].sha256, shas[i])
    # Download frames from storage and compare to originals
    for i, c in enumerate(channel_ids):
        im_name = 'im_c00{}_z020_t030_p040.png'.format(c)
        im_path = os.path.join(
            self.mount_point,
            'raw_frames',
            dataset_serial,
            im_name,
        )
        im_out = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
        nose.tools.assert_equal(im_out.dtype, np.uint16)
        numpy.testing.assert_array_equal(im_out, im + i * 10000)
def test_upload_ometif(self, mock_session):
    mock_session.return_value.__enter__.return_value = self.session
    dataset_serial = 'ISP-2005-01-01-01-00-00-0001'
    # Temporary frame
    im = np.ones((10, 15), dtype=np.uint16)
    # Metadata
    mmmetadata = json.dumps({
        "ChannelIndex": 1,
        "Slice": 2,
        "FrameIndex": 3,
        "PositionIndex": 4,
        "Channel": 'channel_name',
    })
    extra_tags = [('MicroManagerMetadata', 's', 0, mmmetadata, True)]
    ijmeta = {
        "Info": json.dumps({
            "InitialPositionList": [
                {"Label": "Pos1"},
                {"Label": "Pos3"},
            ]
        }),
    }
    # Save test ome tif file
    file_path = os.path.join(self.temp_path, "test_Pos1.ome.tif")
    tifffile.imsave(
        file_path,
        im,
        ijmetadata=ijmeta,
        extratags=extra_tags,
    )
    # Get path to json schema file
    dir_name = os.path.dirname(__file__)
    schema_file_path = os.path.realpath(
        os.path.join(dir_name, '..', '..', 'metadata_schema.json'),
    )
    # Create csv file for upload
    upload_dict = {
        'dataset_id': [dataset_serial],
        'file_name': [file_path],
        'description': ['Testing'],
        'positions': [1],
        'schema_filename': [schema_file_path],
    }
    upload_csv = pd.DataFrame.from_dict(upload_dict)
    csv_path = os.path.join(self.temp_path, "test_ometif_upload.csv")
    upload_csv.to_csv(csv_path)
    config_path = os.path.join(
        self.temp_path,
        'config_ome_tiff.json',
    )
    config = {
        "upload_type": "frames",
        "frames_format": "ome_tiff",
        "microscope": "",
        "schema_filename": "metadata_schema.json",
        "storage": "s3",
    }
    json_ops.write_json_file(config, config_path)
    # Upload data
    data_uploader.upload_data_and_update_db(
        csv=csv_path,
        login=self.credentials_path,
        config=config_path,
    )
def download_data(dataset_serial,
                  login,
                  dest,
                  storage='local',
                  storage_access=None,
                  metadata=True,
                  download=True,
                  nbr_workers=None,
                  positions=None,
                  times=None,
                  channels=None,
                  slices=None):
    """
    Find all files associated with unique project identifier and
    download them to a local directory.

    :param str dataset_serial: Unique dataset identifier
    :param str login: Full path to json file containing database login
        credentials
    :param str dest: Local destination directory name
    :param str storage: 'local' (default) - data will be stored locally and
        synced to S3 the same day. Or 'S3' - data will be uploaded
        directly to S3 then synced with local storage daily.
    :param str/None storage_access: If not using predefined storage locations,
        this parameter refers to mount_point for local storage and
        bucket_name for S3 storage.
    :param bool download: Downloads all files associated with dataset (default)
        If False, will only write csvs with metadata. Only for datasets
        split into frames
    :param bool metadata: Writes metadata (default True)
        global metadata in json, local for each frame in csv
    :param int/None nbr_workers: Number of workers for parallel download
        If None, it defaults to number of machine processors * 5
    :param list/None positions: Positions (FOVs) as integers (default None
        downloads all)
    :param list/None times: Timepoints as integers (default None downloads all)
    :param list/None channels: Channels as integer indices or strings for
        channel names (default None downloads all)
    :param list/None slices: Slice (z) integer indices (default None
        downloads all)
    """
    try:
        cli_utils.validate_id(dataset_serial)
    except AssertionError as e:
        raise AssertionError("Invalid ID:", e)

    # Create output directory as a subdirectory in dest named
    # dataset_serial. It stops if the subdirectory already exists to avoid
    # the risk of overwriting existing data
    dest_dir = os.path.join(dest, dataset_serial)
    try:
        os.makedirs(dest_dir, exist_ok=False)
    except FileExistsError as e:
        raise FileExistsError("Folder {} already exists, {}".format(
            dest_dir, e))

    # Get database connection URI
    db_connection = db_utils.get_connection_str(login)
    db_utils.check_connection(db_connection)
    # Instantiate database class
    db_inst = db_ops.DatabaseOperations(
        dataset_serial=dataset_serial,
    )
    # Import local or S3 storage class
    storage_class = aux_utils.get_storage_class(storage_type=storage)

    if metadata is False:
        # Just download file(s)
        assert download, \
            "You set metadata *and* download to False. You get nothing."
        with db_ops.session_scope(db_connection) as session:
            storage_dir, file_names = db_inst.get_filenames(
                session=session,
            )
    else:
        # If channels can be converted to ints, they're indices
        if channels is not None:
            if not isinstance(channels, list):
                channels = [channels]
            try:
                channels = [int(c) for c in channels]
            except ValueError:
                # Channels are names, not indices
                assert all([isinstance(c, str) for c in channels]), \
                    "channels must be either all str or int"
        # Get the metadata from the requested frames
        with db_ops.session_scope(db_connection) as session:
            global_meta, frames_meta = db_inst.get_frames_meta(
                session=session,
                positions=positions,
                times=times,
                channels=channels,
                slices=slices,
            )
        # Write global metadata to destination directory
        global_meta_filename = os.path.join(
            dest_dir,
            "global_metadata.json",
        )
        json_ops.write_json_file(
            meta_dict=global_meta,
            json_filename=global_meta_filename,
        )
        # Write info for each frame to destination directory
        local_meta_filename = os.path.join(
            dest_dir,
            "frames_meta.csv",
        )
        frames_meta.to_csv(local_meta_filename, sep=",")
        # Extract folder and file names if we want to download
        storage_dir = global_meta["storage_dir"]
        file_names = frames_meta["file_name"]

    if download:
        if nbr_workers is not None:
            assert nbr_workers > 0, \
                "Nbr of workers must be >0, not {}".format(nbr_workers)
        data_loader = storage_class(
            storage_dir=storage_dir,
            nbr_workers=nbr_workers,
            access_point=storage_access,
        )
        data_loader.download_files(file_names, dest_dir)
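

# Usage sketch for download_data, based only on the signature and docstring
# above. The dataset serial, credentials path, destination, mount point, and
# channel/position values are hypothetical placeholders, not values from this
# repository; adjust them to an actual uploaded dataset before running.
if __name__ == '__main__':
    download_data(
        dataset_serial='TEST-2005-06-09-20-00-00-1000',  # hypothetical dataset ID
        login='db_credentials.json',                     # path to DB credentials json
        dest='/tmp/downloads',                           # output parent directory
        storage='local',
        storage_access='/mnt/storage_mount_point',       # mount point for local storage
        channels=['phase'],                              # channel names or integer indices
        positions=[50],                                  # restrict download to one FOV
    )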