    def test_upload_file_already_in_db(self, mock_session):
        # Upload a file, then upload the same file again with overwrite=True
        mock_session.return_value.__enter__.return_value = self.session

        config_path = os.path.join(
            self.temp_path,
            'config_file.json',
        )
        config = {
            "upload_type": "file",
            "microscope": "Mass Spectrometry",
            "storage": "local",
            "storage_access": self.mount_point
        }
        json_ops.write_json_file(config, config_path)
        data_uploader.upload_data_and_update_db(
            csv=self.csv_path,
            login=self.credentials_path,
            config=config_path,
            overwrite=True,
        )
        # Try uploading a second time
        data_uploader.upload_data_and_update_db(
            csv=self.csv_path,
            login=self.credentials_path,
            config=config_path,
            overwrite=True,
        )
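        # No assertions needed: the test passes as long as the second
        # upload with overwrite=True completes without raising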

Example #2

    def setUp(self, MockPoolExecutor):
        """
        Set up temporary test directory and mock S3 bucket connection
        """
        # Magic mocking of multiprocessing
        MockPoolExecutor().__enter__().map = map_mock
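        # (map_mock presumably applies the mapped function serially,
        # so uploads run single-process under test)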
        # Mock S3 directory for upload
        self.storage_dir = "raw_frames/SMS-2010-01-01-00-00-00-0001"
        # Create temporary directory and write temp image
        self.tempdir = TempDirectory()
        self.temp_path = self.tempdir.path
        # Temporary frame
        self.im = np.ones((10, 15), dtype=np.uint16)
        self.im[2:5, 3:12] = 10000
        # Save test tif files
        self.channel_names = ['phase', 'brightfield', '666']
        # Write files in dir
        for c in self.channel_names:
            for z in range(2):
                file_name = 'img_{}_t000_p050_z00{}.tif'.format(c, z)
                file_path = os.path.join(self.temp_path, file_name)
                ijmeta = {"Info": json.dumps({"c": c, "z": z})}
                tifffile.imsave(
                    file_path,
                    self.im + 5000 * z,
                    ijmetadata=ijmeta,
                )
        # Write external metadata in dir
        self.meta_dict = {
            'Summary': {
                'Slices': 26,
                'PixelType': 'GRAY16',
                'Time': '2018-11-01 19:20:34 -0700',
                'z-step_um': 0.5,
                'PixelSize_um': 0,
                'BitDepth': 16,
                'Width': 15,
                'Height': 10
            },
        }
        self.json_filename = os.path.join(self.temp_path, 'metadata.txt')
        json_ops.write_json_file(self.meta_dict, self.json_filename)

        # Setup mock S3 bucket
        self.mock = mock_s3()
        self.mock.start()
        self.conn = boto3.resource('s3', region_name='us-east-1')
        self.bucket_name = 'czbiohub-imaging'
        self.conn.create_bucket(Bucket=self.bucket_name)
        # Instantiate file parser class
        storage_class = aux_utils.get_storage_class('s3')
        self.frames_inst = tif_splitter.TifFolderSplitter(
            data_path=self.temp_path,
            storage_dir=self.storage_dir,
            storage_class=storage_class,
        )
        # Upload data
        self.frames_inst.get_frames_and_metadata(
            filename_parser='parse_sms_name',
        )

    def setUp(self):
        super().setUp()
        # Create temporary directory and write temp image
        self.tempdir = TempDirectory()
        self.temp_path = self.tempdir.path
        # Mock file storage
        self.tempdir.makedir('storage_mount_point')
        self.mount_point = os.path.join(self.temp_path, 'storage_mount_point')
        self.tempdir.makedir('storage_mount_point/raw_files')
        self.tempdir.makedir('storage_mount_point/raw_frames')

        # Test metadata parameters
        self.nbr_channels = 2
        self.nbr_slices = 3
        # Mock S3 dir
        self.storage_dir = "raw_frames/TEST-2005-06-09-20-00-00-1000"
        # Temporary file with 6 frames, tifffile stores channels first
        self.im = 50 * np.ones((6, 10, 15), dtype=np.uint16)
        self.im[0, :5, 3:12] = 50000
        self.im[2, :5, 3:12] = 40000
        self.im[4, :5, 3:12] = 30000
        # Metadata
        self.description = 'ImageJ=1.52e\nimages=6\nchannels=2\nslices=3\nmax=10411.0'
        # Save test tif file
        self.file_path = os.path.join(self.temp_path, "A1_2_PROTEIN_test.tif")
        tifffile.imsave(
            self.file_path,
            self.im,
            description=self.description,
        )
        self.dataset_serial = 'TEST-2005-06-09-20-00-00-1000'
        # Create csv file for upload
        upload_dict = {
            'dataset_id': [self.dataset_serial],
            'file_name': [self.file_path],
            'description': ['Testing'],
            'parent_dataset_id': [None],
        }
        upload_csv = pd.DataFrame.from_dict(upload_dict)
        self.csv_path = os.path.join(self.temp_path, "test_upload.csv")
        upload_csv.to_csv(self.csv_path)
        self.credentials_path = os.path.join(
            self.main_dir,
            'db_credentials.json',
        )
        self.config_path = os.path.join(
            self.temp_path,
            'config_tif_id.json',
        )
        config = {
            "upload_type": "frames",
            "frames_format": "tif_id",
            "microscope": "Leica microscope CAN bus adapter",
            "filename_parser": "parse_ml_name",
            "storage": "local",
            "storage_access": self.mount_point
        }
        json_ops.write_json_file(config, self.config_path)

    def test_upload_file(self, mock_session):
        # Upload the same file but as file instead of frames
        mock_session.return_value.__enter__.return_value = self.session

        config_path = os.path.join(
            self.temp_path,
            'config_file.json',
        )
        config = {
            "upload_type": "file",
            "microscope": "Mass Spectrometry",
            "storage": "s3",
        }
        json_ops.write_json_file(config, config_path)
        data_uploader.upload_data_and_update_db(
            csv=self.csv_path,
            login=self.credentials_path,
            config=config_path,
        )
        # Query database to find data_set and file_global
        datasets = self.session.query(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
        self.assertEqual(datasets.count(), 1)
        dataset = datasets[0]
        self.assertEqual(dataset.id, 1)
        self.assertFalse(dataset.frames)
        self.assertEqual(dataset.dataset_serial, self.dataset_serial)
        date_time = dataset.date_time
        self.assertEqual(date_time.year, 2005)
        self.assertEqual(date_time.month, 6)
        self.assertEqual(date_time.day, 9)
        self.assertEqual(dataset.microscope, "Mass Spectrometry")
        self.assertEqual(dataset.description, 'Testing')
        # query file_global
        file_global = self.session.query(db_ops.FileGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == self.dataset_serial) \
            .one()
        expected_s3 = "raw_files/TEST-2005-06-09-20-00-00-1000"
        self.assertEqual(
            file_global.storage_dir,
            expected_s3,
        )
        expected_meta = {'file_origin': self.file_path}
        self.assertDictEqual(file_global.metadata_json, expected_meta)
        self.assertEqual(file_global.data_set, dataset)
        sha256 = meta_utils.gen_sha256(self.file_path)
        self.assertEqual(file_global.sha256, sha256)
        # Check that file has been uploaded
        s3_client = boto3.client('s3')
        key = os.path.join(expected_s3, "A1_2_PROTEIN_test.tif")
        # Just check that the file is there, we've dissected it before
        response = s3_client.list_objects_v2(Bucket=self.bucket_name,
                                             Prefix=key)
        self.assertEqual(response['KeyCount'], 1)

    def test_upload_file(self, mock_session):
        # Upload the same file but as file instead of frames
        mock_session.return_value.__enter__.return_value = self.session

        config_path = os.path.join(
            self.temp_path,
            'config_file.json',
        )
        config = {
            "upload_type": "file",
            "microscope": "Mass Spectrometry",
            "storage": "local",
            "storage_access": self.mount_point
        }
        json_ops.write_json_file(config, config_path)
        data_uploader.upload_data_and_update_db(
            csv=self.csv_path,
            login=self.credentials_path,
            config=config_path,
        )
        # Query database to find data_set and file_global
        datasets = self.session.query(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
        self.assertEqual(datasets.count(), 1)
        dataset = datasets[0]
        self.assertEqual(dataset.id, 1)
        self.assertFalse(dataset.frames)
        self.assertEqual(dataset.dataset_serial, self.dataset_serial)
        date_time = dataset.date_time
        self.assertEqual(date_time.year, 2005)
        self.assertEqual(date_time.month, 6)
        self.assertEqual(date_time.day, 9)
        self.assertEqual(dataset.microscope, "Mass Spectrometry")
        self.assertEqual(dataset.description, 'Testing')
        # query file_global
        file_global = self.session.query(db_ops.FileGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == self.dataset_serial) \
            .one()
        expected_dir = "raw_files/TEST-2005-06-09-20-00-00-1000"
        self.assertEqual(
            file_global.storage_dir,
            expected_dir,
        )
        expected_meta = {'file_origin': self.file_path}
        self.assertDictEqual(file_global.metadata_json, expected_meta)
        self.assertEqual(file_global.data_set, dataset)
        sha256 = meta_utils.gen_sha256(self.file_path)
        self.assertEqual(file_global.sha256, sha256)
        # Check that file has been uploaded
        file_path = os.path.join(self.mount_point, expected_dir,
                                 'A1_2_PROTEIN_test.tif')
        self.assertTrue(os.path.exists(file_path))

def test_write_json_file():
    with TempDirectory() as tempdir:
        valid_json = {
            "drivername": "postgres",
            "username": "******",
            "password": "******",
            "host": "db_host",
            "port": 666,
            "dbname": "db_name"
        }
        json_ops.write_json_file(
            valid_json,
            os.path.join(tempdir.path, 'valid_json_file.json'),
        )
        json_object = json_ops.read_json_file(
            os.path.join(tempdir.path, "valid_json_file.json"),
            schema_name="CREDENTIALS_SCHEMA",
        )
        nose.tools.assert_equal(json_object, valid_json)
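
# A hedged companion sketch, not in the original source: assuming
# json_ops.read_json_file validates against the named schema (e.g. via
# jsonschema), a credentials dict missing required keys should fail to
# load. The exact exception type is an assumption, so the sketch only
# asserts that *some* exception is raised.
def test_read_json_file_invalid_credentials():
    with TempDirectory() as tempdir:
        invalid_json = {"drivername": "postgres"}  # most required keys missing
        json_path = os.path.join(tempdir.path, 'invalid_json_file.json')
        json_ops.write_json_file(invalid_json, json_path)
        nose.tools.assert_raises(
            Exception,
            json_ops.read_json_file,
            json_path,
            schema_name="CREDENTIALS_SCHEMA",
        )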

Example #7

    def setUp(self, mock_session):
        super().setUp()
        mock_session.return_value.__enter__.return_value = self.session
        # Create temporary directory and write temp image
        self.tempdir = TempDirectory()
        self.temp_path = self.tempdir.path
        # Mock file storage
        self.tempdir.makedir('storage_mount_point')
        self.mount_point = os.path.join(self.temp_path, 'storage_mount_point')
        self.tempdir.makedir('storage_mount_point/raw_files')
        self.tempdir.makedir('storage_mount_point/raw_frames')
        # Test metadata parameters
        self.nbr_channels = 2
        self.nbr_slices = 3
        # Mock storage dir
        self.dataset_serial = 'FRAMES-2005-06-09-20-00-00-1000'
        self.frames_storage_dir = os.path.join('raw_frames',
                                               self.dataset_serial)
        # Temporary file with 6 frames, tifffile stores channels first
        self.im = 50 * np.ones((6, 10, 15), dtype=np.uint16)
        self.im[0, :5, 3:12] = 50000
        self.im[2, :5, 3:12] = 40000
        self.im[4, :5, 3:12] = 30000
        # Metadata
        self.description = 'ImageJ=1.52e\nimages=6\nchannels=2\nslices=3\nmax=10411.0'
        # Save test tif file
        self.file_path = os.path.join(self.temp_path, "A1_2_PROTEIN_test.tif")
        tifffile.imsave(
            self.file_path,
            self.im,
            description=self.description,
        )
        # Create input arguments for data upload
        upload_csv = pd.DataFrame(
            columns=['dataset_id', 'file_name', 'description'],
        )
        upload_csv = upload_csv.append(
            {
                'dataset_id': self.dataset_serial,
                'file_name': self.file_path,
                'description': 'Testing'
            },
            ignore_index=True,
        )
        self.csv_path_frames = os.path.join(
            self.temp_path,
            "test_upload_frames.csv",
        )
        upload_csv.to_csv(self.csv_path_frames)
        self.credentials_path = os.path.join(
            self.main_dir,
            'db_credentials.json',
        )
        self.config_path = os.path.join(
            self.temp_path,
            'config_tif_id.json',
        )
        config = {
            "upload_type": "frames",
            "frames_format": "tif_id",
            "microscope": "Leica microscope CAN bus adapter",
            "filename_parser": "parse_ml_name",
            "storage": "local",
            "storage_access": self.mount_point
        }
        json_ops.write_json_file(config, self.config_path)
        # Upload frames
        data_uploader.upload_data_and_update_db(
            csv=self.csv_path_frames,
            login=self.credentials_path,
            config=self.config_path,
        )
        # Create input args for file upload
        self.dataset_serial_file = 'FILE-2005-06-09-20-00-00-1000'
        self.file_storage_dir = os.path.join('raw_files',
                                             self.dataset_serial_file)
        self.csv_path_file = os.path.join(
            self.temp_path,
            "test_upload_file.csv",
        )
        # Change to unique serial
        upload_csv['dataset_id'] = self.dataset_serial_file
        upload_csv.to_csv(self.csv_path_file)
        config_path = os.path.join(
            self.temp_path,
            'config_file.json',
        )
        config = {
            "upload_type": "file",
            "microscope": "Mass Spectrometry",
            "storage": "local",
            "storage_access": self.mount_point
        }
        json_ops.write_json_file(config, config_path)
        # Upload file
        data_uploader.upload_data_and_update_db(
            csv=self.csv_path_file,
            login=self.credentials_path,
            config=config_path,
        )

    def test_upload_tiffolder(self, mock_session):
        mock_session.return_value.__enter__.return_value = self.session

        dataset_serial = 'SMS-2010-01-01-01-00-00-0005'
        # Temporary frame
        im = np.ones((10, 15), dtype=np.uint8)

        # Save test tif files
        self.tempdir.makedir('tiffolder')
        tif_dir = os.path.join(self.temp_path, 'tiffolder')
        channel_names = ['phase', 'brightfield', '666']
        # Write files in dir
        for c_name in channel_names:
            for z in range(2):
                file_name = 'img_{}_t060_p050_z00{}.tif'.format(c_name, z)
                file_path = os.path.join(tif_dir, file_name)
                ijmeta = {"Info": json.dumps({"c": c_name, "z": z})}
                tifffile.imsave(
                    file_path,
                    im + 50 * z,
                    ijmetadata=ijmeta,
                )
        # Write external metadata in dir
        self.meta_dict = {
            'Summary': {
                'Slices': 6,
                'PixelType': 'GRAY8',
                'Time': '2018-11-01 19:20:34 -0700',
                'z-step_um': 0.5,
                'PixelSize_um': 0,
                'BitDepth': 8,
                'Width': 15,
                'Height': 10
            },
        }
        self.json_filename = os.path.join(tif_dir, 'metadata.txt')
        json_ops.write_json_file(self.meta_dict, self.json_filename)

        # Create csv file for upload
        upload_dict = {
            'dataset_id': [dataset_serial],
            'file_name': [tif_dir],
            'description': ['Testing tifffolder upload'],
        }
        upload_csv = pd.DataFrame.from_dict(upload_dict)
        csv_path = os.path.join(self.temp_path, "test_tiffolder_upload.csv")
        upload_csv.to_csv(csv_path)
        config_path = os.path.join(
            self.temp_path,
            'config_tiffolder.json',
        )
        config = {
            "upload_type": "frames",
            "frames_format": "tif_folder",
            "microscope": "CZDRAGONFLY-PC",
            "filename_parser": "parse_sms_name",
            "storage": "local",
            "storage_access": self.mount_point
        }
        json_ops.write_json_file(config, config_path)
        # Upload data
        data_uploader.upload_data_and_update_db(
            csv=csv_path,
            login=self.credentials_path,
            config=config_path,
        )
        # Query database to find data_set and frames
        datasets = self.session.query(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial)
        self.assertEqual(datasets.count(), 1)
        dataset = datasets[0]
        self.assertTrue(dataset.frames)
        self.assertEqual(dataset.dataset_serial, dataset_serial)
        date_time = dataset.date_time
        self.assertEqual(date_time.year, 2010)
        self.assertEqual(date_time.month, 1)
        self.assertEqual(date_time.day, 1)
        self.assertEqual(dataset.description, 'Testing tifffolder upload')
        # query frames_global
        global_query = self.session.query(db_ops.FramesGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial)
        self.assertEqual(
            global_query[0].storage_dir,
            'raw_frames/' + dataset_serial,
        )
        self.assertEqual(
            global_query[0].nbr_frames,
            6,
        )
        self.assertEqual(
            global_query[0].im_width,
            15,
        )
        self.assertEqual(
            global_query[0].im_height,
            10,
        )
        self.assertEqual(global_query[0].nbr_slices, 2)
        self.assertEqual(
            global_query[0].nbr_channels,
            3,
        )
        self.assertEqual(
            global_query[0].nbr_positions,
            1,
        )
        self.assertEqual(
            global_query[0].nbr_timepoints,
            1,
        )
        self.assertEqual(
            global_query[0].im_colors,
            1,
        )
        self.assertEqual(
            global_query[0].bit_depth,
            'uint8',
        )
        # query frames
        frames = self.session.query(db_ops.Frames) \
            .join(db_ops.FramesGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial) \
            .order_by(db_ops.Frames.file_name)
        # Validate content
        # Channel numbers will be assigned alphabetically
        channel_names.sort()
        for i, (c, z) in enumerate(itertools.product(range(3), range(2))):
            im_name = 'im_c00{}_z00{}_t060_p050.png'.format(c, z)
            self.assertEqual(frames[i].file_name, im_name)
            self.assertEqual(frames[i].channel_idx, c)
            self.assertEqual(frames[i].channel_name, channel_names[c])
            self.assertEqual(frames[i].slice_idx, z)
            self.assertEqual(frames[i].time_idx, 60)
            self.assertEqual(frames[i].pos_idx, 50)
            self.assertEqual(
                frames[i].sha256,
                meta_utils.gen_sha256(im + 50 * z),
            )
        # Download frames from storage and compare to originals
        for i in range(len(channel_names)):
            for z in range(2):
                im_name = 'im_c00{}_z00{}_t060_p050.png'.format(i, z)
                im_path = os.path.join(
                    self.mount_point,
                    'raw_frames',
                    dataset_serial,
                    im_name,
                )
                im_out = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
                nose.tools.assert_equal(im_out.dtype, np.uint8)
                numpy.testing.assert_array_equal(im_out, im + 50 * z)

    def test_upload_ometif(self, mock_session):
        mock_session.return_value.__enter__.return_value = self.session

        dataset_serial = 'ISP-2005-01-01-01-00-00-0001'
        # Temporary frame
        im = np.ones((10, 15), dtype=np.uint16)
        # Metadata
        ijmeta = {
            "Info":
            json.dumps({
                "InitialPositionList": [{
                    "Label": "Pos1"
                }, {
                    "Label": "Pos3"
                }]
            }),
        }
        channel_ids = [1, 2]
        im_names = ['test_Pos1.ome.tif', 'test_Pos3.ome.tif']
        for i, c in enumerate(channel_ids):
            mmmetadata = json.dumps({
                "ChannelIndex": c,
                "Slice": 20,
                "FrameIndex": 30,
                "PositionIndex": 40,
                "Channel": 'channel_{}'.format(c),
            })
            extra_tags = [('MicroManagerMetadata', 's', 0, mmmetadata, True)]
            # Save test ome tif file
            file_path = os.path.join(self.temp_path, im_names[i])
            tifffile.imsave(
                file_path,
                im + i * 10000,
                ijmetadata=ijmeta,
                extratags=extra_tags,
            )

        schema_file_path = os.path.realpath(
            os.path.join(self.main_dir, 'metadata_schema.json'),
        )
        # Create csv file for upload
        upload_dict = {
            'dataset_id': [dataset_serial],
            'file_name': [self.temp_path],
            'description': ['Testing'],
            'positions': [[1, 3]],
            'schema_filename': [schema_file_path],
        }
        upload_csv = pd.DataFrame.from_dict(upload_dict)
        csv_path = os.path.join(self.temp_path, "test_ometif_upload.csv")
        upload_csv.to_csv(csv_path)
        config_path = os.path.join(
            self.temp_path,
            'config_ome_tiff.json',
        )
        config = {
            "upload_type": "frames",
            "frames_format": "ome_tiff",
            "microscope": "",
            "schema_filename": "metadata_schema.json",
            "storage": "local",
            "storage_access": self.mount_point
        }
        json_ops.write_json_file(config, config_path)
        # Upload data
        data_uploader.upload_data_and_update_db(
            csv=csv_path,
            login=self.credentials_path,
            config=config_path,
        )
        # Query database to find data_set and frames
        datasets = self.session.query(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial)
        self.assertEqual(datasets.count(), 1)
        dataset = datasets[0]
        self.assertEqual(dataset.id, 1)
        self.assertTrue(dataset.frames)
        self.assertEqual(dataset.dataset_serial, dataset_serial)
        date_time = dataset.date_time
        self.assertEqual(date_time.year, 2005)
        self.assertEqual(date_time.month, 1)
        self.assertEqual(date_time.day, 1)
        self.assertEqual(dataset.description, 'Testing')
        # query frames_global
        global_query = self.session.query(db_ops.FramesGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial)
        self.assertEqual(
            global_query[0].storage_dir,
            'raw_frames/' + dataset_serial,
        )
        self.assertEqual(
            global_query[0].nbr_frames,
            2,
        )
        im_shape = im.shape
        self.assertEqual(
            global_query[0].im_width,
            im_shape[1],
        )
        self.assertEqual(
            global_query[0].im_height,
            im_shape[0],
        )
        self.assertEqual(global_query[0].nbr_slices, 1)
        self.assertEqual(
            global_query[0].nbr_channels,
            2,
        )
        self.assertEqual(
            global_query[0].nbr_positions,
            1,
        )
        self.assertEqual(
            global_query[0].nbr_timepoints,
            1,
        )
        self.assertEqual(
            global_query[0].im_colors,
            1,
        )
        self.assertEqual(
            global_query[0].bit_depth,
            'uint16',
        )
        # query frames
        frames = self.session.query(db_ops.Frames) \
            .join(db_ops.FramesGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial) \
            .order_by(db_ops.Frames.file_name)

        shas = [meta_utils.gen_sha256(im), meta_utils.gen_sha256(im + 10000)]
        for i, c in enumerate(channel_ids):
            im_name = 'im_c00{}_z020_t030_p040.png'.format(c)
            self.assertEqual(frames[i].file_name, im_name)
            self.assertEqual(frames[i].channel_idx, c)
            self.assertEqual(frames[i].channel_name, 'channel_{}'.format(c))
            self.assertEqual(frames[i].slice_idx, 20)
            self.assertEqual(frames[i].time_idx, 30)
            self.assertEqual(frames[i].pos_idx, 40)
            self.assertEqual(frames[i].sha256, shas[i])
        # Download frames from storage and compare to originals
        for i, c in enumerate(channel_ids):
            im_name = 'im_c00{}_z020_t030_p040.png'.format(c)
            im_path = os.path.join(
                self.mount_point,
                'raw_frames',
                dataset_serial,
                im_name,
            )
            im_out = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
            nose.tools.assert_equal(im_out.dtype, np.uint16)
            numpy.testing.assert_array_equal(im_out, im + i * 10000)

    def test_upload_ometif(self, mock_session):
        mock_session.return_value.__enter__.return_value = self.session

        dataset_serial = 'ISP-2005-01-01-01-00-00-0001'
        # Temporary frame
        im = np.ones((10, 15), dtype=np.uint16)
        # Metadata
        mmmetadata = json.dumps({
            "ChannelIndex": 1,
            "Slice": 2,
            "FrameIndex": 3,
            "PositionIndex": 4,
            "Channel": 'channel_name',
        })
        extra_tags = [('MicroManagerMetadata', 's', 0, mmmetadata, True)]
        ijmeta = {
            "Info":
            json.dumps({
                "InitialPositionList": [{
                    "Label": "Pos1"
                }, {
                    "Label": "Pos3"
                }]
            }),
        }
        # Save test ome tif file
        file_path = os.path.join(self.temp_path, "test_Pos1.ome.tif")
        tifffile.imsave(
            file_path,
            im,
            ijmetadata=ijmeta,
            extratags=extra_tags,
        )
        # Get path to json schema file
        dir_name = os.path.dirname(__file__)
        schema_file_path = os.path.realpath(
            os.path.join(dir_name, '..', '..', 'metadata_schema.json'),
        )
        # Create csv file for upload
        upload_dict = {
            'dataset_id': [dataset_serial],
            'file_name': [file_path],
            'description': ['Testing'],
            'positions': [1],
            'schema_filename': [schema_file_path],
        }
        upload_csv = pd.DataFrame.from_dict(upload_dict)
        csv_path = os.path.join(self.temp_path, "test_ometif_upload.csv")
        upload_csv.to_csv(csv_path)
        config_path = os.path.join(
            self.temp_path,
            'config_ome_tiff.json',
        )
        config = {
            "upload_type": "frames",
            "frames_format": "ome_tiff",
            "microscope": "",
            "schema_filename": "metadata_schema.json",
            "storage": "s3",
        }
        json_ops.write_json_file(config, config_path)
        # Upload data
        data_uploader.upload_data_and_update_db(
            csv=csv_path,
            login=self.credentials_path,
            config=config_path,
        )

Example #11

def download_data(dataset_serial,
                  login,
                  dest,
                  storage='local',
                  storage_access=None,
                  metadata=True,
                  download=True,
                  nbr_workers=None,
                  positions=None,
                  times=None,
                  channels=None,
                  slices=None):
    """
    Find all files associated with unique project identifier and
    download them to a local directory.

    :param str dataset_serial: Unique dataset identifier
    :param str login: Full path to json file containing database login
                credentials
    :param str dest: Local destination directory name
    :param str storage: 'local' (default) - data is stored locally and
                synced to S3 the same day, or 's3' - data is uploaded
                directly to S3 then synced with local storage daily
    :param str/None storage_access: If not using predefined storage locations,
                this parameter refers to mount_point for local storage and
                bucket_name for S3 storage
    :param bool metadata: Writes metadata (default True):
                global metadata in json, local metadata for each frame in csv
    :param bool download: Downloads all files associated with the dataset
                (default True). If False, only writes csvs with metadata.
                Only for datasets split into frames
    :param int/None nbr_workers: Number of workers for parallel download.
                If None, defaults to number of machine processors * 5
    :param list/None positions: Positions (FOVs) as integers (default
                None downloads all)
    :param list/None times: Timepoints as integers (default None downloads all)
    :param list/None channels: Channels as integer indices or strings for
                channel names (default None downloads all)
    :param list/None slices: Slice (z) integer indices (default None
                downloads all)
    """
    try:
        cli_utils.validate_id(dataset_serial)
    except AssertionError as e:
        raise AssertionError("Invalid ID: {}".format(e))

    # Create output directory as a subdirectory in dest named
    # dataset_serial. It stops if the subdirectory already exists to avoid
    # the risk of overwriting existing data
    dest_dir = os.path.join(dest, dataset_serial)
    try:
        os.makedirs(dest_dir, exist_ok=False)
    except FileExistsError as e:
        raise FileExistsError("Folder {} already exists, {}".format(
            dest_dir, e))

    # Get database connection URI
    db_connection = db_utils.get_connection_str(login)
    db_utils.check_connection(db_connection)

    # Instantiate database class
    db_inst = db_ops.DatabaseOperations(dataset_serial=dataset_serial)
    # Import local or S3 storage class
    storage_class = aux_utils.get_storage_class(storage_type=storage)

    if metadata is False:
        # Just download file(s)
        assert download,\
            "You set metadata *and* download to False. You get nothing."
        with db_ops.session_scope(db_connection) as session:
            storage_dir, file_names = db_inst.get_filenames(session=session)
    else:
        # If channels can be converted to ints, they're indices
        if channels is not None:
            if not isinstance(channels, list):
                channels = [channels]
            try:
                channels = [int(c) for c in channels]
            except ValueError:
                # Channels are names, not indices
                assert all([isinstance(c, str) for c in channels]), \
                    "channels must be either all str or int"

        # Get the metadata from the requested frames
        with db_ops.session_scope(db_connection) as session:
            global_meta, frames_meta = db_inst.get_frames_meta(
                session=session,
                positions=positions,
                times=times,
                channels=channels,
                slices=slices,
            )
        # Write global metadata to destination directory
        global_meta_filename = os.path.join(
            dest_dir,
            "global_metadata.json",
        )
        json_ops.write_json_file(
            meta_dict=global_meta,
            json_filename=global_meta_filename,
        )
        # Write info for each frame to destination directory
        local_meta_filename = os.path.join(
            dest_dir,
            "frames_meta.csv",
        )
        frames_meta.to_csv(local_meta_filename, sep=",")
        # Extract folder and file names if we want to download
        storage_dir = global_meta["storage_dir"]
        file_names = frames_meta["file_name"]

    if download:
        if nbr_workers is not None:
            assert nbr_workers > 0,\
                "Number of workers must be >0, not {}".format(nbr_workers)
        data_loader = storage_class(
            storage_dir=storage_dir,
            nbr_workers=nbr_workers,
            access_point=storage_access,
        )
        data_loader.download_files(file_names, dest_dir)
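
# A minimal usage sketch (hypothetical paths and serial; not part of the
# original source): write only the metadata files for two named channels,
# leaving the frames themselves in storage.
if __name__ == '__main__':
    download_data(
        dataset_serial='TEST-2005-06-09-20-00-00-1000',
        login='/path/to/db_credentials.json',
        dest='/tmp/downloads',
        storage='local',
        storage_access='/mnt/storage_mount_point',
        metadata=True,
        download=False,  # skip file download; only global json + frames csv
        channels=['phase', 'brightfield'],
    )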