Example #1
 def test_download_frames(self, mock_session):
     mock_session.return_value.__enter__.return_value = self.session
     # Create dest dir
     self.tempdir.makedir('dest_dir')
     dest_dir = os.path.join(self.temp_path, 'dest_dir')
     # Download data
     data_downloader.download_data(
         dataset_serial=self.dataset_serial,
         login=self.credentials_path,
         dest=dest_dir,
         storage_access=self.mount_point,
     )
     # Images are separated by slice first then channel
     im_order = [0, 2, 4, 1, 3, 5]
     it = itertools.product(range(self.nbr_channels),
                            range(self.nbr_slices))
     for i, (c, z) in enumerate(it):
         im_name = 'im_c00{}_z00{}_t000_p000.png'.format(c, z)
         im_path = os.path.join(
             dest_dir,
             self.dataset_serial,
             im_name,
         )
         im = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
         numpy.testing.assert_array_equal(im, self.im[im_order[i], ...])
     # Read and validate frames meta
     meta_path = os.path.join(
         dest_dir,
         self.dataset_serial,
         'frames_meta.csv',
     )
     frames_meta = pd.read_csv(meta_path)
     for i, row in frames_meta.iterrows():
         c = i // self.nbr_slices
         z = i % self.nbr_slices
         self.assertEqual(row.channel_idx, c)
         self.assertEqual(row.slice_idx, z)
         self.assertEqual(row.time_idx, 0)
         self.assertEqual(row.pos_idx, 0)
         im_name = 'im_c00{}_z00{}_t000_p000.png'.format(c, z)
         self.assertEqual(row.file_name, im_name)
         sha256 = meta_utils.gen_sha256(self.im[im_order[i], ...])
         self.assertEqual(row.sha256, sha256)
     # Read and validate global meta
     meta_path = os.path.join(
         dest_dir,
         self.dataset_serial,
         'global_metadata.json',
     )
     meta_json = json_ops.read_json_file(meta_path)
     self.assertEqual(meta_json['storage_dir'], self.frames_storage_dir)
     self.assertEqual(meta_json['nbr_frames'], 6)
     self.assertEqual(meta_json['im_width'], 15)
     self.assertEqual(meta_json['im_height'], 10)
     self.assertEqual(meta_json['nbr_slices'], self.nbr_slices)
     self.assertEqual(meta_json['nbr_channels'], self.nbr_channels)
     self.assertEqual(meta_json['im_colors'], 1)
     self.assertEqual(meta_json['nbr_timepoints'], 1)
     self.assertEqual(meta_json['nbr_positions'], 1)
     self.assertEqual(meta_json['bit_depth'], 'uint16')
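Example #2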
    def test_get_stack_from_meta(self):
        # Upload image stack
        storage_dir = "raw_frames/ML-2005-05-23-10-00-00-0001"
        self.data_storage.upload_frames(self.stack_names, self.im_stack)
        global_meta = {
            "storage_dir": storage_dir,
            "nbr_frames": 5,
            "im_height": 10,
            "im_width": 15,
            "nbr_slices": 5,
            "nbr_channels": 1,
            "im_colors": 1,
            "bit_depth": "uint16",
            "nbr_timepoints": 1,
            "nbr_positions": 1,
        }
        # Download slices 1:4
        frames_meta = meta_utils.make_dataframe(nbr_frames=3)
        for i in range(3):
            sha = meta_utils.gen_sha256(self.im_stack[..., i + 1])
            frames_meta.loc[i] = [0, i + 1, 0, "A", self.stack_names[i + 1], 0, sha]

        im_stack, dim_order = self.data_storage.get_stack_from_meta(
            global_meta=global_meta,
            frames_meta=frames_meta,
        )
        # Stack has X = 10, Y = 15, grayscale, Z = 3, C = 1, T = 1, P = 1
        # so expected stack shape and order should be:
        expected_shape = (10, 15, 3)
        nose.tools.assert_equal(im_stack.shape, expected_shape)
        nose.tools.assert_equal(dim_order, "XYZ")
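Example #3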
    def serialize_upload(self, frame_file_tuple):
        """
        Given a path for a tif file and its database file name,
        read the file, serialize it and upload it. Extract file metadata.

        :param tuple frame_file_tuple: Path to tif file and S3 + DB file name
        :return str sha256: Checksum for image
        :return str dict_i: Frame metadata serialized as a JSON string
        """
        frame_path, frame_name = frame_file_tuple
        im = tifffile.TiffFile(frame_path)
        tiftags = im.pages[0].tags
        # Get all frame specific metadata
        dict_i = {}
        for t in tiftags.keys():
            dict_i[t] = tiftags[t].value

        im = im.asarray()
        sha256 = meta_utils.gen_sha256(im)
        # Upload to S3 with global client
        data_uploader.upload_im(
            im_name=frame_name,
            im=im,
            file_format=self.file_format,
        )
        # Do a json dumps otherwise some metadata won't pickle
        return sha256, json.dumps(dict_i)
Example #4
def test_gen_sha256_numpy():
    expected_sha = 'd1b8118646637256b66ef034778f8d0add8d00436ad1ebb051ef09cf19dbf2d2'

    # Temporary file with 6 frames, tifffile stores channels first
    im = 50 * np.ones((6, 50, 50), dtype=np.uint16)

    sha = meta_utils.gen_sha256(im)
    nose.tools.assert_equal(expected_sha, sha)
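
The tests treat meta_utils.gen_sha256 as a helper that accepts either a NumPy array or a file path and returns a hex digest. Below is a minimal sketch of an equivalent helper, assuming it simply hashes the raw bytes; the actual imaging_db implementation is not shown on this page and may differ.

import hashlib

import numpy as np


def gen_sha256_sketch(data):
    """Hypothetical stand-in for meta_utils.gen_sha256 (assumption, not the library code)."""
    sha = hashlib.sha256()
    if isinstance(data, np.ndarray):
        # Hash the raw array buffer
        sha.update(data.tobytes())
    else:
        # Treat the input as a file path and hash its contents in chunks
        with open(data, 'rb') as f:
            for block in iter(lambda: f.read(65536), b''):
                sha.update(block)
    return sha.hexdigest()

Example #5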
    def test_upload_file(self, mock_session):
        # Upload the same file but as file instead of frames
        mock_session.return_value.__enter__.return_value = self.session

        config_path = os.path.join(
            self.temp_path,
            'config_file.json',
        )
        config = {
            "upload_type": "file",
            "microscope": "Mass Spectrometry",
            "storage": "s3",
        }
        json_ops.write_json_file(config, config_path)
        data_uploader.upload_data_and_update_db(
            csv=self.csv_path,
            login=self.credentials_path,
            config=config_path,
        )
        # Query database to find data_set and file_global
        datasets = self.session.query(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
        self.assertEqual(datasets.count(), 1)
        dataset = datasets[0]
        self.assertEqual(dataset.id, 1)
        self.assertFalse(dataset.frames)
        self.assertEqual(dataset.dataset_serial, self.dataset_serial)
        date_time = dataset.date_time
        self.assertEqual(date_time.year, 2005)
        self.assertEqual(date_time.month, 6)
        self.assertEqual(date_time.day, 9)
        self.assertEqual(dataset.microscope, "Mass Spectrometry")
        self.assertEqual(dataset.description, 'Testing')
        # query file_global
        file_global = self.session.query(db_ops.FileGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == self.dataset_serial) \
            .one()
        expected_s3 = "raw_files/TEST-2005-06-09-20-00-00-1000"
        self.assertEqual(
            file_global.storage_dir,
            expected_s3,
        )
        expected_meta = {'file_origin': self.file_path}
        self.assertDictEqual(file_global.metadata_json, expected_meta)
        self.assertEqual(file_global.data_set, dataset)
        sha256 = meta_utils.gen_sha256(self.file_path)
        self.assertEqual(file_global.sha256, sha256)
        # Check that file has been uploaded
        s3_client = boto3.client('s3')
        key = os.path.join(expected_s3, "A1_2_PROTEIN_test.tif")
        # Just check that the file is there, we've dissected it before
        response = s3_client.list_objects_v2(Bucket=self.bucket_name,
                                             Prefix=key)
        self.assertEqual(response['KeyCount'], 1)
Example #6
    def _generate_hash(self, im_stack):
        """
        Calculates the sha256 checksum for all image slices

        :param ndarray im_stack: image to be hashed
        :return list sha: sha256 hashes indexed by the image index
        """
        sha = []
        for i in range(im_stack.shape[3]):
            sha.append(meta_utils.gen_sha256(im_stack[..., i]))

        return sha
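
A short usage sketch of the slice-wise hashing above, assuming a 4D stack with the frame index on the last axis; the stack shape and values are illustrative only.

import numpy as np

# meta_utils is the imaging_db module exercised throughout these examples
# (import not shown, as in the snippets above)
# Illustrative 4D stack: rows x cols x colors x frames (values made up)
im_stack = np.random.randint(0, 2 ** 16, size=(10, 15, 1, 5), dtype=np.uint16)
# One sha256 per frame, mirroring _generate_hash
hashes = [meta_utils.gen_sha256(im_stack[..., i]) for i in range(im_stack.shape[3])]
assert len(hashes) == im_stack.shape[3]

Example #7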
    def test_upload_file(self, mock_session):
        # Upload the same file but as file instead of frames
        mock_session.return_value.__enter__.return_value = self.session

        config_path = os.path.join(
            self.temp_path,
            'config_file.json',
        )
        config = {
            "upload_type": "file",
            "microscope": "Mass Spectrometry",
            "storage": "local",
            "storage_access": self.mount_point
        }
        json_ops.write_json_file(config, config_path)
        data_uploader.upload_data_and_update_db(
            csv=self.csv_path,
            login=self.credentials_path,
            config=config_path,
        )
        # Query database to find data_set and file_global
        datasets = self.session.query(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
        self.assertEqual(datasets.count(), 1)
        dataset = datasets[0]
        self.assertEqual(dataset.id, 1)
        self.assertFalse(dataset.frames)
        self.assertEqual(dataset.dataset_serial, self.dataset_serial)
        date_time = dataset.date_time
        self.assertEqual(date_time.year, 2005)
        self.assertEqual(date_time.month, 6)
        self.assertEqual(date_time.day, 9)
        self.assertEqual(dataset.microscope, "Mass Spectrometry")
        self.assertEqual(dataset.description, 'Testing')
        # query file_global
        file_global = self.session.query(db_ops.FileGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == self.dataset_serial) \
            .one()
        expected_dir = "raw_files/TEST-2005-06-09-20-00-00-1000"
        self.assertEqual(
            file_global.storage_dir,
            expected_dir,
        )
        expected_meta = {'file_origin': self.file_path}
        self.assertDictEqual(file_global.metadata_json, expected_meta)
        self.assertEqual(file_global.data_set, dataset)
        sha256 = meta_utils.gen_sha256(self.file_path)
        self.assertEqual(file_global.sha256, sha256)
        # Check that file has been uploaded
        file_path = os.path.join(self.mount_point, expected_dir,
                                 'A1_2_PROTEIN_test.tif')
        self.assertTrue(os.path.exists(file_path))
Example #8
def migrate_db(credentials_filename):
    """
    Updates sha256 checksums for all files and frames

    :param str credentials_filename: Full path to DB credentials file
    """
    # Edit this depending on where your database credential file is stored
    # This assumes it's stored in dir above imagingDB
    dir_name = os.path.abspath(os.path.join('..'))
    dest_dir = os.path.join(dir_name, 'temp_downloads')
    os.makedirs(dest_dir, exist_ok=True)
    credentials_str = db_utils.get_connection_str(
        credentials_filename=credentials_filename, )
    # Get files and compute checksums
    with db_ops.session_scope(credentials_str) as session:
        files = session.query(db_ops.FileGlobal)
        for file in files:
            if file.sha256 is None:
                data_loader = s3_storage.S3Storage(
                    storage_dir=file.storage_dir, )
                file_name = file.metadata_json["file_origin"]
                file_name = file_name.split("/")[-1]
                dest_path = os.path.join(dest_dir, file_name)
                data_loader.download_file(
                    file_name=file_name,
                    dest_path=dest_path,
                )
                checksum = meta_utils.gen_sha256(dest_path)
                file.sha256 = checksum

    # Get frames and compute checksums
    with db_ops.session_scope(credentials_str) as session:
        frames = session.query(db_ops.Frames)
        for frame in frames:
            if frame.sha256 is None:
                data_loader = s3_storage.S3Storage(
                    storage_dir=frame.frames_global.storage_dir, )
                im = data_loader.get_im(frame.file_name)
                checksum = meta_utils.gen_sha256(im)
                frame.sha256 = checksum
Example #9
 def test_get_frames_meta(self):
     frames_meta = self.frames_inst.get_frames_meta()
     for i, (c, z) in enumerate(itertools.product(range(3), range(2))):
         # Validate file name
         expected_name = 'im_c00{}_z00{}_t000_p050.png'.format(c, z)
         self.assertEqual(frames_meta.loc[i, 'file_name'], expected_name)
         # Validate checksum
         expected_sha = meta_utils.gen_sha256(self.im + 5000 * z)
         self.assertEqual(frames_meta.loc[i, 'sha256'], expected_sha)
         # Validate indices
         self.assertEqual(frames_meta.loc[i, 'channel_idx'], c)
         self.assertEqual(frames_meta.loc[i, 'slice_idx'], z)
         self.assertEqual(frames_meta.loc[i, 'time_idx'], 0)
         self.assertEqual(frames_meta.loc[i, 'pos_idx'], 50)
Example #10
 def test_get_frames_no_metadata(self, MockPoolExecutor):
     # Magic mocking of multiprocessing
     MockPoolExecutor().__enter__().map = map_mock
     os.remove(self.json_filename)
     self.frames_inst.get_frames_and_metadata(
         filename_parser='parse_sms_name', )
     frames_meta = self.frames_inst.get_frames_meta()
     for i, (c, z) in enumerate(itertools.product(range(3), range(2))):
         # Validate file name
         expected_name = 'im_c00{}_z00{}_t000_p050.png'.format(c, z)
         self.assertEqual(frames_meta.loc[i, 'file_name'], expected_name)
         # Validate checksum
         expected_sha = meta_utils.gen_sha256(self.im + 5000 * z)
         self.assertEqual(frames_meta.loc[i, 'sha256'], expected_sha)
         # Validate indices
         self.assertEqual(frames_meta.loc[i, 'channel_idx'], c)
         self.assertEqual(frames_meta.loc[i, 'slice_idx'], z)
         self.assertEqual(frames_meta.loc[i, 'time_idx'], 0)
         self.assertEqual(frames_meta.loc[i, 'pos_idx'], 50)
Example #11
def test_gen_sha256_file():
    expected_sha = 'af87894cc23928df908b02bd94842d063a5c7aae9eb1bbc2bb5c9475d674bcba'

    with TempDirectory() as temp_dir:
        temp_path = temp_dir.path

        # Temporary file with 6 frames, tifffile stores channels first
        im = 50 * np.ones((6, 10, 15), dtype=np.uint16)
        im[0, :5, 3:12] = 50000
        im[2, :5, 3:12] = 40000
        im[4, :5, 3:12] = 30000

        description = 'ImageJ=1.52e\nimages=6\nchannels=2\nslices=3\nmax=10411.0'

        # Save test tif file
        file_path = os.path.join(temp_path, "A1_2_PROTEIN_test.tif")
        tifffile.imsave(file=file_path,
                        data=im,
                        description=description,
                        datetime=datetime.datetime(2019, 1, 1))

        sha = meta_utils.gen_sha256(file_path)
    nose.tools.assert_equal(expected_sha, sha)
Example #12
    def test_get_stack_from_meta(self):
        # Upload image stack
        storage_dir = "raw_frames/ML-2005-05-23-10-00-00-0001"
        data_storage = s3_storage.S3Storage(storage_dir, self.nbr_workers)
        data_storage.upload_frames(self.stack_names, self.im_stack)
        global_meta = {
            "storage_dir": storage_dir,
            "nbr_frames": 2,
            "im_height": 10,
            "im_width": 15,
            "nbr_slices": 1,
            "nbr_channels": 2,
            "im_colors": 1,
            "bit_depth": "uint16",
            "nbr_timepoints": 1,
            "nbr_positions": 1,
        }
        frames_meta = meta_utils.make_dataframe(
            nbr_frames=global_meta["nbr_frames"], )

        nbr_frames = self.im_stack.shape[2]
        sha = [None] * nbr_frames
        for i in range(nbr_frames):
            sha[i] = meta_utils.gen_sha256(self.im_stack[..., i])

        frames_meta.loc[0] = [0, 0, 0, "A", "im1.png", 0, sha[0]]
        frames_meta.loc[1] = [1, 0, 0, "B", "im2.png", 0, sha[1]]
        im_stack, dim_order = data_storage.get_stack_from_meta(
            global_meta,
            frames_meta,
        )
        # Stack has X = 10, Y = 15, grayscale, Z = 1, C = 2, T = 1, P = 1
        # so expected stack shape and order should be:
        expected_shape = (10, 15, 2)
        nose.tools.assert_equal(im_stack.shape, expected_shape)
        nose.tools.assert_equal(dim_order, "XYC")
Example #13
def upload_data_and_update_db(csv,
                              login,
                              config,
                              nbr_workers=None,
                              overwrite=False):
    """
    Takes a csv file in which each row represents a dataset, uploads the data
    to storage and metadata to database. If 'frames' is selected as upload
    type, each dataset will be split into individual 2D frames before moving
    to storage.
    TODO: Add logging instead of printing

    :param str csv: Full path to csv file containing the following fields
        for each file to be uploaded:
            str dataset_id: Unique dataset ID <ID>-YYYY-MM-DD-HH-MM-SS-<SSSS>
            str file_name: Full path to file to be uploaded
            str description: Short description of file
            str parent_dataset_id: Parent dataset unique ID if there is one
            list positions: Which position files in folder to upload.
                Uploads all if left empty and file_name is a folder.
                Only valid for ome-tiff uploads.
    :param str login: Full path to json file containing login credentials
    :param str config: Full path to json config file containing the fields:
            str upload_type: Specify if the file should be split prior to upload
                Valid options: 'frames' or 'file'
            str frames_format: Which file splitter class to use.
                Valid options:
                'ome_tiff' needs MicroManagerMetadata tag for each frame for metadata
                'tif_folder' when each file is already an individual frame
                and relies on MicroManager metadata
                'tif_id' needs ImageDescription tag in first frame page for metadata
            str storage: 'local' (default) - data will be stored locally and
                synced to S3 the same day. Or 'S3' - data will be uploaded
                directly to S3 then synced with local storage daily.
            str storage_access: If not using predefined storage locations,
                this parameter refers to mount_point for local storage and
                bucket_name for S3 storage. (optional)
            str json_meta: If splitting to frames, give full path to json
                metadata schema for reading metadata (optional)
    :param int, None nbr_workers: Number of workers for parallel uploads
    :param bool overwrite: Use with caution if your upload was interrupted
        and you want to overwrite existing data in the database and storage
    """
    # Assert that csv file exists and load it
    assert os.path.isfile(csv), \
        "File doesn't exist: {}".format(csv)
    files_data = pd.read_csv(csv)

    # Get database connection URI
    db_connection = db_utils.get_connection_str(login)
    db_utils.check_connection(db_connection)
    # Read and validate config json
    config_json = json_ops.read_json_file(
        json_filename=config,
        schema_name="CONFIG_SCHEMA",
    )
    # Assert that upload type is valid
    upload_type = config_json['upload_type'].lower()
    assert upload_type in {"file", "frames"}, \
        "upload_type should be 'file' or 'frames', not {}".format(
            upload_type,
        )
    if nbr_workers is not None:
        assert nbr_workers > 0, \
            "Nbr of worker must be >0, not {}".format(nbr_workers)
    # Import local or S3 storage class
    storage = 'local'
    if 'storage' in config_json:
        storage = config_json['storage']
    storage_class = aux_utils.get_storage_class(storage_type=storage)
    storage_access = None
    if 'storage_access' in config_json:
        storage_access = config_json['storage_access']

    # Make sure microscope is a string
    microscope = None
    if 'microscope' in config_json:
        if isinstance(config_json['microscope'], str):
            microscope = config_json['microscope']

    if upload_type == 'frames':
        # If upload type is frames, check for frames format
        assert 'frames_format' in config_json, \
            'You must specify the type of file(s)'
        splitter_class = aux_utils.get_splitter_class(
            config_json['frames_format'],
        )
    # Upload all files
    for file_nbr, row in files_data.iterrows():
        # Assert that ID is correctly formatted
        dataset_serial = row.dataset_id
        try:
            cli_utils.validate_id(dataset_serial)
        except AssertionError as e:
            raise AssertionError("Invalid ID:", e)

        # Get S3 directory based on upload type
        if upload_type == "frames":
            storage_dir = "/".join([FRAME_FOLDER_NAME, dataset_serial])
        else:
            storage_dir = "/".join([FILE_FOLDER_NAME, dataset_serial])
        # Instantiate database operations class
        db_inst = db_ops.DatabaseOperations(
            dataset_serial=dataset_serial,
        )
        # Make sure dataset is not already in database
        if not overwrite:
            with db_ops.session_scope(db_connection) as session:
                db_inst.assert_unique_id(session)
        # Check for parent dataset
        parent_dataset_id = 'None'
        if 'parent_dataset_id' in row:
            parent_dataset_id = row.parent_dataset_id
        # Check for dataset description
        description = None
        if 'description' in row:
            # Comparing description with itself is False for NaN (empty csv cell)
            if row.description == row.description:
                description = row.description

        if upload_type == "frames":
            # Instantiate splitter class
            frames_inst = splitter_class(
                data_path=row.file_name,
                storage_dir=storage_dir,
                storage_class=storage_class,
                storage_access=storage_access,
                overwrite=overwrite,
                file_format=FRAME_FILE_FORMAT,
                nbr_workers=nbr_workers,
            )
            # Get kwargs if any
            kwargs = {}
            if 'positions' in row:
                positions = row['positions']
                if not pd.isna(positions):
                    kwargs['positions'] = positions
            if 'schema_filename' in config_json:
                kwargs['schema_filename'] = config_json['schema_filename']
            if 'filename_parser' in config_json:
                filename_parser = config_json['filename_parser']
                kwargs['filename_parser'] = filename_parser
            # Extract metadata and split file into frames
            frames_inst.get_frames_and_metadata(**kwargs)

            # Add frames metadata to database
            try:
                with db_ops.session_scope(db_connection) as session:
                    db_inst.insert_frames(
                        session=session,
                        description=description,
                        frames_meta=frames_inst.get_frames_meta(),
                        frames_json_meta=frames_inst.get_frames_json(),
                        global_meta=frames_inst.get_global_meta(),
                        global_json_meta=frames_inst.get_global_json(),
                        microscope=microscope,
                        parent_dataset=parent_dataset_id,
                    )
            except AssertionError as e:
                print("Data set {} already in DB".format(dataset_serial), e)
        # File upload
        else:
            # Just upload file without opening it
            assert os.path.isfile(row.file_name), \
                "File doesn't exist: {}".format(row.file_name)
            data_uploader = storage_class(
                storage_dir=storage_dir,
                access_point=storage_access,
            )
            if not overwrite:
                data_uploader.assert_unique_id()
            try:
                data_uploader.upload_file(file_path=row.file_name)
                print("File {} uploaded to S3".format(row.file_name))
            except AssertionError as e:
                print("File already on S3, moving on to DB entry. {}".format(e))

            sha = meta_utils.gen_sha256(row.file_name)
            # Add file entry to DB once I can get it tested
            global_json = {"file_origin": row.file_name}
            file_name = row.file_name.split("/")[-1]
            try:
                with db_ops.session_scope(db_connection) as session:
                    db_inst.insert_file(
                        session=session,
                        description=description,
                        storage_dir=storage_dir,
                        file_name=file_name,
                        global_json_meta=global_json,
                        microscope=microscope,
                        parent_dataset=parent_dataset_id,
                        sha256=sha,
                    )
                print("File info for {} inserted in DB".format(dataset_serial))
            except AssertionError as e:
                print("File {} already in database".format(dataset_serial))
    def test_upload_tiffolder(self, mock_session):
        mock_session.return_value.__enter__.return_value = self.session

        dataset_serial = 'SMS-2010-01-01-01-00-00-0005'
        # Temporary frame
        im = np.ones((10, 15), dtype=np.uint8)

        # Save test tif files
        self.tempdir.makedir('tiffolder')
        tif_dir = os.path.join(self.temp_path, 'tiffolder')
        channel_names = ['phase', 'brightfield', '666']
        # Write files in dir
        for c_name in channel_names:
            for z in range(2):
                file_name = 'img_{}_t060_p050_z00{}.tif'.format(c_name, z)
                file_path = os.path.join(tif_dir, file_name)
                ijmeta = {"Info": json.dumps({"c": c_name, "z": z})}
                tifffile.imsave(
                    file_path,
                    im + 50 * z,
                    ijmetadata=ijmeta,
                )
        # Write external metadata in dir
        self.meta_dict = {
            'Summary': {
                'Slices': 6,
                'PixelType': 'GRAY8',
                'Time': '2018-11-01 19:20:34 -0700',
                'z-step_um': 0.5,
                'PixelSize_um': 0,
                'BitDepth': 8,
                'Width': 15,
                'Height': 10
            },
        }
        self.json_filename = os.path.join(tif_dir, 'metadata.txt')
        json_ops.write_json_file(self.meta_dict, self.json_filename)

        # Create csv file for upload
        upload_dict = {
            'dataset_id': [dataset_serial],
            'file_name': [tif_dir],
            'description': ['Testing tifffolder upload'],
        }
        upload_csv = pd.DataFrame.from_dict(upload_dict)
        csv_path = os.path.join(self.temp_path, "test_tiffolder_upload.csv")
        upload_csv.to_csv(csv_path)
        config_path = os.path.join(
            self.temp_path,
            'config_tiffolder.json',
        )
        config = {
            "upload_type": "frames",
            "frames_format": "tif_folder",
            "microscope": "CZDRAGONFLY-PC",
            "filename_parser": "parse_sms_name",
            "storage": "local",
            "storage_access": self.mount_point
        }
        json_ops.write_json_file(config, config_path)
        # Upload data
        data_uploader.upload_data_and_update_db(
            csv=csv_path,
            login=self.credentials_path,
            config=config_path,
        )
        # Query database to find data_set and frames
        datasets = self.session.query(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial)
        self.assertEqual(datasets.count(), 1)
        dataset = datasets[0]
        self.assertTrue(dataset.frames)
        self.assertEqual(dataset.dataset_serial, dataset_serial)
        date_time = dataset.date_time
        self.assertEqual(date_time.year, 2010)
        self.assertEqual(date_time.month, 1)
        self.assertEqual(date_time.day, 1)
        self.assertEqual(dataset.description, 'Testing tifffolder upload')
        # query frames_global
        global_query = self.session.query(db_ops.FramesGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial)
        self.assertEqual(
            global_query[0].storage_dir,
            'raw_frames/' + dataset_serial,
        )
        self.assertEqual(
            global_query[0].nbr_frames,
            6,
        )
        self.assertEqual(
            global_query[0].im_width,
            15,
        )
        self.assertEqual(
            global_query[0].im_height,
            10,
        )
        self.assertEqual(global_query[0].nbr_slices, 2)
        self.assertEqual(
            global_query[0].nbr_channels,
            3,
        )
        self.assertEqual(
            global_query[0].nbr_positions,
            1,
        )
        self.assertEqual(
            global_query[0].nbr_timepoints,
            1,
        )
        self.assertEqual(
            global_query[0].im_colors,
            1,
        )
        self.assertEqual(
            global_query[0].bit_depth,
            'uint8',
        )
        # query frames
        frames = self.session.query(db_ops.Frames) \
            .join(db_ops.FramesGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial) \
            .order_by(db_ops.Frames.file_name)
        # Validate content
        # Channel numbers will be assigned alphabetically
        channel_names.sort()
        for i, (c, z) in enumerate(itertools.product(range(3), range(2))):
            im_name = 'im_c00{}_z00{}_t060_p050.png'.format(c, z)
            self.assertEqual(frames[i].file_name, im_name)
            self.assertEqual(frames[i].channel_idx, c)
            self.assertEqual(frames[i].channel_name, channel_names[c])
            self.assertEqual(frames[i].slice_idx, z)
            self.assertEqual(frames[i].time_idx, 60)
            self.assertEqual(frames[i].pos_idx, 50)
            self.assertEqual(
                frames[i].sha256,
                meta_utils.gen_sha256(im + 50 * z),
            )
        # Download frames from storage and compare to originals
        for i in range(len(channel_names)):
            for z in range(2):
                im_name = 'im_c00{}_z00{}_t060_p050.png'.format(i, z)
                im_path = os.path.join(
                    self.mount_point,
                    'raw_frames',
                    dataset_serial,
                    im_name,
                )
                im_out = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
                nose.tools.assert_equal(im_out.dtype, np.uint8)
                numpy.testing.assert_array_equal(im_out, im + z * 50)
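Example #15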
    def test_upload_ometif(self, mock_session):
        mock_session.return_value.__enter__.return_value = self.session

        dataset_serial = 'ISP-2005-01-01-01-00-00-0001'
        # Temporary frame
        im = np.ones((10, 15), dtype=np.uint16)
        # Metadata
        ijmeta = {
            "Info":
            json.dumps({
                "InitialPositionList": [{
                    "Label": "Pos1"
                }, {
                    "Label": "Pos3"
                }]
            }),
        }
        channel_ids = [1, 2]
        im_names = ['test_Pos1.ome.tif', 'test_Pos3.ome.tif']
        for i, c in enumerate(channel_ids):
            mmmetadata = json.dumps({
                "ChannelIndex": c,
                "Slice": 20,
                "FrameIndex": 30,
                "PositionIndex": 40,
                "Channel": 'channel_{}'.format(c),
            })
            extra_tags = [('MicroManagerMetadata', 's', 0, mmmetadata, True)]
            # Save test ome tif file
            file_path = os.path.join(self.temp_path, im_names[i])
            tifffile.imsave(
                file_path,
                im + i * 10000,
                ijmetadata=ijmeta,
                extratags=extra_tags,
            )

        schema_file_path = os.path.realpath(
            os.path.join(self.main_dir, 'metadata_schema.json'), )
        # Create csv file for upload
        upload_dict = {
            'dataset_id': [dataset_serial],
            'file_name': [self.temp_path],
            'description': ['Testing'],
            'positions': [[1, 3]],
            'schema_filename': [schema_file_path],
        }
        upload_csv = pd.DataFrame.from_dict(upload_dict)
        csv_path = os.path.join(self.temp_path, "test_ometif_upload.csv")
        upload_csv.to_csv(csv_path)
        config_path = os.path.join(
            self.temp_path,
            'config_ome_tiff.json',
        )
        config = {
            "upload_type": "frames",
            "frames_format": "ome_tiff",
            "microscope": "",
            "schema_filename": "metadata_schema.json",
            "storage": "local",
            "storage_access": self.mount_point
        }
        json_ops.write_json_file(config, config_path)
        # Upload data
        data_uploader.upload_data_and_update_db(
            csv=csv_path,
            login=self.credentials_path,
            config=config_path,
        )
        # Query database to find data_set and frames
        datasets = self.session.query(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial)
        self.assertEqual(datasets.count(), 1)
        dataset = datasets[0]
        self.assertEqual(dataset.id, 1)
        self.assertTrue(dataset.frames)
        self.assertEqual(dataset.dataset_serial, dataset_serial)
        date_time = dataset.date_time
        self.assertEqual(date_time.year, 2005)
        self.assertEqual(date_time.month, 1)
        self.assertEqual(date_time.day, 1)
        self.assertEqual(dataset.description, 'Testing')
        # query frames_global
        global_query = self.session.query(db_ops.FramesGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial)
        self.assertEqual(
            global_query[0].storage_dir,
            'raw_frames/' + dataset_serial,
        )
        self.assertEqual(
            global_query[0].nbr_frames,
            2,
        )
        im_shape = im.shape
        self.assertEqual(
            global_query[0].im_width,
            im_shape[1],
        )
        self.assertEqual(
            global_query[0].im_height,
            im_shape[0],
        )
        self.assertEqual(global_query[0].nbr_slices, 1)
        self.assertEqual(
            global_query[0].nbr_channels,
            2,
        )
        self.assertEqual(
            global_query[0].nbr_positions,
            1,
        )
        self.assertEqual(
            global_query[0].nbr_timepoints,
            1,
        )
        self.assertEqual(
            global_query[0].im_colors,
            1,
        )
        self.assertEqual(
            global_query[0].bit_depth,
            'uint16',
        )
        # query frames
        frames = self.session.query(db_ops.Frames) \
            .join(db_ops.FramesGlobal) \
            .join(db_ops.DataSet) \
            .filter(db_ops.DataSet.dataset_serial == dataset_serial) \
            .order_by(db_ops.Frames.file_name)

        shas = [meta_utils.gen_sha256(im), meta_utils.gen_sha256(im + 10000)]
        for i, c in enumerate(channel_ids):
            im_name = 'im_c00{}_z020_t030_p040.png'.format(c)
            self.assertEqual(frames[i].file_name, im_name)
            self.assertEqual(frames[i].channel_idx, c)
            self.assertEqual(frames[i].channel_name, 'channel_{}'.format(c))
            self.assertEqual(frames[i].slice_idx, 20)
            self.assertEqual(frames[i].time_idx, 30)
            self.assertEqual(frames[i].pos_idx, 40)
            self.assertEqual(frames[i].sha256, shas[i])
        # Download frames from storage and compare to originals
        for i, c in enumerate(channel_ids):
            im_name = 'im_c00{}_z020_t030_p040.png'.format(c)
            im_path = os.path.join(
                self.mount_point,
                'raw_frames',
                dataset_serial,
                im_name,
            )
            im_out = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
            nose.tools.assert_equal(im_out.dtype, np.uint16)
            numpy.testing.assert_array_equal(im_out, im + i * 10000)
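Example #16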
 def test_upload_frames(self, mock_session):
     mock_session.return_value.__enter__.return_value = self.session
     data_uploader.upload_data_and_update_db(
         csv=self.csv_path,
         login=self.credentials_path,
         config=self.config_path,
     )
     # Query database to find data_set and frames
     datasets = self.session.query(db_ops.DataSet) \
         .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
     self.assertEqual(datasets.count(), 1)
     dataset = datasets[0]
     self.assertEqual(dataset.id, 1)
     self.assertTrue(dataset.frames)
     self.assertEqual(dataset.dataset_serial, self.dataset_serial)
     date_time = dataset.date_time
     self.assertEqual(date_time.year, 2005)
     self.assertEqual(date_time.month, 6)
     self.assertEqual(date_time.day, 9)
     self.assertEqual(dataset.microscope,
                      "Leica microscope CAN bus adapter")
     self.assertEqual(dataset.description, 'Testing')
     # query frames_global
     global_query = self.session.query(db_ops.FramesGlobal) \
         .join(db_ops.DataSet) \
         .filter(db_ops.DataSet.dataset_serial == self.dataset_serial)
     self.assertEqual(
         global_query[0].storage_dir,
         self.storage_dir,
     )
     self.assertEqual(
         global_query[0].nbr_frames,
         self.nbr_channels * self.nbr_slices,
     )
     im_shape = self.im.shape
     self.assertEqual(
         global_query[0].im_width,
         im_shape[2],
     )
     self.assertEqual(
         global_query[0].im_height,
         im_shape[1],
     )
     self.assertEqual(global_query[0].nbr_slices, self.nbr_slices)
     self.assertEqual(
         global_query[0].nbr_channels,
         self.nbr_channels,
     )
     self.assertEqual(
         global_query[0].nbr_positions,
         1,
     )
     self.assertEqual(
         global_query[0].nbr_timepoints,
         1,
     )
     self.assertEqual(
         global_query[0].im_colors,
         1,
     )
     self.assertEqual(
         global_query[0].bit_depth,
         'uint16',
     )
     # query frames
     frames = self.session.query(db_ops.Frames) \
         .join(db_ops.FramesGlobal) \
         .join(db_ops.DataSet) \
         .filter(db_ops.DataSet.dataset_serial == self.dataset_serial) \
         .order_by(db_ops.Frames.file_name)
     # Images are separated by slice first then channel
     im_order = [0, 2, 4, 1, 3, 5]
     it = itertools.product(range(self.nbr_channels),
                            range(self.nbr_slices))
     for i, (c, z) in enumerate(it):
         im_name = 'im_c00{}_z00{}_t000_p000.png'.format(c, z)
         self.assertEqual(frames[i].file_name, im_name)
         self.assertEqual(frames[i].channel_idx, c)
         self.assertEqual(frames[i].slice_idx, z)
         self.assertEqual(frames[i].time_idx, 0)
         self.assertEqual(frames[i].pos_idx, 0)
         sha256 = meta_utils.gen_sha256(self.im[im_order[i], ...])
         self.assertEqual(frames[i].sha256, sha256)
     # Download frames from storage and compare to originals
     it = itertools.product(range(self.nbr_channels),
                            range(self.nbr_slices))
     for i, (c, z) in enumerate(it):
         im_name = 'im_c00{}_z00{}_t000_p000.png'.format(c, z)
         im_path = os.path.join(self.mount_point, self.storage_dir, im_name)
         im = cv2.imread(im_path, cv2.IMREAD_ANYDEPTH)
         nose.tools.assert_equal(im.dtype, np.uint16)
         numpy.testing.assert_array_equal(im, self.im[im_order[i], ...])
Example #17
def test_gen_sha256_invalid_input():
    meta_utils.gen_sha256(5)